1// SPDX-License-Identifier: GPL-2.0
2/*
3 * Driver for HiSilicon PCIe tune and trace device
4 *
5 * Copyright (c) 2022 HiSilicon Technologies Co., Ltd.
6 * Author: Yicong Yang <yangyicong@hisilicon.com>
7 */
8
#include <linux/bitfield.h>
#include <linux/bitops.h>
#include <linux/cpuhotplug.h>
#include <linux/delay.h>
#include <linux/dma-mapping.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/iommu.h>
#include <linux/iopoll.h>
#include <linux/jiffies.h>
#include <linux/module.h>
#include <linux/sysfs.h>
#include <linux/vmalloc.h>
21
22#include "hisi_ptt.h"
23
/*
 * Dynamic CPU hotplug state used by PTT. File-scope, so it is shared by
 * all PTT devices handled by this driver.
 */
static enum cpuhp_state hisi_ptt_pmu_online;
26
27static bool hisi_ptt_wait_tuning_finish(struct hisi_ptt *hisi_ptt)
28{
29	u32 val;
30
31	return !readl_poll_timeout(hisi_ptt->iobase + HISI_PTT_TUNING_INT_STAT,
32				   val, !(val & HISI_PTT_TUNING_INT_STAT_MASK),
33				   HISI_PTT_WAIT_POLL_INTERVAL_US,
34				   HISI_PTT_WAIT_TUNE_TIMEOUT_US);
35}
36
37static ssize_t hisi_ptt_tune_attr_show(struct device *dev,
38				       struct device_attribute *attr,
39				       char *buf)
40{
41	struct hisi_ptt *hisi_ptt = to_hisi_ptt(dev_get_drvdata(dev));
42	struct dev_ext_attribute *ext_attr;
43	struct hisi_ptt_tune_desc *desc;
44	u32 reg;
45	u16 val;
46
47	ext_attr = container_of(attr, struct dev_ext_attribute, attr);
48	desc = ext_attr->var;
49
50	mutex_lock(&hisi_ptt->tune_lock);
51
52	reg = readl(hisi_ptt->iobase + HISI_PTT_TUNING_CTRL);
53	reg &= ~(HISI_PTT_TUNING_CTRL_CODE | HISI_PTT_TUNING_CTRL_SUB);
54	reg |= FIELD_PREP(HISI_PTT_TUNING_CTRL_CODE | HISI_PTT_TUNING_CTRL_SUB,
55			  desc->event_code);
56	writel(reg, hisi_ptt->iobase + HISI_PTT_TUNING_CTRL);
57
58	/* Write all 1 to indicates it's the read process */
59	writel(~0U, hisi_ptt->iobase + HISI_PTT_TUNING_DATA);
60
61	if (!hisi_ptt_wait_tuning_finish(hisi_ptt)) {
62		mutex_unlock(&hisi_ptt->tune_lock);
63		return -ETIMEDOUT;
64	}
65
66	reg = readl(hisi_ptt->iobase + HISI_PTT_TUNING_DATA);
67	reg &= HISI_PTT_TUNING_DATA_VAL_MASK;
68	val = FIELD_GET(HISI_PTT_TUNING_DATA_VAL_MASK, reg);
69
70	mutex_unlock(&hisi_ptt->tune_lock);
71	return sysfs_emit(buf, "%u\n", val);
72}
73
74static ssize_t hisi_ptt_tune_attr_store(struct device *dev,
75					struct device_attribute *attr,
76					const char *buf, size_t count)
77{
78	struct hisi_ptt *hisi_ptt = to_hisi_ptt(dev_get_drvdata(dev));
79	struct dev_ext_attribute *ext_attr;
80	struct hisi_ptt_tune_desc *desc;
81	u32 reg;
82	u16 val;
83
84	ext_attr = container_of(attr, struct dev_ext_attribute, attr);
85	desc = ext_attr->var;
86
87	if (kstrtou16(buf, 10, &val))
88		return -EINVAL;
89
90	mutex_lock(&hisi_ptt->tune_lock);
91
92	reg = readl(hisi_ptt->iobase + HISI_PTT_TUNING_CTRL);
93	reg &= ~(HISI_PTT_TUNING_CTRL_CODE | HISI_PTT_TUNING_CTRL_SUB);
94	reg |= FIELD_PREP(HISI_PTT_TUNING_CTRL_CODE | HISI_PTT_TUNING_CTRL_SUB,
95			  desc->event_code);
96	writel(reg, hisi_ptt->iobase + HISI_PTT_TUNING_CTRL);
97	writel(FIELD_PREP(HISI_PTT_TUNING_DATA_VAL_MASK, val),
98	       hisi_ptt->iobase + HISI_PTT_TUNING_DATA);
99
100	if (!hisi_ptt_wait_tuning_finish(hisi_ptt)) {
101		mutex_unlock(&hisi_ptt->tune_lock);
102		return -ETIMEDOUT;
103	}
104
105	mutex_unlock(&hisi_ptt->tune_lock);
106	return count;
107}
108
/*
 * Define a tune event descriptor named <_name>_desc carrying event code
 * @_val, plus a dev_ext_attribute named hisi_ptt_<_name>_attr (mode 0600)
 * that uses @_show/@_store and points at the descriptor through .var.
 */
#define HISI_PTT_TUNE_ATTR(_name, _val, _show, _store)			\
	static struct hisi_ptt_tune_desc _name##_desc = {		\
		.name = #_name,						\
		.event_code = (_val),					\
	};								\
	static struct dev_ext_attribute hisi_ptt_##_name##_attr = {	\
		.attr	= __ATTR(_name, 0600, _show, _store),		\
		.var	= &_name##_desc,				\
	}
118
/* Tune attribute using the common show/store handlers of this file. */
#define HISI_PTT_TUNE_ATTR_COMMON(_name, _val)		\
	HISI_PTT_TUNE_ATTR(_name, _val,			\
			   hisi_ptt_tune_attr_show,	\
			   hisi_ptt_tune_attr_store)
123
/*
 * The value of a tuning event is composed of two parts: the main event code
 * in BIT[0,15] and the subevent code in BIT[16,23]. For example, qos_tx_cpl
 * is a subevent of 'Tx path QoS control' which is for tuning the weight of
 * Tx completion TLPs. See hisi_ptt.rst documentation for more information.
 */
#define HISI_PTT_TUNE_QOS_TX_CPL		(0x4 | (3 << 16))
#define HISI_PTT_TUNE_QOS_TX_NP			(0x4 | (4 << 16))
#define HISI_PTT_TUNE_QOS_TX_P			(0x4 | (5 << 16))
#define HISI_PTT_TUNE_RX_ALLOC_BUF_LEVEL	(0x5 | (6 << 16))
#define HISI_PTT_TUNE_TX_ALLOC_BUF_LEVEL	(0x5 | (7 << 16))
135
/* One descriptor and sysfs attribute per supported tuning event */
HISI_PTT_TUNE_ATTR_COMMON(qos_tx_cpl, HISI_PTT_TUNE_QOS_TX_CPL);
HISI_PTT_TUNE_ATTR_COMMON(qos_tx_np, HISI_PTT_TUNE_QOS_TX_NP);
HISI_PTT_TUNE_ATTR_COMMON(qos_tx_p, HISI_PTT_TUNE_QOS_TX_P);
HISI_PTT_TUNE_ATTR_COMMON(rx_alloc_buf_level, HISI_PTT_TUNE_RX_ALLOC_BUF_LEVEL);
HISI_PTT_TUNE_ATTR_COMMON(tx_alloc_buf_level, HISI_PTT_TUNE_TX_ALLOC_BUF_LEVEL);
141
/* NULL-terminated list of the tune attributes defined above */
static struct attribute *hisi_ptt_tune_attrs[] = {
	&hisi_ptt_qos_tx_cpl_attr.attr.attr,
	&hisi_ptt_qos_tx_np_attr.attr.attr,
	&hisi_ptt_qos_tx_p_attr.attr.attr,
	&hisi_ptt_rx_alloc_buf_level_attr.attr.attr,
	&hisi_ptt_tx_alloc_buf_level_attr.attr.attr,
	NULL,
};
150
/* The tune attributes are exposed in a sysfs group named "tune" */
static struct attribute_group hisi_ptt_tune_group = {
	.name	= "tune",
	.attrs	= hisi_ptt_tune_attrs,
};
155
156static u16 hisi_ptt_get_filter_val(u16 devid, bool is_port)
157{
158	if (is_port)
159		return BIT(HISI_PCIE_CORE_PORT_ID(devid & 0xff));
160
161	return devid;
162}
163
164static bool hisi_ptt_wait_trace_hw_idle(struct hisi_ptt *hisi_ptt)
165{
166	u32 val;
167
168	return !readl_poll_timeout_atomic(hisi_ptt->iobase + HISI_PTT_TRACE_STS,
169					  val, val & HISI_PTT_TRACE_IDLE,
170					  HISI_PTT_WAIT_POLL_INTERVAL_US,
171					  HISI_PTT_WAIT_TRACE_TIMEOUT_US);
172}
173
174static void hisi_ptt_wait_dma_reset_done(struct hisi_ptt *hisi_ptt)
175{
176	u32 val;
177
178	readl_poll_timeout_atomic(hisi_ptt->iobase + HISI_PTT_TRACE_WR_STS,
179				  val, !val, HISI_PTT_RESET_POLL_INTERVAL_US,
180				  HISI_PTT_RESET_TIMEOUT_US);
181}
182
183static void hisi_ptt_trace_end(struct hisi_ptt *hisi_ptt)
184{
185	writel(0, hisi_ptt->iobase + HISI_PTT_TRACE_CTRL);
186	hisi_ptt->trace_ctrl.started = false;
187}
188
189static int hisi_ptt_trace_start(struct hisi_ptt *hisi_ptt)
190{
191	struct hisi_ptt_trace_ctrl *ctrl = &hisi_ptt->trace_ctrl;
192	u32 val;
193	int i;
194
195	/* Check device idle before start trace */
196	if (!hisi_ptt_wait_trace_hw_idle(hisi_ptt)) {
197		pci_err(hisi_ptt->pdev, "Failed to start trace, the device is still busy\n");
198		return -EBUSY;
199	}
200
201	ctrl->started = true;
202
203	/* Reset the DMA before start tracing */
204	val = readl(hisi_ptt->iobase + HISI_PTT_TRACE_CTRL);
205	val |= HISI_PTT_TRACE_CTRL_RST;
206	writel(val, hisi_ptt->iobase + HISI_PTT_TRACE_CTRL);
207
208	hisi_ptt_wait_dma_reset_done(hisi_ptt);
209
210	val = readl(hisi_ptt->iobase + HISI_PTT_TRACE_CTRL);
211	val &= ~HISI_PTT_TRACE_CTRL_RST;
212	writel(val, hisi_ptt->iobase + HISI_PTT_TRACE_CTRL);
213
214	/* Reset the index of current buffer */
215	hisi_ptt->trace_ctrl.buf_index = 0;
216
217	/* Zero the trace buffers */
218	for (i = 0; i < HISI_PTT_TRACE_BUF_CNT; i++)
219		memset(ctrl->trace_buf[i].addr, 0, HISI_PTT_TRACE_BUF_SIZE);
220
221	/* Clear the interrupt status */
222	writel(HISI_PTT_TRACE_INT_STAT_MASK, hisi_ptt->iobase + HISI_PTT_TRACE_INT_STAT);
223	writel(0, hisi_ptt->iobase + HISI_PTT_TRACE_INT_MASK);
224
225	/* Set the trace control register */
226	val = FIELD_PREP(HISI_PTT_TRACE_CTRL_TYPE_SEL, ctrl->type);
227	val |= FIELD_PREP(HISI_PTT_TRACE_CTRL_RXTX_SEL, ctrl->direction);
228	val |= FIELD_PREP(HISI_PTT_TRACE_CTRL_DATA_FORMAT, ctrl->format);
229	val |= FIELD_PREP(HISI_PTT_TRACE_CTRL_TARGET_SEL, hisi_ptt->trace_ctrl.filter);
230	if (!hisi_ptt->trace_ctrl.is_port)
231		val |= HISI_PTT_TRACE_CTRL_FILTER_MODE;
232
233	/* Start the Trace */
234	val |= HISI_PTT_TRACE_CTRL_EN;
235	writel(val, hisi_ptt->iobase + HISI_PTT_TRACE_CTRL);
236
237	return 0;
238}
239
240static int hisi_ptt_update_aux(struct hisi_ptt *hisi_ptt, int index, bool stop)
241{
242	struct hisi_ptt_trace_ctrl *ctrl = &hisi_ptt->trace_ctrl;
243	struct perf_output_handle *handle = &ctrl->handle;
244	struct perf_event *event = handle->event;
245	struct hisi_ptt_pmu_buf *buf;
246	size_t size;
247	void *addr;
248
249	buf = perf_get_aux(handle);
250	if (!buf || !handle->size)
251		return -EINVAL;
252
253	addr = ctrl->trace_buf[ctrl->buf_index].addr;
254
255	/*
256	 * If we're going to stop, read the size of already traced data from
257	 * HISI_PTT_TRACE_WR_STS. Otherwise we're coming from the interrupt,
258	 * the data size is always HISI_PTT_TRACE_BUF_SIZE.
259	 */
260	if (stop) {
261		u32 reg;
262
263		reg = readl(hisi_ptt->iobase + HISI_PTT_TRACE_WR_STS);
264		size = FIELD_GET(HISI_PTT_TRACE_WR_STS_WRITE, reg);
265	} else {
266		size = HISI_PTT_TRACE_BUF_SIZE;
267	}
268
269	memcpy(buf->base + buf->pos, addr, size);
270	buf->pos += size;
271
272	/*
273	 * Just commit the traced data if we're going to stop. Otherwise if the
274	 * resident AUX buffer cannot contain the data of next trace buffer,
275	 * apply a new one.
276	 */
277	if (stop) {
278		perf_aux_output_end(handle, buf->pos);
279	} else if (buf->length - buf->pos < HISI_PTT_TRACE_BUF_SIZE) {
280		perf_aux_output_end(handle, buf->pos);
281
282		buf = perf_aux_output_begin(handle, event);
283		if (!buf)
284			return -EINVAL;
285
286		buf->pos = handle->head % buf->length;
287		if (buf->length - buf->pos < HISI_PTT_TRACE_BUF_SIZE) {
288			perf_aux_output_end(handle, 0);
289			return -EINVAL;
290		}
291	}
292
293	return 0;
294}
295
296static irqreturn_t hisi_ptt_isr(int irq, void *context)
297{
298	struct hisi_ptt *hisi_ptt = context;
299	u32 status, buf_idx;
300
301	status = readl(hisi_ptt->iobase + HISI_PTT_TRACE_INT_STAT);
302	if (!(status & HISI_PTT_TRACE_INT_STAT_MASK))
303		return IRQ_NONE;
304
305	buf_idx = ffs(status) - 1;
306
307	/* Clear the interrupt status of buffer @buf_idx */
308	writel(status, hisi_ptt->iobase + HISI_PTT_TRACE_INT_STAT);
309
310	/*
311	 * Update the AUX buffer and cache the current buffer index,
312	 * as we need to know this and save the data when the trace
313	 * is ended out of the interrupt handler. End the trace
314	 * if the updating fails.
315	 */
316	if (hisi_ptt_update_aux(hisi_ptt, buf_idx, false))
317		hisi_ptt_trace_end(hisi_ptt);
318	else
319		hisi_ptt->trace_ctrl.buf_index = (buf_idx + 1) % HISI_PTT_TRACE_BUF_CNT;
320
321	return IRQ_HANDLED;
322}
323
/* devm action: release the IRQ vectors allocated for the PCI device. */
static void hisi_ptt_irq_free_vectors(void *pdev)
{
	struct pci_dev *pci = pdev;

	pci_free_irq_vectors(pci);
}
328
329static int hisi_ptt_register_irq(struct hisi_ptt *hisi_ptt)
330{
331	struct pci_dev *pdev = hisi_ptt->pdev;
332	int ret;
333
334	ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_MSI);
335	if (ret < 0) {
336		pci_err(pdev, "failed to allocate irq vector, ret = %d\n", ret);
337		return ret;
338	}
339
340	ret = devm_add_action_or_reset(&pdev->dev, hisi_ptt_irq_free_vectors, pdev);
341	if (ret < 0)
342		return ret;
343
344	hisi_ptt->trace_irq = pci_irq_vector(pdev, HISI_PTT_TRACE_DMA_IRQ);
345	ret = devm_request_irq(&pdev->dev, hisi_ptt->trace_irq, hisi_ptt_isr,
346				IRQF_NOBALANCING | IRQF_NO_THREAD, DRV_NAME,
347				hisi_ptt);
348	if (ret) {
349		pci_err(pdev, "failed to request irq %d, ret = %d\n",
350			hisi_ptt->trace_irq, ret);
351		return ret;
352	}
353
354	return 0;
355}
356
357static void hisi_ptt_del_free_filter(struct hisi_ptt *hisi_ptt,
358				      struct hisi_ptt_filter_desc *filter)
359{
360	if (filter->is_port)
361		hisi_ptt->port_mask &= ~hisi_ptt_get_filter_val(filter->devid, true);
362
363	list_del(&filter->list);
364	kfree(filter->name);
365	kfree(filter);
366}
367
368static struct hisi_ptt_filter_desc *
369hisi_ptt_alloc_add_filter(struct hisi_ptt *hisi_ptt, u16 devid, bool is_port)
370{
371	struct hisi_ptt_filter_desc *filter;
372	u8 devfn = devid & 0xff;
373	char *filter_name;
374
375	filter_name = kasprintf(GFP_KERNEL, "%04x:%02x:%02x.%d", pci_domain_nr(hisi_ptt->pdev->bus),
376				 PCI_BUS_NUM(devid), PCI_SLOT(devfn), PCI_FUNC(devfn));
377	if (!filter_name) {
378		pci_err(hisi_ptt->pdev, "failed to allocate name for filter %04x:%02x:%02x.%d\n",
379			pci_domain_nr(hisi_ptt->pdev->bus), PCI_BUS_NUM(devid),
380			PCI_SLOT(devfn), PCI_FUNC(devfn));
381		return NULL;
382	}
383
384	filter = kzalloc(sizeof(*filter), GFP_KERNEL);
385	if (!filter) {
386		pci_err(hisi_ptt->pdev, "failed to add filter for %s\n",
387			filter_name);
388		kfree(filter_name);
389		return NULL;
390	}
391
392	filter->name = filter_name;
393	filter->is_port = is_port;
394	filter->devid = devid;
395
396	if (filter->is_port) {
397		list_add_tail(&filter->list, &hisi_ptt->port_filters);
398
399		/* Update the available port mask */
400		hisi_ptt->port_mask |= hisi_ptt_get_filter_val(filter->devid, true);
401	} else {
402		list_add_tail(&filter->list, &hisi_ptt->req_filters);
403	}
404
405	return filter;
406}
407
408static ssize_t hisi_ptt_filter_show(struct device *dev, struct device_attribute *attr,
409				    char *buf)
410{
411	struct hisi_ptt_filter_desc *filter;
412	unsigned long filter_val;
413
414	filter = container_of(attr, struct hisi_ptt_filter_desc, attr);
415	filter_val = hisi_ptt_get_filter_val(filter->devid, filter->is_port) |
416		     (filter->is_port ? HISI_PTT_PMU_FILTER_IS_PORT : 0);
417
418	return sysfs_emit(buf, "0x%05lx\n", filter_val);
419}
420
421static int hisi_ptt_create_rp_filter_attr(struct hisi_ptt *hisi_ptt,
422					  struct hisi_ptt_filter_desc *filter)
423{
424	struct kobject *kobj = &hisi_ptt->hisi_ptt_pmu.dev->kobj;
425
426	sysfs_attr_init(&filter->attr.attr);
427	filter->attr.attr.name = filter->name;
428	filter->attr.attr.mode = 0400; /* DEVICE_ATTR_ADMIN_RO */
429	filter->attr.show = hisi_ptt_filter_show;
430
431	return sysfs_add_file_to_group(kobj, &filter->attr.attr,
432				       HISI_PTT_RP_FILTERS_GRP_NAME);
433}
434
435static void hisi_ptt_remove_rp_filter_attr(struct hisi_ptt *hisi_ptt,
436					  struct hisi_ptt_filter_desc *filter)
437{
438	struct kobject *kobj = &hisi_ptt->hisi_ptt_pmu.dev->kobj;
439
440	sysfs_remove_file_from_group(kobj, &filter->attr.attr,
441				     HISI_PTT_RP_FILTERS_GRP_NAME);
442}
443
444static int hisi_ptt_create_req_filter_attr(struct hisi_ptt *hisi_ptt,
445					   struct hisi_ptt_filter_desc *filter)
446{
447	struct kobject *kobj = &hisi_ptt->hisi_ptt_pmu.dev->kobj;
448
449	sysfs_attr_init(&filter->attr.attr);
450	filter->attr.attr.name = filter->name;
451	filter->attr.attr.mode = 0400; /* DEVICE_ATTR_ADMIN_RO */
452	filter->attr.show = hisi_ptt_filter_show;
453
454	return sysfs_add_file_to_group(kobj, &filter->attr.attr,
455				       HISI_PTT_REQ_FILTERS_GRP_NAME);
456}
457
458static void hisi_ptt_remove_req_filter_attr(struct hisi_ptt *hisi_ptt,
459					   struct hisi_ptt_filter_desc *filter)
460{
461	struct kobject *kobj = &hisi_ptt->hisi_ptt_pmu.dev->kobj;
462
463	sysfs_remove_file_from_group(kobj, &filter->attr.attr,
464				     HISI_PTT_REQ_FILTERS_GRP_NAME);
465}
466
467static int hisi_ptt_create_filter_attr(struct hisi_ptt *hisi_ptt,
468				       struct hisi_ptt_filter_desc *filter)
469{
470	int ret;
471
472	if (filter->is_port)
473		ret = hisi_ptt_create_rp_filter_attr(hisi_ptt, filter);
474	else
475		ret = hisi_ptt_create_req_filter_attr(hisi_ptt, filter);
476
477	if (ret)
478		pci_err(hisi_ptt->pdev, "failed to create sysfs attribute for filter %s\n",
479			filter->name);
480
481	return ret;
482}
483
484static void hisi_ptt_remove_filter_attr(struct hisi_ptt *hisi_ptt,
485					struct hisi_ptt_filter_desc *filter)
486{
487	if (filter->is_port)
488		hisi_ptt_remove_rp_filter_attr(hisi_ptt, filter);
489	else
490		hisi_ptt_remove_req_filter_attr(hisi_ptt, filter);
491}
492
493static void hisi_ptt_remove_all_filter_attributes(void *data)
494{
495	struct hisi_ptt_filter_desc *filter;
496	struct hisi_ptt *hisi_ptt = data;
497
498	mutex_lock(&hisi_ptt->filter_lock);
499
500	list_for_each_entry(filter, &hisi_ptt->req_filters, list)
501		hisi_ptt_remove_filter_attr(hisi_ptt, filter);
502
503	list_for_each_entry(filter, &hisi_ptt->port_filters, list)
504		hisi_ptt_remove_filter_attr(hisi_ptt, filter);
505
506	hisi_ptt->sysfs_inited = false;
507	mutex_unlock(&hisi_ptt->filter_lock);
508}
509
510static int hisi_ptt_init_filter_attributes(struct hisi_ptt *hisi_ptt)
511{
512	struct hisi_ptt_filter_desc *filter;
513	int ret;
514
515	mutex_lock(&hisi_ptt->filter_lock);
516
517	/*
518	 * Register the reset callback in the first stage. In reset we traverse
519	 * the filters list to remove the sysfs attributes so the callback can
520	 * be called safely even without below filter attributes creation.
521	 */
522	ret = devm_add_action(&hisi_ptt->pdev->dev,
523			      hisi_ptt_remove_all_filter_attributes,
524			      hisi_ptt);
525	if (ret)
526		goto out;
527
528	list_for_each_entry(filter, &hisi_ptt->port_filters, list) {
529		ret = hisi_ptt_create_filter_attr(hisi_ptt, filter);
530		if (ret)
531			goto out;
532	}
533
534	list_for_each_entry(filter, &hisi_ptt->req_filters, list) {
535		ret = hisi_ptt_create_filter_attr(hisi_ptt, filter);
536		if (ret)
537			goto out;
538	}
539
540	hisi_ptt->sysfs_inited = true;
541out:
542	mutex_unlock(&hisi_ptt->filter_lock);
543	return ret;
544}
545
546static void hisi_ptt_update_filters(struct work_struct *work)
547{
548	struct delayed_work *delayed_work = to_delayed_work(work);
549	struct hisi_ptt_filter_update_info info;
550	struct hisi_ptt_filter_desc *filter;
551	struct hisi_ptt *hisi_ptt;
552
553	hisi_ptt = container_of(delayed_work, struct hisi_ptt, work);
554
555	if (!mutex_trylock(&hisi_ptt->filter_lock)) {
556		schedule_delayed_work(&hisi_ptt->work, HISI_PTT_WORK_DELAY_MS);
557		return;
558	}
559
560	while (kfifo_get(&hisi_ptt->filter_update_kfifo, &info)) {
561		if (info.is_add) {
562			/*
563			 * Notify the users if failed to add this filter, others
564			 * still work and available. See the comments in
565			 * hisi_ptt_init_filters().
566			 */
567			filter = hisi_ptt_alloc_add_filter(hisi_ptt, info.devid, info.is_port);
568			if (!filter)
569				continue;
570
571			/*
572			 * If filters' sysfs entries hasn't been initialized,
573			 * then we're still at probe stage. Add the filters to
574			 * the list and later hisi_ptt_init_filter_attributes()
575			 * will create sysfs attributes for all the filters.
576			 */
577			if (hisi_ptt->sysfs_inited &&
578			    hisi_ptt_create_filter_attr(hisi_ptt, filter)) {
579				hisi_ptt_del_free_filter(hisi_ptt, filter);
580				continue;
581			}
582		} else {
583			struct hisi_ptt_filter_desc *tmp;
584			struct list_head *target_list;
585
586			target_list = info.is_port ? &hisi_ptt->port_filters :
587				      &hisi_ptt->req_filters;
588
589			list_for_each_entry_safe(filter, tmp, target_list, list)
590				if (filter->devid == info.devid) {
591					if (hisi_ptt->sysfs_inited)
592						hisi_ptt_remove_filter_attr(hisi_ptt, filter);
593
594					hisi_ptt_del_free_filter(hisi_ptt, filter);
595					break;
596				}
597		}
598	}
599
600	mutex_unlock(&hisi_ptt->filter_lock);
601}
602
603/*
604 * A PCI bus notifier is used here for dynamically updating the filter
605 * list.
606 */
607static int hisi_ptt_notifier_call(struct notifier_block *nb, unsigned long action,
608				  void *data)
609{
610	struct hisi_ptt *hisi_ptt = container_of(nb, struct hisi_ptt, hisi_ptt_nb);
611	struct hisi_ptt_filter_update_info info;
612	struct pci_dev *pdev, *root_port;
613	struct device *dev = data;
614	u32 port_devid;
615
616	pdev = to_pci_dev(dev);
617	root_port = pcie_find_root_port(pdev);
618	if (!root_port)
619		return 0;
620
621	port_devid = pci_dev_id(root_port);
622	if (port_devid < hisi_ptt->lower_bdf ||
623	    port_devid > hisi_ptt->upper_bdf)
624		return 0;
625
626	info.is_port = pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT;
627	info.devid = pci_dev_id(pdev);
628
629	switch (action) {
630	case BUS_NOTIFY_ADD_DEVICE:
631		info.is_add = true;
632		break;
633	case BUS_NOTIFY_DEL_DEVICE:
634		info.is_add = false;
635		break;
636	default:
637		return 0;
638	}
639
640	/*
641	 * The FIFO size is 16 which is sufficient for almost all the cases,
642	 * since each PCIe core will have most 8 Root Ports (typically only
643	 * 1~4 Root Ports). On failure log the failed filter and let user
644	 * handle it.
645	 */
646	if (kfifo_in_spinlocked(&hisi_ptt->filter_update_kfifo, &info, 1,
647				&hisi_ptt->filter_update_lock))
648		schedule_delayed_work(&hisi_ptt->work, 0);
649	else
650		pci_warn(hisi_ptt->pdev,
651			 "filter update fifo overflow for target %s\n",
652			 pci_name(pdev));
653
654	return 0;
655}
656
657static int hisi_ptt_init_filters(struct pci_dev *pdev, void *data)
658{
659	struct pci_dev *root_port = pcie_find_root_port(pdev);
660	struct hisi_ptt_filter_desc *filter;
661	struct hisi_ptt *hisi_ptt = data;
662	u32 port_devid;
663
664	if (!root_port)
665		return 0;
666
667	port_devid = pci_dev_id(root_port);
668	if (port_devid < hisi_ptt->lower_bdf ||
669	    port_devid > hisi_ptt->upper_bdf)
670		return 0;
671
672	/*
673	 * We won't fail the probe if filter allocation failed here. The filters
674	 * should be partial initialized and users would know which filter fails
675	 * through the log. Other functions of PTT device are still available.
676	 */
677	filter = hisi_ptt_alloc_add_filter(hisi_ptt, pci_dev_id(pdev),
678					    pci_pcie_type(pdev) == PCI_EXP_TYPE_ROOT_PORT);
679	if (!filter)
680		return -ENOMEM;
681
682	return 0;
683}
684
685static void hisi_ptt_release_filters(void *data)
686{
687	struct hisi_ptt_filter_desc *filter, *tmp;
688	struct hisi_ptt *hisi_ptt = data;
689
690	list_for_each_entry_safe(filter, tmp, &hisi_ptt->req_filters, list)
691		hisi_ptt_del_free_filter(hisi_ptt, filter);
692
693	list_for_each_entry_safe(filter, tmp, &hisi_ptt->port_filters, list)
694		hisi_ptt_del_free_filter(hisi_ptt, filter);
695}
696
697static int hisi_ptt_config_trace_buf(struct hisi_ptt *hisi_ptt)
698{
699	struct hisi_ptt_trace_ctrl *ctrl = &hisi_ptt->trace_ctrl;
700	struct device *dev = &hisi_ptt->pdev->dev;
701	int i;
702
703	ctrl->trace_buf = devm_kcalloc(dev, HISI_PTT_TRACE_BUF_CNT,
704				       sizeof(*ctrl->trace_buf), GFP_KERNEL);
705	if (!ctrl->trace_buf)
706		return -ENOMEM;
707
708	for (i = 0; i < HISI_PTT_TRACE_BUF_CNT; ++i) {
709		ctrl->trace_buf[i].addr = dmam_alloc_coherent(dev, HISI_PTT_TRACE_BUF_SIZE,
710							     &ctrl->trace_buf[i].dma,
711							     GFP_KERNEL);
712		if (!ctrl->trace_buf[i].addr)
713			return -ENOMEM;
714	}
715
716	/* Configure the trace DMA buffer */
717	for (i = 0; i < HISI_PTT_TRACE_BUF_CNT; i++) {
718		writel(lower_32_bits(ctrl->trace_buf[i].dma),
719		       hisi_ptt->iobase + HISI_PTT_TRACE_ADDR_BASE_LO_0 +
720		       i * HISI_PTT_TRACE_ADDR_STRIDE);
721		writel(upper_32_bits(ctrl->trace_buf[i].dma),
722		       hisi_ptt->iobase + HISI_PTT_TRACE_ADDR_BASE_HI_0 +
723		       i * HISI_PTT_TRACE_ADDR_STRIDE);
724	}
725	writel(HISI_PTT_TRACE_BUF_SIZE, hisi_ptt->iobase + HISI_PTT_TRACE_ADDR_SIZE);
726
727	return 0;
728}
729
730static int hisi_ptt_init_ctrls(struct hisi_ptt *hisi_ptt)
731{
732	struct pci_dev *pdev = hisi_ptt->pdev;
733	struct pci_bus *bus;
734	int ret;
735	u32 reg;
736
737	INIT_DELAYED_WORK(&hisi_ptt->work, hisi_ptt_update_filters);
738	INIT_KFIFO(hisi_ptt->filter_update_kfifo);
739	spin_lock_init(&hisi_ptt->filter_update_lock);
740
741	INIT_LIST_HEAD(&hisi_ptt->port_filters);
742	INIT_LIST_HEAD(&hisi_ptt->req_filters);
743	mutex_init(&hisi_ptt->filter_lock);
744
745	ret = hisi_ptt_config_trace_buf(hisi_ptt);
746	if (ret)
747		return ret;
748
749	/*
750	 * The device range register provides the information about the root
751	 * ports which the RCiEP can control and trace. The RCiEP and the root
752	 * ports which it supports are on the same PCIe core, with same domain
753	 * number but maybe different bus number. The device range register
754	 * will tell us which root ports we can support, Bit[31:16] indicates
755	 * the upper BDF numbers of the root port, while Bit[15:0] indicates
756	 * the lower.
757	 */
758	reg = readl(hisi_ptt->iobase + HISI_PTT_DEVICE_RANGE);
759	hisi_ptt->upper_bdf = FIELD_GET(HISI_PTT_DEVICE_RANGE_UPPER, reg);
760	hisi_ptt->lower_bdf = FIELD_GET(HISI_PTT_DEVICE_RANGE_LOWER, reg);
761
762	bus = pci_find_bus(pci_domain_nr(pdev->bus), PCI_BUS_NUM(hisi_ptt->upper_bdf));
763	if (bus)
764		pci_walk_bus(bus, hisi_ptt_init_filters, hisi_ptt);
765
766	ret = devm_add_action_or_reset(&pdev->dev, hisi_ptt_release_filters, hisi_ptt);
767	if (ret)
768		return ret;
769
770	hisi_ptt->trace_ctrl.on_cpu = -1;
771	return 0;
772}
773
774static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr,
775			    char *buf)
776{
777	struct hisi_ptt *hisi_ptt = to_hisi_ptt(dev_get_drvdata(dev));
778	const cpumask_t *cpumask = cpumask_of_node(dev_to_node(&hisi_ptt->pdev->dev));
779
780	return cpumap_print_to_pagebuf(true, buf, cpumask);
781}
782static DEVICE_ATTR_RO(cpumask);
783
/* NULL-terminated list holding only the cpumask attribute */
static struct attribute *hisi_ptt_cpumask_attrs[] = {
	&dev_attr_cpumask.attr,
	NULL
};

/* Unnamed group: no .name is set for the cpumask attribute group */
static const struct attribute_group hisi_ptt_cpumask_attr_group = {
	.attrs = hisi_ptt_cpumask_attrs,
};
792
/*
 * Layout of the perf event config as exposed through the format attributes.
 *
 * Bit 19 indicates the filter type, 1 for Root Port filter and 0 for Requester
 * filter. Bit[15:0] indicates the filter value, for Root Port filter it's
 * a bit mask of desired ports and for Requester filter it's the Requester ID
 * of the desired PCIe function. Bit[18:16] is reserved for extension.
 *
 * See hisi_ptt.rst documentation for detailed information.
 */
PMU_FORMAT_ATTR(filter,		"config:0-19");
PMU_FORMAT_ATTR(direction,	"config:20-23");
PMU_FORMAT_ATTR(type,		"config:24-31");
PMU_FORMAT_ATTR(format,		"config:32-35");
805
/* NULL-terminated list of the format attributes defined above */
static struct attribute *hisi_ptt_pmu_format_attrs[] = {
	&format_attr_filter.attr,
	&format_attr_direction.attr,
	&format_attr_type.attr,
	&format_attr_format.attr,
	NULL
};

/* The format attributes are exposed in a sysfs group named "format" */
static struct attribute_group hisi_ptt_pmu_format_group = {
	.name = "format",
	.attrs = hisi_ptt_pmu_format_attrs,
};
818
819static ssize_t hisi_ptt_filter_multiselect_show(struct device *dev,
820						struct device_attribute *attr,
821						char *buf)
822{
823	struct dev_ext_attribute *ext_attr;
824
825	ext_attr = container_of(attr, struct dev_ext_attribute, attr);
826	return sysfs_emit(buf, "%s\n", (char *)ext_attr->var);
827}
828
/* "multiselect" attribute of the root port filters group; shows "1" */
static struct dev_ext_attribute root_port_filters_multiselect = {
	.attr = {
		.attr = { .name = "multiselect", .mode = 0400 },
		.show = hisi_ptt_filter_multiselect_show,
	},
	.var = "1",
};

/* Static members of the root port filters group */
static struct attribute *hisi_ptt_pmu_root_ports_attrs[] = {
	&root_port_filters_multiselect.attr.attr,
	NULL
};

/*
 * Group named HISI_PTT_RP_FILTERS_GRP_NAME; per-filter attributes are
 * added to it at runtime by hisi_ptt_create_rp_filter_attr().
 */
static struct attribute_group hisi_ptt_pmu_root_ports_group = {
	.name = HISI_PTT_RP_FILTERS_GRP_NAME,
	.attrs = hisi_ptt_pmu_root_ports_attrs,
};
846
/* "multiselect" attribute of the requester filters group; shows "0" */
static struct dev_ext_attribute requester_filters_multiselect = {
	.attr = {
		.attr = { .name = "multiselect", .mode = 0400 },
		.show = hisi_ptt_filter_multiselect_show,
	},
	.var = "0",
};

/* Static members of the requester filters group */
static struct attribute *hisi_ptt_pmu_requesters_attrs[] = {
	&requester_filters_multiselect.attr.attr,
	NULL
};

/*
 * Group named HISI_PTT_REQ_FILTERS_GRP_NAME; per-filter attributes are
 * added to it at runtime by hisi_ptt_create_req_filter_attr().
 */
static struct attribute_group hisi_ptt_pmu_requesters_group = {
	.name = HISI_PTT_REQ_FILTERS_GRP_NAME,
	.attrs = hisi_ptt_pmu_requesters_attrs,
};
864
/* NULL-terminated list of all static attribute groups defined above */
static const struct attribute_group *hisi_ptt_pmu_groups[] = {
	&hisi_ptt_cpumask_attr_group,
	&hisi_ptt_pmu_format_group,
	&hisi_ptt_tune_group,
	&hisi_ptt_pmu_root_ports_group,
	&hisi_ptt_pmu_requesters_group,
	NULL
};
873
874static int hisi_ptt_trace_valid_direction(u32 val)
875{
876	/*
877	 * The direction values have different effects according to the data
878	 * format (specified in the parentheses). TLP set A/B means different
879	 * set of TLP types. See hisi_ptt.rst documentation for more details.
880	 */
881	static const u32 hisi_ptt_trace_available_direction[] = {
882		0,	/* inbound(4DW) or reserved(8DW) */
883		1,	/* outbound(4DW) */
884		2,	/* {in, out}bound(4DW) or inbound(8DW), TLP set A */
885		3,	/* {in, out}bound(4DW) or inbound(8DW), TLP set B */
886	};
887	int i;
888
889	for (i = 0; i < ARRAY_SIZE(hisi_ptt_trace_available_direction); i++) {
890		if (val == hisi_ptt_trace_available_direction[i])
891			return 0;
892	}
893
894	return -EINVAL;
895}
896
897static int hisi_ptt_trace_valid_type(u32 val)
898{
899	/* Different types can be set simultaneously */
900	static const u32 hisi_ptt_trace_available_type[] = {
901		1,	/* posted_request */
902		2,	/* non-posted_request */
903		4,	/* completion */
904	};
905	int i;
906
907	if (!val)
908		return -EINVAL;
909
910	/*
911	 * Walk the available list and clear the valid bits of
912	 * the config. If there is any resident bit after the
913	 * walk then the config is invalid.
914	 */
915	for (i = 0; i < ARRAY_SIZE(hisi_ptt_trace_available_type); i++)
916		val &= ~hisi_ptt_trace_available_type[i];
917
918	if (val)
919		return -EINVAL;
920
921	return 0;
922}
923
924static int hisi_ptt_trace_valid_format(u32 val)
925{
926	static const u32 hisi_ptt_trace_availble_format[] = {
927		0,	/* 4DW */
928		1,	/* 8DW */
929	};
930	int i;
931
932	for (i = 0; i < ARRAY_SIZE(hisi_ptt_trace_availble_format); i++) {
933		if (val == hisi_ptt_trace_availble_format[i])
934			return 0;
935	}
936
937	return -EINVAL;
938}
939
940static int hisi_ptt_trace_valid_filter(struct hisi_ptt *hisi_ptt, u64 config)
941{
942	unsigned long val, port_mask = hisi_ptt->port_mask;
943	struct hisi_ptt_filter_desc *filter;
944	int ret = 0;
945
946	hisi_ptt->trace_ctrl.is_port = FIELD_GET(HISI_PTT_PMU_FILTER_IS_PORT, config);
947	val = FIELD_GET(HISI_PTT_PMU_FILTER_VAL_MASK, config);
948
949	/*
950	 * Port filters are defined as bit mask. For port filters, check
951	 * the bits in the @val are within the range of hisi_ptt->port_mask
952	 * and whether it's empty or not, otherwise user has specified
953	 * some unsupported root ports.
954	 *
955	 * For Requester ID filters, walk the available filter list to see
956	 * whether we have one matched.
957	 */
958	mutex_lock(&hisi_ptt->filter_lock);
959	if (!hisi_ptt->trace_ctrl.is_port) {
960		list_for_each_entry(filter, &hisi_ptt->req_filters, list) {
961			if (val == hisi_ptt_get_filter_val(filter->devid, filter->is_port))
962				goto out;
963		}
964	} else if (bitmap_subset(&val, &port_mask, BITS_PER_LONG)) {
965		goto out;
966	}
967
968	ret = -EINVAL;
969out:
970	mutex_unlock(&hisi_ptt->filter_lock);
971	return ret;
972}
973
974static void hisi_ptt_pmu_init_configs(struct hisi_ptt *hisi_ptt, struct perf_event *event)
975{
976	struct hisi_ptt_trace_ctrl *ctrl = &hisi_ptt->trace_ctrl;
977	u32 val;
978
979	val = FIELD_GET(HISI_PTT_PMU_FILTER_VAL_MASK, event->attr.config);
980	hisi_ptt->trace_ctrl.filter = val;
981
982	val = FIELD_GET(HISI_PTT_PMU_DIRECTION_MASK, event->attr.config);
983	ctrl->direction = val;
984
985	val = FIELD_GET(HISI_PTT_PMU_TYPE_MASK, event->attr.config);
986	ctrl->type = val;
987
988	val = FIELD_GET(HISI_PTT_PMU_FORMAT_MASK, event->attr.config);
989	ctrl->format = val;
990}
991
992static int hisi_ptt_pmu_event_init(struct perf_event *event)
993{
994	struct hisi_ptt *hisi_ptt = to_hisi_ptt(event->pmu);
995	int ret;
996	u32 val;
997
998	if (event->attr.type != hisi_ptt->hisi_ptt_pmu.type)
999		return -ENOENT;
1000
1001	if (event->cpu < 0) {
1002		dev_dbg(event->pmu->dev, "Per-task mode not supported\n");
1003		return -EOPNOTSUPP;
1004	}
1005
1006	if (event->attach_state & PERF_ATTACH_TASK)
1007		return -EOPNOTSUPP;
1008
1009	ret = hisi_ptt_trace_valid_filter(hisi_ptt, event->attr.config);
1010	if (ret < 0)
1011		return ret;
1012
1013	val = FIELD_GET(HISI_PTT_PMU_DIRECTION_MASK, event->attr.config);
1014	ret = hisi_ptt_trace_valid_direction(val);
1015	if (ret < 0)
1016		return ret;
1017
1018	val = FIELD_GET(HISI_PTT_PMU_TYPE_MASK, event->attr.config);
1019	ret = hisi_ptt_trace_valid_type(val);
1020	if (ret < 0)
1021		return ret;
1022
1023	val = FIELD_GET(HISI_PTT_PMU_FORMAT_MASK, event->attr.config);
1024	return hisi_ptt_trace_valid_format(val);
1025}
1026
1027static void *hisi_ptt_pmu_setup_aux(struct perf_event *event, void **pages,
1028				    int nr_pages, bool overwrite)
1029{
1030	struct hisi_ptt_pmu_buf *buf;
1031	struct page **pagelist;
1032	int i;
1033
1034	if (overwrite) {
1035		dev_warn(event->pmu->dev, "Overwrite mode is not supported\n");
1036		return NULL;
1037	}
1038
1039	/* If the pages size less than buffers, we cannot start trace */
1040	if (nr_pages < HISI_PTT_TRACE_TOTAL_BUF_SIZE / PAGE_SIZE)
1041		return NULL;
1042
1043	buf = kzalloc(sizeof(*buf), GFP_KERNEL);
1044	if (!buf)
1045		return NULL;
1046
1047	pagelist = kcalloc(nr_pages, sizeof(*pagelist), GFP_KERNEL);
1048	if (!pagelist)
1049		goto err;
1050
1051	for (i = 0; i < nr_pages; i++)
1052		pagelist[i] = virt_to_page(pages[i]);
1053
1054	buf->base = vmap(pagelist, nr_pages, VM_MAP, PAGE_KERNEL);
1055	if (!buf->base) {
1056		kfree(pagelist);
1057		goto err;
1058	}
1059
1060	buf->nr_pages = nr_pages;
1061	buf->length = nr_pages * PAGE_SIZE;
1062	buf->pos = 0;
1063
1064	kfree(pagelist);
1065	return buf;
1066err:
1067	kfree(buf);
1068	return NULL;
1069}
1070
1071static void hisi_ptt_pmu_free_aux(void *aux)
1072{
1073	struct hisi_ptt_pmu_buf *buf = aux;
1074
1075	vunmap(buf->base);
1076	kfree(buf);
1077}
1078
1079static void hisi_ptt_pmu_start(struct perf_event *event, int flags)
1080{
1081	struct hisi_ptt *hisi_ptt = to_hisi_ptt(event->pmu);
1082	struct perf_output_handle *handle = &hisi_ptt->trace_ctrl.handle;
1083	struct hw_perf_event *hwc = &event->hw;
1084	struct device *dev = event->pmu->dev;
1085	struct hisi_ptt_pmu_buf *buf;
1086	int cpu = event->cpu;
1087	int ret;
1088
1089	hwc->state = 0;
1090
1091	/* Serialize the perf process if user specified several CPUs */
1092	spin_lock(&hisi_ptt->pmu_lock);
1093	if (hisi_ptt->trace_ctrl.started) {
1094		dev_dbg(dev, "trace has already started\n");
1095		goto stop;
1096	}
1097
1098	/*
1099	 * Handle the interrupt on the same cpu which starts the trace to avoid
1100	 * context mismatch. Otherwise we'll trigger the WARN from the perf
1101	 * core in event_function_local(). If CPU passed is offline we'll fail
1102	 * here, just log it since we can do nothing here.
1103	 */
1104	ret = irq_set_affinity(hisi_ptt->trace_irq, cpumask_of(cpu));
1105	if (ret)
1106		dev_warn(dev, "failed to set the affinity of trace interrupt\n");
1107
1108	hisi_ptt->trace_ctrl.on_cpu = cpu;
1109
1110	buf = perf_aux_output_begin(handle, event);
1111	if (!buf) {
1112		dev_dbg(dev, "aux output begin failed\n");
1113		goto stop;
1114	}
1115
1116	buf->pos = handle->head % buf->length;
1117
1118	hisi_ptt_pmu_init_configs(hisi_ptt, event);
1119
1120	ret = hisi_ptt_trace_start(hisi_ptt);
1121	if (ret) {
1122		dev_dbg(dev, "trace start failed, ret = %d\n", ret);
1123		perf_aux_output_end(handle, 0);
1124		goto stop;
1125	}
1126
1127	spin_unlock(&hisi_ptt->pmu_lock);
1128	return;
1129stop:
1130	event->hw.state |= PERF_HES_STOPPED;
1131	spin_unlock(&hisi_ptt->pmu_lock);
1132}
1133
1134static void hisi_ptt_pmu_stop(struct perf_event *event, int flags)
1135{
1136	struct hisi_ptt *hisi_ptt = to_hisi_ptt(event->pmu);
1137	struct hw_perf_event *hwc = &event->hw;
1138
1139	if (hwc->state & PERF_HES_STOPPED)
1140		return;
1141
1142	spin_lock(&hisi_ptt->pmu_lock);
1143	if (hisi_ptt->trace_ctrl.started) {
1144		hisi_ptt_trace_end(hisi_ptt);
1145
1146		if (!hisi_ptt_wait_trace_hw_idle(hisi_ptt))
1147			dev_warn(event->pmu->dev, "Device is still busy\n");
1148
1149		hisi_ptt_update_aux(hisi_ptt, hisi_ptt->trace_ctrl.buf_index, true);
1150	}
1151	spin_unlock(&hisi_ptt->pmu_lock);
1152
1153	hwc->state |= PERF_HES_STOPPED;
1154	perf_event_update_userpage(event);
1155	hwc->state |= PERF_HES_UPTODATE;
1156}
1157
1158static int hisi_ptt_pmu_add(struct perf_event *event, int flags)
1159{
1160	struct hisi_ptt *hisi_ptt = to_hisi_ptt(event->pmu);
1161	struct hw_perf_event *hwc = &event->hw;
1162	int cpu = event->cpu;
1163
1164	/* Only allow the cpus on the device's node to add the event */
1165	if (!cpumask_test_cpu(cpu, cpumask_of_node(dev_to_node(&hisi_ptt->pdev->dev))))
1166		return 0;
1167
1168	hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE;
1169
1170	if (flags & PERF_EF_START) {
1171		hisi_ptt_pmu_start(event, PERF_EF_RELOAD);
1172		if (hwc->state & PERF_HES_STOPPED)
1173			return -EINVAL;
1174	}
1175
1176	return 0;
1177}
1178
1179static void hisi_ptt_pmu_del(struct perf_event *event, int flags)
1180{
1181	hisi_ptt_pmu_stop(event, PERF_EF_UPDATE);
1182}
1183
/* perf ->read() callback: intentionally a no-op for this PMU. */
static void hisi_ptt_pmu_read(struct perf_event *event)
{
	/* Nothing to do */
}
1187
1188static void hisi_ptt_remove_cpuhp_instance(void *hotplug_node)
1189{
1190	cpuhp_state_remove_instance_nocalls(hisi_ptt_pmu_online, hotplug_node);
1191}
1192
/* devm action: unregister the PMU that was registered for the device. */
static void hisi_ptt_unregister_pmu(void *pmu)
{
	struct pmu *hisi_ptt_pmu = pmu;

	perf_pmu_unregister(hisi_ptt_pmu);
}
1197
1198static int hisi_ptt_register_pmu(struct hisi_ptt *hisi_ptt)
1199{
1200	u16 core_id, sicl_id;
1201	char *pmu_name;
1202	u32 reg;
1203	int ret;
1204
1205	ret = cpuhp_state_add_instance_nocalls(hisi_ptt_pmu_online,
1206					       &hisi_ptt->hotplug_node);
1207	if (ret)
1208		return ret;
1209
1210	ret = devm_add_action_or_reset(&hisi_ptt->pdev->dev,
1211				       hisi_ptt_remove_cpuhp_instance,
1212				       &hisi_ptt->hotplug_node);
1213	if (ret)
1214		return ret;
1215
1216	mutex_init(&hisi_ptt->tune_lock);
1217	spin_lock_init(&hisi_ptt->pmu_lock);
1218
1219	hisi_ptt->hisi_ptt_pmu = (struct pmu) {
1220		.module		= THIS_MODULE,
1221		.capabilities	= PERF_PMU_CAP_EXCLUSIVE | PERF_PMU_CAP_NO_EXCLUDE,
1222		.task_ctx_nr	= perf_sw_context,
1223		.attr_groups	= hisi_ptt_pmu_groups,
1224		.event_init	= hisi_ptt_pmu_event_init,
1225		.setup_aux	= hisi_ptt_pmu_setup_aux,
1226		.free_aux	= hisi_ptt_pmu_free_aux,
1227		.start		= hisi_ptt_pmu_start,
1228		.stop		= hisi_ptt_pmu_stop,
1229		.add		= hisi_ptt_pmu_add,
1230		.del		= hisi_ptt_pmu_del,
1231		.read		= hisi_ptt_pmu_read,
1232	};
1233
1234	reg = readl(hisi_ptt->iobase + HISI_PTT_LOCATION);
1235	core_id = FIELD_GET(HISI_PTT_CORE_ID, reg);
1236	sicl_id = FIELD_GET(HISI_PTT_SICL_ID, reg);
1237
1238	pmu_name = devm_kasprintf(&hisi_ptt->pdev->dev, GFP_KERNEL, "hisi_ptt%u_%u",
1239				  sicl_id, core_id);
1240	if (!pmu_name)
1241		return -ENOMEM;
1242
1243	ret = perf_pmu_register(&hisi_ptt->hisi_ptt_pmu, pmu_name, -1);
1244	if (ret)
1245		return ret;
1246
1247	return devm_add_action_or_reset(&hisi_ptt->pdev->dev,
1248					hisi_ptt_unregister_pmu,
1249					&hisi_ptt->hisi_ptt_pmu);
1250}
1251
1252static void hisi_ptt_unregister_filter_update_notifier(void *data)
1253{
1254	struct hisi_ptt *hisi_ptt = data;
1255
1256	bus_unregister_notifier(&pci_bus_type, &hisi_ptt->hisi_ptt_nb);
1257
1258	/* Cancel any work that has been queued */
1259	cancel_delayed_work_sync(&hisi_ptt->work);
1260}
1261
1262/* Register the bus notifier for dynamically updating the filter list */
1263static int hisi_ptt_register_filter_update_notifier(struct hisi_ptt *hisi_ptt)
1264{
1265	int ret;
1266
1267	hisi_ptt->hisi_ptt_nb.notifier_call = hisi_ptt_notifier_call;
1268	ret = bus_register_notifier(&pci_bus_type, &hisi_ptt->hisi_ptt_nb);
1269	if (ret)
1270		return ret;
1271
1272	return devm_add_action_or_reset(&hisi_ptt->pdev->dev,
1273					hisi_ptt_unregister_filter_update_notifier,
1274					hisi_ptt);
1275}
1276
1277/*
1278 * The DMA of PTT trace can only use direct mappings due to some
1279 * hardware restriction. Check whether there is no IOMMU or the
1280 * policy of the IOMMU domain is passthrough, otherwise the trace
1281 * cannot work.
1282 *
1283 * The PTT device is supposed to behind an ARM SMMUv3, which
1284 * should have passthrough the device by a quirk.
1285 */
1286static int hisi_ptt_check_iommu_mapping(struct pci_dev *pdev)
1287{
1288	struct iommu_domain *iommu_domain;
1289
1290	iommu_domain = iommu_get_domain_for_dev(&pdev->dev);
1291	if (!iommu_domain || iommu_domain->type == IOMMU_DOMAIN_IDENTITY)
1292		return 0;
1293
1294	return -EOPNOTSUPP;
1295}
1296
1297static int hisi_ptt_probe(struct pci_dev *pdev,
1298			  const struct pci_device_id *id)
1299{
1300	struct hisi_ptt *hisi_ptt;
1301	int ret;
1302
1303	ret = hisi_ptt_check_iommu_mapping(pdev);
1304	if (ret) {
1305		pci_err(pdev, "requires direct DMA mappings\n");
1306		return ret;
1307	}
1308
1309	hisi_ptt = devm_kzalloc(&pdev->dev, sizeof(*hisi_ptt), GFP_KERNEL);
1310	if (!hisi_ptt)
1311		return -ENOMEM;
1312
1313	hisi_ptt->pdev = pdev;
1314	pci_set_drvdata(pdev, hisi_ptt);
1315
1316	ret = pcim_enable_device(pdev);
1317	if (ret) {
1318		pci_err(pdev, "failed to enable device, ret = %d\n", ret);
1319		return ret;
1320	}
1321
1322	ret = pcim_iomap_regions(pdev, BIT(2), DRV_NAME);
1323	if (ret) {
1324		pci_err(pdev, "failed to remap io memory, ret = %d\n", ret);
1325		return ret;
1326	}
1327
1328	hisi_ptt->iobase = pcim_iomap_table(pdev)[2];
1329
1330	ret = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1331	if (ret) {
1332		pci_err(pdev, "failed to set 64 bit dma mask, ret = %d\n", ret);
1333		return ret;
1334	}
1335
1336	pci_set_master(pdev);
1337
1338	ret = hisi_ptt_register_irq(hisi_ptt);
1339	if (ret)
1340		return ret;
1341
1342	ret = hisi_ptt_init_ctrls(hisi_ptt);
1343	if (ret) {
1344		pci_err(pdev, "failed to init controls, ret = %d\n", ret);
1345		return ret;
1346	}
1347
1348	ret = hisi_ptt_register_filter_update_notifier(hisi_ptt);
1349	if (ret)
1350		pci_warn(pdev, "failed to register filter update notifier, ret = %d", ret);
1351
1352	ret = hisi_ptt_register_pmu(hisi_ptt);
1353	if (ret) {
1354		pci_err(pdev, "failed to register PMU device, ret = %d", ret);
1355		return ret;
1356	}
1357
1358	ret = hisi_ptt_init_filter_attributes(hisi_ptt);
1359	if (ret) {
1360		pci_err(pdev, "failed to init sysfs filter attributes, ret = %d", ret);
1361		return ret;
1362	}
1363
1364	return 0;
1365}
1366
1367static const struct pci_device_id hisi_ptt_id_tbl[] = {
1368	{ PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, 0xa12e) },
1369	{ }
1370};
1371MODULE_DEVICE_TABLE(pci, hisi_ptt_id_tbl);
1372
1373static struct pci_driver hisi_ptt_driver = {
1374	.name = DRV_NAME,
1375	.id_table = hisi_ptt_id_tbl,
1376	.probe = hisi_ptt_probe,
1377};
1378
1379static int hisi_ptt_cpu_teardown(unsigned int cpu, struct hlist_node *node)
1380{
1381	struct hisi_ptt *hisi_ptt;
1382	struct device *dev;
1383	int target, src;
1384
1385	hisi_ptt = hlist_entry_safe(node, struct hisi_ptt, hotplug_node);
1386	src = hisi_ptt->trace_ctrl.on_cpu;
1387	dev = hisi_ptt->hisi_ptt_pmu.dev;
1388
1389	if (!hisi_ptt->trace_ctrl.started || src != cpu)
1390		return 0;
1391
1392	target = cpumask_any_but(cpumask_of_node(dev_to_node(&hisi_ptt->pdev->dev)), cpu);
1393	if (target >= nr_cpu_ids) {
1394		dev_err(dev, "no available cpu for perf context migration\n");
1395		return 0;
1396	}
1397
1398	perf_pmu_migrate_context(&hisi_ptt->hisi_ptt_pmu, src, target);
1399
1400	/*
1401	 * Also make sure the interrupt bind to the migrated CPU as well. Warn
1402	 * the user on failure here.
1403	 */
1404	if (irq_set_affinity(hisi_ptt->trace_irq, cpumask_of(target)))
1405		dev_warn(dev, "failed to set the affinity of trace interrupt\n");
1406
1407	hisi_ptt->trace_ctrl.on_cpu = target;
1408	return 0;
1409}
1410
1411static int __init hisi_ptt_init(void)
1412{
1413	int ret;
1414
1415	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, DRV_NAME, NULL,
1416				      hisi_ptt_cpu_teardown);
1417	if (ret < 0)
1418		return ret;
1419	hisi_ptt_pmu_online = ret;
1420
1421	ret = pci_register_driver(&hisi_ptt_driver);
1422	if (ret)
1423		cpuhp_remove_multi_state(hisi_ptt_pmu_online);
1424
1425	return ret;
1426}
1427module_init(hisi_ptt_init);
1428
1429static void __exit hisi_ptt_exit(void)
1430{
1431	pci_unregister_driver(&hisi_ptt_driver);
1432	cpuhp_remove_multi_state(hisi_ptt_pmu_online);
1433}
1434module_exit(hisi_ptt_exit);
1435
1436MODULE_LICENSE("GPL");
1437MODULE_AUTHOR("Yicong Yang <yangyicong@hisilicon.com>");
1438MODULE_DESCRIPTION("Driver for HiSilicon PCIe tune and trace device");
1439