// SPDX-License-Identifier: GPL-2.0
/*
 * PCI Peer 2 Peer DMA support.
 *
 * Copyright (c) 2016-2018, Logan Gunthorpe
 * Copyright (c) 2016-2017, Microsemi Corporation
 * Copyright (c) 2017, Christoph Hellwig
 * Copyright (c) 2018, Eideticom Inc.
 */

#define pr_fmt(fmt) "pci-p2pdma: " fmt
#include <linux/ctype.h>
#include <linux/pci-p2pdma.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/genalloc.h>
#include <linux/memremap.h>
#include <linux/percpu-refcount.h>
#include <linux/random.h>
#include <linux/seq_buf.h>
#include <linux/xarray.h>

enum pci_p2pdma_map_type {
	PCI_P2PDMA_MAP_UNKNOWN = 0,
	PCI_P2PDMA_MAP_NOT_SUPPORTED,
	PCI_P2PDMA_MAP_BUS_ADDR,
	PCI_P2PDMA_MAP_THRU_HOST_BRIDGE,
};

struct pci_p2pdma {
	struct gen_pool *pool;
	bool p2pmem_published;
	struct xarray map_types;
};

struct pci_p2pdma_pagemap {
	struct dev_pagemap pgmap;
	struct pci_dev *provider;
	u64 bus_offset;
};

static struct pci_p2pdma_pagemap *to_p2p_pgmap(struct dev_pagemap *pgmap)
{
	return container_of(pgmap, struct pci_p2pdma_pagemap, pgmap);
}

static ssize_t size_show(struct device *dev, struct device_attribute *attr,
			 char *buf)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	size_t size = 0;

	if (pdev->p2pdma->pool)
		size = gen_pool_size(pdev->p2pdma->pool);

	return scnprintf(buf, PAGE_SIZE, "%zd\n", size);
}
static DEVICE_ATTR_RO(size);

static ssize_t available_show(struct device *dev, struct device_attribute *attr,
			      char *buf)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	size_t avail = 0;

	if (pdev->p2pdma->pool)
		avail = gen_pool_avail(pdev->p2pdma->pool);

	return scnprintf(buf, PAGE_SIZE, "%zd\n", avail);
}
static DEVICE_ATTR_RO(available);

static ssize_t published_show(struct device *dev, struct device_attribute *attr,
			      char *buf)
{
	struct pci_dev *pdev = to_pci_dev(dev);

	return scnprintf(buf, PAGE_SIZE, "%d\n",
			 pdev->p2pdma->p2pmem_published);
}
static DEVICE_ATTR_RO(published);

static struct attribute *p2pmem_attrs[] = {
	&dev_attr_size.attr,
	&dev_attr_available.attr,
	&dev_attr_published.attr,
	NULL,
};

static const struct attribute_group p2pmem_group = {
	.attrs = p2pmem_attrs,
	.name = "p2pmem",
};

static void pci_p2pdma_release(void *data)
{
	struct pci_dev *pdev = data;
	struct pci_p2pdma *p2pdma = pdev->p2pdma;

	if (!p2pdma)
		return;
	/* Flush and disable pci_alloc_p2pmem() */
	pdev->p2pdma = NULL;
	synchronize_rcu();

	gen_pool_destroy(p2pdma->pool);
	sysfs_remove_group(&pdev->dev.kobj, &p2pmem_group);
	xa_destroy(&p2pdma->map_types);
}

static int pci_p2pdma_setup(struct pci_dev *pdev)
{
	int error = -ENOMEM;
	struct pci_p2pdma *p2p;

	p2p = devm_kzalloc(&pdev->dev, sizeof(*p2p), GFP_KERNEL);
	if (!p2p)
		return -ENOMEM;

	xa_init(&p2p->map_types);

	p2p->pool = gen_pool_create(PAGE_SHIFT, dev_to_node(&pdev->dev));
	if (!p2p->pool)
		goto out;

	error = devm_add_action_or_reset(&pdev->dev, pci_p2pdma_release, pdev);
	if (error)
		goto out_pool_destroy;

	pdev->p2pdma = p2p;

	error = sysfs_create_group(&pdev->dev.kobj, &p2pmem_group);
	if (error)
		goto out_pool_destroy;

	return 0;

out_pool_destroy:
	pdev->p2pdma = NULL;
	gen_pool_destroy(p2p->pool);
out:
	devm_kfree(&pdev->dev, p2p);
	return error;
}

/**
 * pci_p2pdma_add_resource - add memory for use as p2p memory
 * @pdev: the device to add the memory to
 * @bar: PCI BAR to add
 * @size: size of the memory to add, may be zero to use the whole BAR
 * @offset: offset into the PCI BAR
 *
 * The memory will be given ZONE_DEVICE struct pages so that it may
 * be used with any DMA request.
 */
int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
			    u64 offset)
{
	struct pci_p2pdma_pagemap *p2p_pgmap;
	struct dev_pagemap *pgmap;
	void *addr;
	int error;

	if (!(pci_resource_flags(pdev, bar) & IORESOURCE_MEM))
		return -EINVAL;

	if (offset >= pci_resource_len(pdev, bar))
		return -EINVAL;

	if (!size)
		size = pci_resource_len(pdev, bar) - offset;

	if (size + offset > pci_resource_len(pdev, bar))
		return -EINVAL;

	if (!pdev->p2pdma) {
		error = pci_p2pdma_setup(pdev);
		if (error)
			return error;
	}

	p2p_pgmap = devm_kzalloc(&pdev->dev, sizeof(*p2p_pgmap), GFP_KERNEL);
	if (!p2p_pgmap)
		return -ENOMEM;

	pgmap = &p2p_pgmap->pgmap;
	pgmap->range.start = pci_resource_start(pdev, bar) + offset;
	pgmap->range.end = pgmap->range.start + size - 1;
	pgmap->nr_range = 1;
	pgmap->type = MEMORY_DEVICE_PCI_P2PDMA;

	p2p_pgmap->provider = pdev;
	p2p_pgmap->bus_offset = pci_bus_address(pdev, bar) -
		pci_resource_start(pdev, bar);

	addr = devm_memremap_pages(&pdev->dev, pgmap);
	if (IS_ERR(addr)) {
		error = PTR_ERR(addr);
		goto pgmap_free;
	}

	error = gen_pool_add_owner(pdev->p2pdma->pool, (unsigned long)addr,
			pci_bus_address(pdev, bar) + offset,
			range_len(&pgmap->range), dev_to_node(&pdev->dev),
			pgmap->ref);
	if (error)
		goto pages_free;

	pci_info(pdev, "added peer-to-peer DMA memory %#llx-%#llx\n",
		 pgmap->range.start, pgmap->range.end);

	return 0;

pages_free:
	devm_memunmap_pages(&pdev->dev, pgmap);
pgmap_free:
	devm_kfree(&pdev->dev, pgmap);
	return error;
}
EXPORT_SYMBOL_GPL(pci_p2pdma_add_resource);
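
/*
 * Illustrative sketch, not part of the original file: how a provider
 * driver might hand a BAR to the p2pdma pool at probe time. The BAR
 * number (4), use of the whole BAR and the function name below are
 * assumptions for illustration only.
 */
static int __maybe_unused example_provider_probe(struct pci_dev *pdev)
{
	int error;

	error = pcim_enable_device(pdev);
	if (error)
		return error;

	/* Give all of BAR 4 ZONE_DEVICE pages and add it to the pool */
	error = pci_p2pdma_add_resource(pdev, 4, 0, 0);
	if (error)
		return error;

	/* Make the memory visible to pci_p2pmem_find_many() users */
	pci_p2pmem_publish(pdev, true);

	return 0;
}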

/*
 * Note this function returns the parent PCI device with a
 * reference taken. It is the caller's responsibility to drop
 * the reference.
 */
static struct pci_dev *find_parent_pci_dev(struct device *dev)
{
	struct device *parent;

	dev = get_device(dev);

	while (dev) {
		if (dev_is_pci(dev))
			return to_pci_dev(dev);

		parent = get_device(dev->parent);
		put_device(dev);
		dev = parent;
	}

	return NULL;
}

/*
 * Check if a PCI bridge has its ACS redirection bits set to redirect P2P
 * TLPs upstream. Returns 1 if the packets will be redirected upstream,
 * 0 otherwise.
 */
static int pci_bridge_has_acs_redir(struct pci_dev *pdev)
{
	int pos;
	u16 ctrl;

	pos = pdev->acs_cap;
	if (!pos)
		return 0;

	pci_read_config_word(pdev, pos + PCI_ACS_CTRL, &ctrl);

	if (ctrl & (PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_EC))
		return 1;

	return 0;
}

static void seq_buf_print_bus_devfn(struct seq_buf *buf, struct pci_dev *pdev)
{
	if (!buf)
		return;

	seq_buf_printf(buf, "%s;", pci_name(pdev));
}

static bool cpu_supports_p2pdma(void)
{
#ifdef CONFIG_X86
	struct cpuinfo_x86 *c = &cpu_data(0);

	/* Any AMD CPU from the Zen family (0x17) or newer supports p2pdma */
	if (c->x86_vendor == X86_VENDOR_AMD && c->x86 >= 0x17)
		return true;
#endif

	return false;
}

static const struct pci_p2pdma_whitelist_entry {
	unsigned short vendor;
	unsigned short device;
	enum {
		REQ_SAME_HOST_BRIDGE	= 1 << 0,
	} flags;
} pci_p2pdma_whitelist[] = {
	/* Intel Xeon E5/Core i7 */
	{PCI_VENDOR_ID_INTEL,	0x3c00, REQ_SAME_HOST_BRIDGE},
	{PCI_VENDOR_ID_INTEL,	0x3c01, REQ_SAME_HOST_BRIDGE},
	/* Intel Xeon E7 v3/Xeon E5 v3/Core i7 */
	{PCI_VENDOR_ID_INTEL,	0x2f00, REQ_SAME_HOST_BRIDGE},
	{PCI_VENDOR_ID_INTEL,	0x2f01, REQ_SAME_HOST_BRIDGE},
	/* Intel SkyLake-E */
	{PCI_VENDOR_ID_INTEL,	0x2030, 0},
	{PCI_VENDOR_ID_INTEL,	0x2031, 0},
	{PCI_VENDOR_ID_INTEL,	0x2032, 0},
	{PCI_VENDOR_ID_INTEL,	0x2033, 0},
	{PCI_VENDOR_ID_INTEL,	0x2020, 0},
	{}
};

/*
 * This lookup function tries to find the PCI device corresponding to a given
 * host bridge.
 *
 * It assumes the host bridge device is the first PCI device in the
 * bus->devices list and that the devfn is 00.0. These assumptions should hold
 * for all the devices in the whitelist above.
 *
 * This function is equivalent to pci_get_slot(host->bus, 0), however it does
 * not take the pci_bus_sem lock seeing __host_bridge_whitelist() must not
 * sleep.
 *
 * For this to be safe, the caller should hold a reference to a device on the
 * bridge, which should ensure the host_bridge device will not be freed
 * or removed from the head of the devices list.
 */
static struct pci_dev *pci_host_bridge_dev(struct pci_host_bridge *host)
{
	struct pci_dev *root;

	root = list_first_entry_or_null(&host->bus->devices,
					struct pci_dev, bus_list);

	if (!root)
		return NULL;
	if (root->devfn != PCI_DEVFN(0, 0))
		return NULL;

	return root;
}

static bool __host_bridge_whitelist(struct pci_host_bridge *host,
				    bool same_host_bridge)
{
	struct pci_dev *root = pci_host_bridge_dev(host);
	const struct pci_p2pdma_whitelist_entry *entry;
	unsigned short vendor, device;

	if (!root)
		return false;

	vendor = root->vendor;
	device = root->device;

	for (entry = pci_p2pdma_whitelist; entry->vendor; entry++) {
		if (vendor != entry->vendor || device != entry->device)
			continue;
		if (entry->flags & REQ_SAME_HOST_BRIDGE && !same_host_bridge)
			return false;

		return true;
	}

	return false;
}

/*
 * If we can't find a common upstream bridge take a look at the root
 * complex and compare it to a whitelist of known good hardware.
 */
static bool host_bridge_whitelist(struct pci_dev *a, struct pci_dev *b)
{
	struct pci_host_bridge *host_a = pci_find_host_bridge(a->bus);
	struct pci_host_bridge *host_b = pci_find_host_bridge(b->bus);

	if (host_a == host_b)
		return __host_bridge_whitelist(host_a, true);

	if (__host_bridge_whitelist(host_a, false) &&
	    __host_bridge_whitelist(host_b, false))
		return true;

	return false;
}

static enum pci_p2pdma_map_type
__upstream_bridge_distance(struct pci_dev *provider, struct pci_dev *client,
		int *dist, bool *acs_redirects, struct seq_buf *acs_list)
{
	struct pci_dev *a = provider, *b = client, *bb;
	int dist_a = 0;
	int dist_b = 0;
	int acs_cnt = 0;

	if (acs_redirects)
		*acs_redirects = false;

	/*
	 * Note, we don't need to take references to devices returned by
	 * pci_upstream_bridge() seeing we hold a reference to a child
	 * device which will already hold a reference to the upstream bridge.
	 */

	while (a) {
		dist_b = 0;

		if (pci_bridge_has_acs_redir(a)) {
			seq_buf_print_bus_devfn(acs_list, a);
			acs_cnt++;
		}

		bb = b;

		while (bb) {
			if (a == bb)
				goto check_b_path_acs;

			bb = pci_upstream_bridge(bb);
			dist_b++;
		}

		a = pci_upstream_bridge(a);
		dist_a++;
	}

	if (dist)
		*dist = dist_a + dist_b;

	return PCI_P2PDMA_MAP_THRU_HOST_BRIDGE;

check_b_path_acs:
	bb = b;

	while (bb) {
		if (a == bb)
			break;

		if (pci_bridge_has_acs_redir(bb)) {
			seq_buf_print_bus_devfn(acs_list, bb);
			acs_cnt++;
		}

		bb = pci_upstream_bridge(bb);
	}

	if (dist)
		*dist = dist_a + dist_b;

	if (acs_cnt) {
		if (acs_redirects)
			*acs_redirects = true;

		return PCI_P2PDMA_MAP_THRU_HOST_BRIDGE;
	}

	return PCI_P2PDMA_MAP_BUS_ADDR;
}

static unsigned long map_types_idx(struct pci_dev *client)
{
	return (pci_domain_nr(client->bus) << 16) |
		(client->bus->number << 8) | client->devfn;
}
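
/*
 * Worked example, for illustration only: a client at 0000:03:00.1
 * (domain 0, bus 3, devfn 0x01) yields (0 << 16) | (3 << 8) | 0x01 =
 * 0x0301, so each (domain, bus, devfn) tuple gets a unique slot in the
 * map_types xarray.
 */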

/*
 * Find the distance through the nearest common upstream bridge between
 * two PCI devices.
 *
 * If the two devices are the same device then 0 will be returned.
 *
 * If there are two virtual functions of the same device behind the same
 * bridge port then 2 will be returned (one step down to the PCIe switch,
 * then one step back to the same device).
 *
 * In the case where two devices are connected to the same PCIe switch, the
 * value 4 will be returned. This corresponds to the following PCI tree:
 *
 *     -+  Root Port
 *      \+ Switch Upstream Port
 *       +-+ Switch Downstream Port
 *       + \- Device A
 *       \-+ Switch Downstream Port
 *         \- Device B
 *
 * The distance is 4 because we traverse from Device A through the downstream
 * port of the switch, to the common upstream port, back up to the second
 * downstream port and then to Device B.
 *
 * Any two devices that cannot communicate using p2pdma will return
 * PCI_P2PDMA_MAP_NOT_SUPPORTED.
 *
 * Any two devices that have a data path that goes through the host bridge
 * will consult a whitelist. If the host bridges are on the whitelist,
 * this function will return PCI_P2PDMA_MAP_THRU_HOST_BRIDGE.
 *
 * If either bridge is not on the whitelist this function returns
 * PCI_P2PDMA_MAP_NOT_SUPPORTED.
 *
 * If a bridge which has any ACS redirection bits set is in the path,
 * acs_redirects will be set to true. In this case, a list of all infringing
 * bridge addresses will be populated in acs_list (assuming it's non-null)
 * for printk purposes.
 */
static enum pci_p2pdma_map_type
upstream_bridge_distance(struct pci_dev *provider, struct pci_dev *client,
		int *dist, bool *acs_redirects, struct seq_buf *acs_list)
{
	enum pci_p2pdma_map_type map_type;

	map_type = __upstream_bridge_distance(provider, client, dist,
					      acs_redirects, acs_list);

	if (map_type == PCI_P2PDMA_MAP_THRU_HOST_BRIDGE) {
		if (!cpu_supports_p2pdma() &&
		    !host_bridge_whitelist(provider, client))
			map_type = PCI_P2PDMA_MAP_NOT_SUPPORTED;
	}

	if (provider->p2pdma)
		xa_store(&provider->p2pdma->map_types, map_types_idx(client),
			 xa_mk_value(map_type), GFP_KERNEL);

	return map_type;
}

static enum pci_p2pdma_map_type
upstream_bridge_distance_warn(struct pci_dev *provider, struct pci_dev *client,
			      int *dist)
{
	struct seq_buf acs_list;
	bool acs_redirects;
	int ret;

	seq_buf_init(&acs_list, kmalloc(PAGE_SIZE, GFP_KERNEL), PAGE_SIZE);
	if (!acs_list.buffer)
		return -ENOMEM;

	ret = upstream_bridge_distance(provider, client, dist, &acs_redirects,
				       &acs_list);
	if (acs_redirects) {
		pci_warn(client, "ACS redirect is set between the client and provider (%s)\n",
			 pci_name(provider));
		/* Drop final semicolon */
		acs_list.buffer[acs_list.len-1] = 0;
		pci_warn(client, "to disable ACS redirect for this path, add the kernel parameter: pci=disable_acs_redir=%s\n",
			 acs_list.buffer);
	}

	if (ret == PCI_P2PDMA_MAP_NOT_SUPPORTED) {
		pci_warn(client, "cannot be used for peer-to-peer DMA as the client and provider (%s) do not share an upstream bridge or whitelisted host bridge\n",
			 pci_name(provider));
	}

	kfree(acs_list.buffer);

	return ret;
}

/**
 * pci_p2pdma_distance_many - Determine the cumulative distance between
 *	a p2pdma provider and the clients in use.
 * @provider: p2pdma provider to check against the client list
 * @clients: array of devices to check (NULL-terminated)
 * @num_clients: number of clients in the array
 * @verbose: if true, print warnings for devices when we return -1
 *
 * Returns -1 if any of the clients are not compatible, otherwise returns a
 * non-negative number where a lower number is the preferable choice. (If
 * there's one client that's the same as the provider it will return 0,
 * which is the best choice).
 *
 * "compatible" means the provider and the clients are either all behind
 * the same PCI root port or the host bridges connected to each of the devices
 * are listed in the 'pci_p2pdma_whitelist'.
 */
int pci_p2pdma_distance_many(struct pci_dev *provider, struct device **clients,
			     int num_clients, bool verbose)
{
	bool not_supported = false;
	struct pci_dev *pci_client;
	int total_dist = 0;
	int distance;
	int i, ret;

	if (num_clients == 0)
		return -1;

	for (i = 0; i < num_clients; i++) {
#ifdef CONFIG_DMA_VIRT_OPS
		if (clients[i]->dma_ops == &dma_virt_ops) {
			if (verbose)
				dev_warn(clients[i],
					 "cannot be used for peer-to-peer DMA because the driver makes use of dma_virt_ops\n");
			return -1;
		}
#endif

		pci_client = find_parent_pci_dev(clients[i]);
		if (!pci_client) {
			if (verbose)
				dev_warn(clients[i],
					 "cannot be used for peer-to-peer DMA as it is not a PCI device\n");
			return -1;
		}

		if (verbose)
			ret = upstream_bridge_distance_warn(provider,
					pci_client, &distance);
		else
			ret = upstream_bridge_distance(provider, pci_client,
						       &distance, NULL, NULL);

		pci_dev_put(pci_client);

		if (ret == PCI_P2PDMA_MAP_NOT_SUPPORTED)
			not_supported = true;

		if (not_supported && !verbose)
			break;

		total_dist += distance;
	}

	if (not_supported)
		return -1;

	return total_dist;
}
EXPORT_SYMBOL_GPL(pci_p2pdma_distance_many);
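
/*
 * Illustrative sketch, not part of the original file: an orchestrating
 * driver with two client devices (say, an NVMe controller and an RDMA
 * device) might verify that a candidate provider can reach both before
 * using its memory. The client devices here are placeholders.
 */
static bool __maybe_unused example_clients_compatible(struct pci_dev *provider,
		struct device *client_a, struct device *client_b)
{
	struct device *clients[] = { client_a, client_b };

	/* A negative distance means at least one client is incompatible */
	return pci_p2pdma_distance_many(provider, clients,
					ARRAY_SIZE(clients), true) >= 0;
}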

/**
 * pci_has_p2pmem - check if a given PCI device has published any p2pmem
 * @pdev: PCI device to check
 */
bool pci_has_p2pmem(struct pci_dev *pdev)
{
	return pdev->p2pdma && pdev->p2pdma->p2pmem_published;
}
EXPORT_SYMBOL_GPL(pci_has_p2pmem);

/**
 * pci_p2pmem_find_many - find a peer-to-peer DMA memory device compatible with
 *	the specified list of clients and shortest distance (as determined
 *	by pci_p2pdma_distance_many())
 * @clients: array of devices to check (NULL-terminated)
 * @num_clients: number of client devices in the list
 *
 * If multiple devices are behind the same switch, the one "closest" to the
 * client devices in use will be chosen first. (So if one of the providers is
 * the same as one of the clients, that provider will be used ahead of any
 * other providers that are unrelated). If multiple providers are an equal
 * distance away, one will be chosen at random.
 *
 * Returns a pointer to the PCI device with a reference taken (use pci_dev_put
 * to return the reference) or NULL if no compatible device is found. The
 * found provider will also be assigned to the client list.
 */
struct pci_dev *pci_p2pmem_find_many(struct device **clients, int num_clients)
{
	struct pci_dev *pdev = NULL;
	int distance;
	int closest_distance = INT_MAX;
	struct pci_dev **closest_pdevs;
	int dev_cnt = 0;
	const int max_devs = PAGE_SIZE / sizeof(*closest_pdevs);
	int i;

	closest_pdevs = kmalloc(PAGE_SIZE, GFP_KERNEL);
	if (!closest_pdevs)
		return NULL;

	while ((pdev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, pdev))) {
		if (!pci_has_p2pmem(pdev))
			continue;

		distance = pci_p2pdma_distance_many(pdev, clients,
						    num_clients, false);
		if (distance < 0 || distance > closest_distance)
			continue;

		if (distance == closest_distance && dev_cnt >= max_devs)
			continue;

		if (distance < closest_distance) {
			for (i = 0; i < dev_cnt; i++)
				pci_dev_put(closest_pdevs[i]);

			dev_cnt = 0;
			closest_distance = distance;
		}

		closest_pdevs[dev_cnt++] = pci_dev_get(pdev);
	}

	if (dev_cnt)
		pdev = pci_dev_get(closest_pdevs[prandom_u32_max(dev_cnt)]);

	for (i = 0; i < dev_cnt; i++)
		pci_dev_put(closest_pdevs[i]);

	kfree(closest_pdevs);
	return pdev;
}
EXPORT_SYMBOL_GPL(pci_p2pmem_find_many);
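
/*
 * Illustrative sketch, not part of the original file: selecting the
 * closest published provider for a set of clients, and remembering to
 * drop the reference when done with it.
 */
static void __maybe_unused example_pick_provider(struct device **clients,
		int num_clients)
{
	struct pci_dev *p2p_dev;

	p2p_dev = pci_p2pmem_find_many(clients, num_clients);
	if (!p2p_dev)
		return;	/* no compatible provider has published p2pmem */

	pci_info(p2p_dev, "selected as p2pmem provider\n");

	/* ... allocate from p2p_dev with pci_alloc_p2pmem() ... */

	pci_dev_put(p2p_dev);
}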
/**
 * pci_alloc_p2pmem - allocate peer-to-peer DMA memory
 * @pdev: the device to allocate memory from
 * @size: number of bytes to allocate
 *
 * Returns the allocated memory or NULL on error.
 */
void *pci_alloc_p2pmem(struct pci_dev *pdev, size_t size)
{
	void *ret = NULL;
	struct percpu_ref *ref;

	/*
	 * Pairs with synchronize_rcu() in pci_p2pdma_release() to
	 * ensure pdev->p2pdma is non-NULL for the duration of the
	 * read-lock.
	 */
	rcu_read_lock();
	if (unlikely(!pdev->p2pdma))
		goto out;

	ret = (void *)gen_pool_alloc_owner(pdev->p2pdma->pool, size,
			(void **) &ref);
	if (!ret)
		goto out;

	if (unlikely(!percpu_ref_tryget_live(ref))) {
		gen_pool_free(pdev->p2pdma->pool, (unsigned long) ret, size);
		ret = NULL;
		goto out;
	}
out:
	rcu_read_unlock();
	return ret;
}
EXPORT_SYMBOL_GPL(pci_alloc_p2pmem);

/**
 * pci_free_p2pmem - free peer-to-peer DMA memory
 * @pdev: the device the memory was allocated from
 * @addr: address of the memory that was allocated
 * @size: number of bytes that were allocated
 */
void pci_free_p2pmem(struct pci_dev *pdev, void *addr, size_t size)
{
	struct percpu_ref *ref;

	gen_pool_free_owner(pdev->p2pdma->pool, (uintptr_t)addr, size,
			(void **) &ref);
	percpu_ref_put(ref);
}
EXPORT_SYMBOL_GPL(pci_free_p2pmem);
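
/*
 * Illustrative sketch, not part of the original file: a simple
 * allocate/use/free cycle against a provider found earlier. The
 * PAGE_SIZE buffer length is an arbitrary choice for illustration.
 */
static void __maybe_unused example_alloc_free(struct pci_dev *p2p_dev)
{
	void *buf;

	buf = pci_alloc_p2pmem(p2p_dev, PAGE_SIZE);
	if (!buf)
		return;	/* pool exhausted or provider being unbound */

	/* ... point a peer device's DMA engine at this buffer ... */

	pci_free_p2pmem(p2p_dev, buf, PAGE_SIZE);
}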
/**
 * pci_p2pmem_virt_to_bus - return the PCI bus address for a given virtual
 *	address obtained with pci_alloc_p2pmem()
 * @pdev: the device the memory was allocated from
 * @addr: address of the memory that was allocated
 */
pci_bus_addr_t pci_p2pmem_virt_to_bus(struct pci_dev *pdev, void *addr)
{
	if (!addr)
		return 0;
	if (!pdev->p2pdma)
		return 0;

	/*
	 * Note: when we added the memory to the pool we used the PCI
	 * bus address as the physical address. So gen_pool_virt_to_phys()
	 * actually returns the bus address despite the misleading name.
	 */
	return gen_pool_virt_to_phys(pdev->p2pdma->pool, (unsigned long)addr);
}
EXPORT_SYMBOL_GPL(pci_p2pmem_virt_to_bus);
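
/*
 * Illustrative sketch, not part of the original file: translating a
 * p2pmem buffer to a bus address that can be programmed directly into
 * a peer device's descriptor. The descriptor layout is hypothetical.
 */
struct example_desc {
	__le64 src_bus_addr;
	__le32 len;
};

static void __maybe_unused example_fill_desc(struct pci_dev *p2p_dev,
		void *buf, u32 len, struct example_desc *desc)
{
	desc->src_bus_addr = cpu_to_le64(pci_p2pmem_virt_to_bus(p2p_dev, buf));
	desc->len = cpu_to_le32(len);
}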

/**
 * pci_p2pmem_alloc_sgl - allocate peer-to-peer DMA memory in a scatterlist
 * @pdev: the device to allocate memory from
 * @nents: the number of SG entries in the list
 * @length: number of bytes to allocate
 *
 * Return: %NULL on error or &struct scatterlist pointer and @nents on success
 */
struct scatterlist *pci_p2pmem_alloc_sgl(struct pci_dev *pdev,
					 unsigned int *nents, u32 length)
{
	struct scatterlist *sg;
	void *addr;

	sg = kmalloc(sizeof(*sg), GFP_KERNEL);
	if (!sg)
		return NULL;

	sg_init_table(sg, 1);

	addr = pci_alloc_p2pmem(pdev, length);
	if (!addr)
		goto out_free_sg;

	sg_set_buf(sg, addr, length);
	*nents = 1;
	return sg;

out_free_sg:
	kfree(sg);
	return NULL;
}
EXPORT_SYMBOL_GPL(pci_p2pmem_alloc_sgl);

/**
 * pci_p2pmem_free_sgl - free a scatterlist allocated by pci_p2pmem_alloc_sgl()
 * @pdev: the device the memory was allocated from
 * @sgl: the allocated scatterlist
 */
void pci_p2pmem_free_sgl(struct pci_dev *pdev, struct scatterlist *sgl)
{
	struct scatterlist *sg;
	int count;

	for_each_sg(sgl, sg, INT_MAX, count) {
		if (!sg)
			break;

		pci_free_p2pmem(pdev, sg_virt(sg), sg->length);
	}
	kfree(sgl);
}
EXPORT_SYMBOL_GPL(pci_p2pmem_free_sgl);
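
/*
 * Illustrative sketch, not part of the original file: building a
 * single-entry p2p scatterlist, roughly as a subsystem like nvmet does
 * for its request buffers. The PAGE_SIZE length is arbitrary.
 */
static void __maybe_unused example_sgl_cycle(struct pci_dev *p2p_dev)
{
	struct scatterlist *sgl;
	unsigned int nents;

	sgl = pci_p2pmem_alloc_sgl(p2p_dev, &nents, PAGE_SIZE);
	if (!sgl)
		return;

	/* ... map with pci_p2pdma_map_sg_attrs() and issue the I/O ... */

	pci_p2pmem_free_sgl(p2p_dev, sgl);
}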
/**
 * pci_p2pmem_publish - publish the peer-to-peer DMA memory for use by
 *	other devices with pci_p2pmem_find()
 * @pdev: the device with peer-to-peer DMA memory to publish
 * @publish: set to true to publish the memory, false to unpublish it
 *
 * Published memory can be used by other PCI device drivers for
 * peer-to-peer DMA operations. Non-published memory is reserved for
 * exclusive use of the device driver that registers the peer-to-peer
 * memory.
 */
void pci_p2pmem_publish(struct pci_dev *pdev, bool publish)
{
	if (pdev->p2pdma)
		pdev->p2pdma->p2pmem_published = publish;
}
EXPORT_SYMBOL_GPL(pci_p2pmem_publish);

static enum pci_p2pdma_map_type pci_p2pdma_map_type(struct pci_dev *provider,
						    struct pci_dev *client)
{
	if (!provider->p2pdma)
		return PCI_P2PDMA_MAP_NOT_SUPPORTED;

	return xa_to_value(xa_load(&provider->p2pdma->map_types,
				   map_types_idx(client)));
}

static int __pci_p2pdma_map_sg(struct pci_p2pdma_pagemap *p2p_pgmap,
		struct device *dev, struct scatterlist *sg, int nents)
{
	struct scatterlist *s;
	phys_addr_t paddr;
	int i;

	/*
	 * p2pdma mappings are not compatible with devices that use
	 * dma_virt_ops. If the upper layers do the right thing
	 * this should never happen because it will be prevented
	 * by the check in pci_p2pdma_distance_many()
	 */
#ifdef CONFIG_DMA_VIRT_OPS
	if (WARN_ON_ONCE(dev->dma_ops == &dma_virt_ops))
		return 0;
#endif

	for_each_sg(sg, s, nents, i) {
		paddr = sg_phys(s);

		s->dma_address = paddr - p2p_pgmap->bus_offset;
		sg_dma_len(s) = s->length;
	}

	return nents;
}

/**
 * pci_p2pdma_map_sg_attrs - map a PCI peer-to-peer scatterlist for DMA
 * @dev: device doing the DMA request
 * @sg: scatter list to map
 * @nents: elements in the scatterlist
 * @dir: DMA direction
 * @attrs: DMA attributes passed to dma_map_sg() (if called)
 *
 * Scatterlists mapped with this function should be unmapped using
 * pci_p2pdma_unmap_sg_attrs().
 *
 * Returns the number of SG entries mapped or 0 on error.
 */
int pci_p2pdma_map_sg_attrs(struct device *dev, struct scatterlist *sg,
		int nents, enum dma_data_direction dir, unsigned long attrs)
{
	struct pci_p2pdma_pagemap *p2p_pgmap =
		to_p2p_pgmap(sg_page(sg)->pgmap);
	struct pci_dev *client;

	if (WARN_ON_ONCE(!dev_is_pci(dev)))
		return 0;

	client = to_pci_dev(dev);

	switch (pci_p2pdma_map_type(p2p_pgmap->provider, client)) {
	case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE:
		return dma_map_sg_attrs(dev, sg, nents, dir, attrs);
	case PCI_P2PDMA_MAP_BUS_ADDR:
		return __pci_p2pdma_map_sg(p2p_pgmap, dev, sg, nents);
	default:
		WARN_ON_ONCE(1);
		return 0;
	}
}
EXPORT_SYMBOL_GPL(pci_p2pdma_map_sg_attrs);
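
/*
 * Illustrative sketch, not part of the original file: a requester
 * mapping a p2p scatterlist before a transfer and unmapping it after
 * completion. The requester device and DMA direction are assumptions.
 */
static int __maybe_unused example_map_unmap(struct device *dma_dev,
		struct scatterlist *sgl, int nents)
{
	int mapped;

	mapped = pci_p2pdma_map_sg_attrs(dma_dev, sgl, nents,
					 DMA_FROM_DEVICE, 0);
	if (!mapped)
		return -EIO;

	/* ... issue the transfer and wait for it to complete ... */

	pci_p2pdma_unmap_sg_attrs(dma_dev, sgl, mapped, DMA_FROM_DEVICE, 0);
	return 0;
}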
/**
 * pci_p2pdma_unmap_sg_attrs - unmap a PCI peer-to-peer scatterlist that was
 *	mapped with pci_p2pdma_map_sg_attrs()
 * @dev: device doing the DMA request
 * @sg: scatter list to unmap
 * @nents: number of elements returned by pci_p2pdma_map_sg_attrs()
 * @dir: DMA direction
 * @attrs: DMA attributes passed to dma_unmap_sg() (if called)
 */
void pci_p2pdma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg,
		int nents, enum dma_data_direction dir, unsigned long attrs)
{
	struct pci_p2pdma_pagemap *p2p_pgmap =
		to_p2p_pgmap(sg_page(sg)->pgmap);
	enum pci_p2pdma_map_type map_type;
	struct pci_dev *client;

	if (WARN_ON_ONCE(!dev_is_pci(dev)))
		return;

	client = to_pci_dev(dev);

	map_type = pci_p2pdma_map_type(p2p_pgmap->provider, client);

	if (map_type == PCI_P2PDMA_MAP_THRU_HOST_BRIDGE)
		dma_unmap_sg_attrs(dev, sg, nents, dir, attrs);
}
EXPORT_SYMBOL_GPL(pci_p2pdma_unmap_sg_attrs);

/**
 * pci_p2pdma_enable_store - parse a configfs/sysfs attribute store
 *		to enable p2pdma
 * @page: contents of the value to be stored
 * @p2p_dev: returns the PCI device that was selected to be used
 *		(if one was specified in the stored value)
 * @use_p2pdma: returns whether to enable p2pdma or not
 *
 * Parses an attribute value to decide whether to enable p2pdma.
 * The value can select a PCI device (using its full BDF device
 * name) or a boolean (in any format strtobool() accepts). A false
 * value disables p2pdma; a true value expects the caller to
 * automatically find a compatible device; specifying a PCI device
 * expects the caller to use that specific provider.
 *
 * pci_p2pdma_enable_show() should be used as the show operation for
 * the attribute.
 *
 * Returns 0 on success
 */
int pci_p2pdma_enable_store(const char *page, struct pci_dev **p2p_dev,
			    bool *use_p2pdma)
{
	struct device *dev;

	dev = bus_find_device_by_name(&pci_bus_type, NULL, page);
	if (dev) {
		*use_p2pdma = true;
		*p2p_dev = to_pci_dev(dev);

		if (!pci_has_p2pmem(*p2p_dev)) {
			pci_err(*p2p_dev,
				"PCI device has no peer-to-peer memory: %s\n",
				page);
			pci_dev_put(*p2p_dev);
			return -ENODEV;
		}

		return 0;
	} else if ((page[0] == '0' || page[0] == '1') && !iscntrl(page[1])) {
		/*
		 * If the user enters a PCI device that doesn't exist
		 * like "0000:01:00.1", we don't want strtobool to think
		 * it's a '0' when it's clearly not what the user wanted.
		 * So we require 0's and 1's to be exactly one character.
		 */
	} else if (!strtobool(page, use_p2pdma)) {
		return 0;
	}

	pr_err("No such PCI device: %.*s\n", (int)strcspn(page, "\n"), page);
	return -ENODEV;
}
EXPORT_SYMBOL_GPL(pci_p2pdma_enable_store);

/**
 * pci_p2pdma_enable_show - show a configfs/sysfs attribute indicating
 *		whether p2pdma is enabled
 * @page: contents of the stored value
 * @p2p_dev: the selected p2p device (NULL if no device is selected)
 * @use_p2pdma: whether p2pdma has been enabled
 *
 * Attributes that use pci_p2pdma_enable_store() should use this function
 * to show the value of the attribute.
 *
 * Returns the number of bytes printed to @page
 */
ssize_t pci_p2pdma_enable_show(char *page, struct pci_dev *p2p_dev,
			       bool use_p2pdma)
{
	if (!use_p2pdma)
		return sprintf(page, "0\n");

	if (!p2p_dev)
		return sprintf(page, "1\n");

	return sprintf(page, "%s\n", pci_name(p2p_dev));
}
EXPORT_SYMBOL_GPL(pci_p2pdma_enable_show);
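
/*
 * Illustrative sketch, not part of the original file: wiring the two
 * helpers above into a configfs attribute, roughly as nvmet's p2pmem
 * attribute does. The state structure and accessor are hypothetical,
 * and <linux/configfs.h> is assumed to be available.
 */
struct example_state {
	struct config_item item;
	struct pci_dev *p2p_dev;
	bool use_p2pdma;
};

static inline struct example_state *to_example_state(struct config_item *item)
{
	return container_of(item, struct example_state, item);
}

static ssize_t __maybe_unused example_p2pmem_show(struct config_item *item,
		char *page)
{
	struct example_state *st = to_example_state(item);

	return pci_p2pdma_enable_show(page, st->p2p_dev, st->use_p2pdma);
}

static ssize_t __maybe_unused example_p2pmem_store(struct config_item *item,
		const char *page, size_t count)
{
	struct example_state *st = to_example_state(item);
	int error;

	error = pci_p2pdma_enable_store(page, &st->p2p_dev, &st->use_p2pdma);

	return error ? error : count;
}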
1036