1// SPDX-License-Identifier: GPL-2.0+
2/*
3 * TCE helpers for IODA PCI/PCIe on PowerNV platforms
4 *
5 * Copyright 2018 IBM Corp.
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 */
12
13#include <linux/kernel.h>
14#include <linux/iommu.h>
15
16#include <asm/iommu.h>
17#include <asm/tce.h>
18#include "pci.h"
19
20unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb)
21{
22	struct pci_controller *hose = phb->hose;
23	struct device_node *dn = hose->dn;
24	unsigned long mask = 0;
25	int i, rc, count;
26	u32 val;
27
28	count = of_property_count_u32_elems(dn, "ibm,supported-tce-sizes");
29	if (count <= 0) {
30		mask = SZ_4K | SZ_64K;
31		/* Add 16M for POWER8 by default */
32		if (cpu_has_feature(CPU_FTR_ARCH_207S) &&
33				!cpu_has_feature(CPU_FTR_ARCH_300))
34			mask |= SZ_16M | SZ_256M;
35		return mask;
36	}
37
38	for (i = 0; i < count; i++) {
39		rc = of_property_read_u32_index(dn, "ibm,supported-tce-sizes",
40						i, &val);
41		if (rc == 0)
42			mask |= 1ULL << val;
43	}
44
45	return mask;
46}
47
48void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
49		void *tce_mem, u64 tce_size,
50		u64 dma_offset, unsigned int page_shift)
51{
52	tbl->it_blocksize = 16;
53	tbl->it_base = (unsigned long)tce_mem;
54	tbl->it_page_shift = page_shift;
55	tbl->it_offset = dma_offset >> tbl->it_page_shift;
56	tbl->it_index = 0;
57	tbl->it_size = tce_size >> 3;
58	tbl->it_busno = 0;
59	tbl->it_type = TCE_PCI;
60}
61
62static __be64 *pnv_alloc_tce_level(int nid, unsigned int shift)
63{
64	struct page *tce_mem = NULL;
65	__be64 *addr;
66
67	tce_mem = alloc_pages_node(nid, GFP_ATOMIC | __GFP_NOWARN,
68			shift - PAGE_SHIFT);
69	if (!tce_mem) {
70		pr_err("Failed to allocate a TCE memory, level shift=%d\n",
71				shift);
72		return NULL;
73	}
74	addr = page_address(tce_mem);
75	memset(addr, 0, 1UL << shift);
76
77	return addr;
78}
79
80static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr,
81		unsigned long size, unsigned int levels);
82
/*
 * Walk the (possibly multi-level) TCE table and return a pointer to the
 * TCE entry for @idx, or NULL when a required intermediate level is
 * missing and @alloc is false (or its allocation failed).
 *
 * @user selects the userspace view of the table (it_userspace) instead
 * of the hardware table at it_base.
 *
 * Missing levels are allocated on demand when @alloc is true; a lost
 * cmpxchg race against a concurrent allocator is resolved by freeing our
 * freshly allocated page and descending into the winner's.
 */
static __be64 *pnv_tce(struct iommu_table *tbl, bool user, long idx, bool alloc)
{
	__be64 *tmp = user ? tbl->it_userspace : (__be64 *) tbl->it_base;
	int  level = tbl->it_indirect_levels;
	const long shift = ilog2(tbl->it_level_size);
	/* Bits of @idx that select the entry within the current level */
	unsigned long mask = (tbl->it_level_size - 1) << (level * shift);

	while (level) {
		int n = (idx & mask) >> (level * shift);
		unsigned long oldtce, tce = be64_to_cpu(READ_ONCE(tmp[n]));

		if (!tce) {
			__be64 *tmp2;

			if (!alloc)
				return NULL;

			/*
			 * ilog2(it_level_size) + 3 is the level size in
			 * bytes as a shift (it_level_size entries of 8
			 * bytes each).
			 */
			tmp2 = pnv_alloc_tce_level(tbl->it_nid,
					ilog2(tbl->it_level_size) + 3);
			if (!tmp2)
				return NULL;

			tce = __pa(tmp2) | TCE_PCI_READ | TCE_PCI_WRITE;
			oldtce = be64_to_cpu(cmpxchg(&tmp[n], 0,
					cpu_to_be64(tce)));
			/* Lost the race: free ours, use the winner's level */
			if (oldtce) {
				pnv_pci_ioda2_table_do_free_pages(tmp2,
					ilog2(tbl->it_level_size) + 3, 1);
				tce = oldtce;
			}
		}

		/* Mask off permission bits to recover the next level's PA */
		tmp = __va(tce & ~(TCE_PCI_READ | TCE_PCI_WRITE));
		idx &= ~mask;
		mask >>= shift;
		--level;
	}

	return tmp + idx;
}
123
124int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
125		unsigned long uaddr, enum dma_data_direction direction,
126		unsigned long attrs)
127{
128	u64 proto_tce = iommu_direction_to_tce_perm(direction);
129	u64 rpn = __pa(uaddr) >> tbl->it_page_shift;
130	long i;
131
132	if (proto_tce & TCE_PCI_WRITE)
133		proto_tce |= TCE_PCI_READ;
134
135	for (i = 0; i < npages; i++) {
136		unsigned long newtce = proto_tce |
137			((rpn + i) << tbl->it_page_shift);
138		unsigned long idx = index - tbl->it_offset + i;
139
140		*(pnv_tce(tbl, false, idx, true)) = cpu_to_be64(newtce);
141	}
142
143	return 0;
144}
145
146#ifdef CONFIG_IOMMU_API
147int pnv_tce_xchg(struct iommu_table *tbl, long index,
148		unsigned long *hpa, enum dma_data_direction *direction,
149		bool alloc)
150{
151	u64 proto_tce = iommu_direction_to_tce_perm(*direction);
152	unsigned long newtce = *hpa | proto_tce, oldtce;
153	unsigned long idx = index - tbl->it_offset;
154	__be64 *ptce = NULL;
155
156	BUG_ON(*hpa & ~IOMMU_PAGE_MASK(tbl));
157
158	if (*direction == DMA_NONE) {
159		ptce = pnv_tce(tbl, false, idx, false);
160		if (!ptce) {
161			*hpa = 0;
162			return 0;
163		}
164	}
165
166	if (!ptce) {
167		ptce = pnv_tce(tbl, false, idx, alloc);
168		if (!ptce)
169			return -ENOMEM;
170	}
171
172	if (newtce & TCE_PCI_WRITE)
173		newtce |= TCE_PCI_READ;
174
175	oldtce = be64_to_cpu(xchg(ptce, cpu_to_be64(newtce)));
176	*hpa = oldtce & ~(TCE_PCI_READ | TCE_PCI_WRITE);
177	*direction = iommu_tce_direction(oldtce);
178
179	return 0;
180}
181
182__be64 *pnv_tce_useraddrptr(struct iommu_table *tbl, long index, bool alloc)
183{
184	if (WARN_ON_ONCE(!tbl->it_userspace))
185		return NULL;
186
187	return pnv_tce(tbl, true, index - tbl->it_offset, alloc);
188}
189#endif
190
191void pnv_tce_free(struct iommu_table *tbl, long index, long npages)
192{
193	long i;
194
195	for (i = 0; i < npages; i++) {
196		unsigned long idx = index - tbl->it_offset + i;
197		__be64 *ptce = pnv_tce(tbl, false, idx,	false);
198
199		if (ptce)
200			*ptce = cpu_to_be64(0);
201		else
202			/* Skip the rest of the level */
203			i |= tbl->it_level_size - 1;
204	}
205}
206
207unsigned long pnv_tce_get(struct iommu_table *tbl, long index)
208{
209	__be64 *ptce = pnv_tce(tbl, false, index - tbl->it_offset, false);
210
211	if (!ptce)
212		return 0;
213
214	return be64_to_cpu(*ptce);
215}
216
/*
 * Recursively free a TCE table level and every lower level it points to.
 *
 * @addr may still carry TCE_PCI_READ/WRITE permission bits from a parent
 * entry; they are masked off before use.  @levels is the number of
 * indirect levels below this one.  @size is used both as the entry-scan
 * bound and (as size << 3 bytes) for the free_pages() order.
 *
 * NOTE(review): the race-loser path in pnv_tce() passes a shift value
 * (ilog2(it_level_size) + 3) as @size rather than an entry count as the
 * other callers do - confirm the intended units of @size.
 */
static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr,
		unsigned long size, unsigned int levels)
{
	/* Parent entries store the PA with permission bits OR'ed in */
	const unsigned long addr_ul = (unsigned long) addr &
			~(TCE_PCI_READ | TCE_PCI_WRITE);

	if (levels) {
		long i;
		u64 *tmp = (u64 *) addr_ul;

		for (i = 0; i < size; ++i) {
			unsigned long hpa = be64_to_cpu(tmp[i]);

			/*
			 * Only entries with permission bits set point at
			 * an allocated lower level; empty entries are 0.
			 */
			if (!(hpa & (TCE_PCI_READ | TCE_PCI_WRITE)))
				continue;

			pnv_pci_ioda2_table_do_free_pages(__va(hpa), size,
					levels - 1);
		}
	}

	free_pages(addr_ul, get_order(size << 3));
}
240
241void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl)
242{
243	const unsigned long size = tbl->it_indirect_levels ?
244			tbl->it_level_size : tbl->it_size;
245
246	if (!tbl->it_size)
247		return;
248
249	pnv_pci_ioda2_table_do_free_pages((__be64 *)tbl->it_base, size,
250			tbl->it_indirect_levels);
251	if (tbl->it_userspace) {
252		pnv_pci_ioda2_table_do_free_pages(tbl->it_userspace, size,
253				tbl->it_indirect_levels);
254	}
255}
256
257static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned int shift,
258		unsigned int levels, unsigned long limit,
259		unsigned long *current_offset, unsigned long *total_allocated)
260{
261	__be64 *addr, *tmp;
262	unsigned long allocated = 1UL << shift;
263	unsigned int entries = 1UL << (shift - 3);
264	long i;
265
266	addr = pnv_alloc_tce_level(nid, shift);
267	*total_allocated += allocated;
268
269	--levels;
270	if (!levels) {
271		*current_offset += allocated;
272		return addr;
273	}
274
275	for (i = 0; i < entries; ++i) {
276		tmp = pnv_pci_ioda2_table_do_alloc_pages(nid, shift,
277				levels, limit, current_offset, total_allocated);
278		if (!tmp)
279			break;
280
281		addr[i] = cpu_to_be64(__pa(tmp) |
282				TCE_PCI_READ | TCE_PCI_WRITE);
283
284		if (*current_offset >= limit)
285			break;
286	}
287
288	return addr;
289}
290
/*
 * Allocate and initialize a TCE table describing a DMA window of
 * @window_size bytes at bus address @bus_offset, using @levels indirect
 * levels and an IOMMU page size of 1 << @page_shift.  Optionally also
 * allocates a parallel userspace view of the table.
 *
 * Returns 0 on success, -EINVAL for unsupported geometry, -ENOMEM when
 * allocation fails (all partial allocations are released).
 */
long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
		__u32 page_shift, __u64 window_size, __u32 levels,
		bool alloc_userspace_copy, struct iommu_table *tbl)
{
	void *addr, *uas = NULL;
	unsigned long offset = 0, level_shift, total_allocated = 0;
	unsigned long total_allocated_uas = 0;
	const unsigned int window_shift = ilog2(window_size);
	unsigned int entries_shift = window_shift - page_shift;
	/* Total table size: one 8-byte TCE per page, at least one page */
	unsigned int table_shift = max_t(unsigned int, entries_shift + 3,
			PAGE_SHIFT);
	const unsigned long tce_table_size = 1UL << table_shift;

	if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS))
		return -EINVAL;

	if (!is_power_of_2(window_size))
		return -EINVAL;

	/* Adjust direct table size from window_size and levels */
	entries_shift = (entries_shift + levels - 1) / levels;
	level_shift = entries_shift + 3;
	level_shift = max_t(unsigned int, level_shift, PAGE_SHIFT);

	/* Reject geometries whose addresses would not fit the hardware */
	if ((level_shift - 3) * levels + page_shift >= 55)
		return -EINVAL;

	/* Allocate TCE table */
	addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
			1, tce_table_size, &offset, &total_allocated);

	/* addr==NULL means that the first level allocation failed */
	if (!addr)
		return -ENOMEM;

	/*
	 * First level was allocated but some lower level failed as
	 * we did not allocate as much as we wanted,
	 * release partially allocated table.
	 */
	if (levels == 1 && offset < tce_table_size)
		goto free_tces_exit;

	/* Allocate userspace view of the TCE table */
	if (alloc_userspace_copy) {
		offset = 0;
		uas = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
				1, tce_table_size, &offset,
				&total_allocated_uas);
		if (!uas)
			goto free_tces_exit;
		/* The view must mirror the table's geometry exactly */
		if (levels == 1 && (offset < tce_table_size ||
				total_allocated_uas != total_allocated))
			goto free_uas_exit;
	}

	/* Setup linux iommu table */
	pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, bus_offset,
			page_shift);
	tbl->it_level_size = 1ULL << (level_shift - 3);
	tbl->it_indirect_levels = levels - 1;
	tbl->it_userspace = uas;
	tbl->it_nid = nid;

	pr_debug("Created TCE table: ws=%08llx ts=%lx @%08llx base=%lx uas=%p levels=%d/%d\n",
			window_size, tce_table_size, bus_offset, tbl->it_base,
			tbl->it_userspace, 1, levels);

	return 0;

free_uas_exit:
	pnv_pci_ioda2_table_do_free_pages(uas,
			1ULL << (level_shift - 3), levels - 1);
free_tces_exit:
	pnv_pci_ioda2_table_do_free_pages(addr,
			1ULL << (level_shift - 3), levels - 1);

	return -ENOMEM;
}
370
371void pnv_pci_unlink_table_and_group(struct iommu_table *tbl,
372		struct iommu_table_group *table_group)
373{
374	long i;
375	bool found;
376	struct iommu_table_group_link *tgl;
377
378	if (!tbl || !table_group)
379		return;
380
381	/* Remove link to a group from table's list of attached groups */
382	found = false;
383
384	rcu_read_lock();
385	list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) {
386		if (tgl->table_group == table_group) {
387			list_del_rcu(&tgl->next);
388			kfree_rcu(tgl, rcu);
389			found = true;
390			break;
391		}
392	}
393	rcu_read_unlock();
394
395	if (WARN_ON(!found))
396		return;
397
398	/* Clean a pointer to iommu_table in iommu_table_group::tables[] */
399	found = false;
400	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
401		if (table_group->tables[i] == tbl) {
402			iommu_tce_table_put(tbl);
403			table_group->tables[i] = NULL;
404			found = true;
405			break;
406		}
407	}
408	WARN_ON(!found);
409}
410
411long pnv_pci_link_table_and_group(int node, int num,
412		struct iommu_table *tbl,
413		struct iommu_table_group *table_group)
414{
415	struct iommu_table_group_link *tgl = NULL;
416
417	if (WARN_ON(!tbl || !table_group))
418		return -EINVAL;
419
420	tgl = kzalloc_node(sizeof(struct iommu_table_group_link), GFP_KERNEL,
421			node);
422	if (!tgl)
423		return -ENOMEM;
424
425	tgl->table_group = table_group;
426	list_add_rcu(&tgl->next, &tbl->it_group_list);
427
428	table_group->tables[num] = iommu_tce_table_get(tbl);
429
430	return 0;
431}
432