/*
 * Copyright 2018 Red Hat Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
#include "nouveau_svm.h"
#include "nouveau_drv.h"
#include "nouveau_chan.h"
#include "nouveau_dmem.h"

#include <nvif/notify.h>
#include <nvif/object.h>
#include <nvif/vmm.h>

#include <nvif/class.h>
#include <nvif/clb069.h>
#include <nvif/ifc00d.h>

#include <linux/sched/mm.h>
#include <linux/sort.h>
#include <linux/hmm.h>

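/* Per-device SVM state: the list of channel instances bound to SVM-enabled
 * VMMs, and the replayable fault buffer used to receive GPU page faults.
 */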
struct nouveau_svm {
	struct nouveau_drm *drm;
	struct mutex mutex;
	struct list_head inst;

	struct nouveau_svm_fault_buffer {
		int id;
		struct nvif_object object;
		u32 entries;
		u32 getaddr;
		u32 putaddr;
		u32 get;
		u32 put;
		struct nvif_notify notify;

		struct nouveau_svm_fault {
			u64 inst;
			u64 addr;
			u64 time;
			u32 engine;
			u8  gpc;
			u8  hub;
			u8  access;
			u8  client;
			u8  fault;
			struct nouveau_svmm *svmm;
		} **fault;
		int fault_nr;
	} buffer[1];
};

#define SVM_DBG(s,f,a...) NV_DEBUG((s)->drm, "svm: "f"\n", ##a)
#define SVM_ERR(s,f,a...) NV_WARN((s)->drm, "svm: "f"\n", ##a)

struct nouveau_pfnmap_args {
	struct nvif_ioctl_v0 i;
	struct nvif_ioctl_mthd_v0 m;
	struct nvif_vmm_pfnmap_v0 p;
};

struct nouveau_ivmm {
	struct nouveau_svmm *svmm;
	u64 inst;
	struct list_head head;
};

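/* Find the SVMM linked to a channel instance pointer.  Called with
 * svm->mutex held.
 */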
static struct nouveau_ivmm *
nouveau_ivmm_find(struct nouveau_svm *svm, u64 inst)
{
	struct nouveau_ivmm *ivmm;
	list_for_each_entry(ivmm, &svm->inst, head) {
		if (ivmm->inst == inst)
			return ivmm;
	}
	return NULL;
}

#define SVMM_DBG(s,f,a...)                                                     \
	NV_DEBUG((s)->vmm->cli->drm, "svm-%p: "f"\n", (s), ##a)
#define SVMM_ERR(s,f,a...)                                                     \
	NV_WARN((s)->vmm->cli->drm, "svm-%p: "f"\n", (s), ##a)

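/* DRM_NOUVEAU_SVM_BIND ioctl: migrate a range of the client's address space
 * to GPU memory (best effort).
 */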
int
nouveau_svmm_bind(struct drm_device *dev, void *data,
		  struct drm_file *file_priv)
{
	struct nouveau_cli *cli = nouveau_cli(file_priv);
	struct drm_nouveau_svm_bind *args = data;
	unsigned target, cmd, priority;
	unsigned long addr, end;
	struct mm_struct *mm;

	args->va_start &= PAGE_MASK;
	args->va_end = ALIGN(args->va_end, PAGE_SIZE);

	/* Sanity check arguments */
	if (args->reserved0 || args->reserved1)
		return -EINVAL;
	if (args->header & (~NOUVEAU_SVM_BIND_VALID_MASK))
		return -EINVAL;
	if (args->va_start >= args->va_end)
		return -EINVAL;

	cmd = args->header >> NOUVEAU_SVM_BIND_COMMAND_SHIFT;
	cmd &= NOUVEAU_SVM_BIND_COMMAND_MASK;
	switch (cmd) {
	case NOUVEAU_SVM_BIND_COMMAND__MIGRATE:
		break;
	default:
		return -EINVAL;
	}

	priority = args->header >> NOUVEAU_SVM_BIND_PRIORITY_SHIFT;
	priority &= NOUVEAU_SVM_BIND_PRIORITY_MASK;

	/* FIXME: support CPU targets, i.e. all target values < GPU_VRAM. */
	target = args->header >> NOUVEAU_SVM_BIND_TARGET_SHIFT;
	target &= NOUVEAU_SVM_BIND_TARGET_MASK;
	switch (target) {
	case NOUVEAU_SVM_BIND_TARGET__GPU_VRAM:
		break;
	default:
		return -EINVAL;
	}

	/*
	 * FIXME: For now, refuse a non-zero stride; we need to change the
	 * migrate kernel function to handle strides to avoid creating a
	 * mess within each device driver.
	 */
	if (args->stride)
		return -EINVAL;

	/*
	 * OK, we are being asked to do something sane.  For now we only
	 * support migrate commands, but we will add things like memory
	 * policy (what to do on page fault) and maybe some other commands.
	 */

	mm = get_task_mm(current);
	if (!mm) {
		return -EINVAL;
	}
	mmap_read_lock(mm);

	if (!cli->svm.svmm) {
		mmap_read_unlock(mm);
		mmput(mm);
		return -EINVAL;
	}

	for (addr = args->va_start, end = args->va_end; addr < end;) {
		struct vm_area_struct *vma;
		unsigned long next;

		vma = find_vma_intersection(mm, addr, end);
		if (!vma)
			break;

		addr = max(addr, vma->vm_start);
		next = min(vma->vm_end, end);
		/* This is a best effort so we ignore errors */
		nouveau_dmem_migrate_vma(cli->drm, cli->svm.svmm, vma, addr,
					 next);
		addr = next;
	}

	/*
	 * FIXME: Return the number of pages we have migrated.  Again, we need
	 * to update the migrate API to return that information so that we
	 * can report it to user space.
	 */
	args->result = 0;

	mmap_read_unlock(mm);
	mmput(mm);

	return 0;
}

/* Unlink channel instance from SVMM. */
void
nouveau_svmm_part(struct nouveau_svmm *svmm, u64 inst)
{
	struct nouveau_ivmm *ivmm;
	if (svmm) {
		mutex_lock(&svmm->vmm->cli->drm->svm->mutex);
		ivmm = nouveau_ivmm_find(svmm->vmm->cli->drm->svm, inst);
		if (ivmm) {
			list_del(&ivmm->head);
			kfree(ivmm);
		}
		mutex_unlock(&svmm->vmm->cli->drm->svm->mutex);
	}
}

/* Link channel instance to SVMM. */
int
nouveau_svmm_join(struct nouveau_svmm *svmm, u64 inst)
{
	struct nouveau_ivmm *ivmm;
	if (svmm) {
		if (!(ivmm = kmalloc(sizeof(*ivmm), GFP_KERNEL)))
			return -ENOMEM;
		ivmm->svmm = svmm;
		ivmm->inst = inst;

		mutex_lock(&svmm->vmm->cli->drm->svm->mutex);
		list_add(&ivmm->head, &svmm->vmm->cli->drm->svm->inst);
		mutex_unlock(&svmm->vmm->cli->drm->svm->mutex);
	}
	return 0;
}

/* Invalidate SVMM address-range on GPU. */
void
nouveau_svmm_invalidate(struct nouveau_svmm *svmm, u64 start, u64 limit)
{
	if (limit > start) {
		bool super = svmm->vmm->vmm.object.client->super;
		svmm->vmm->vmm.object.client->super = true;
		nvif_object_mthd(&svmm->vmm->vmm.object, NVIF_VMM_V0_PFNCLR,
				 &(struct nvif_vmm_pfnclr_v0) {
					.addr = start,
					.size = limit - start,
				 }, sizeof(struct nvif_vmm_pfnclr_v0));
		svmm->vmm->vmm.object.client->super = super;
	}
}

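/* mmu_notifier callback: mirror CPU address-space invalidations to the GPU,
 * skipping the unmanaged address window.
 */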
static int
nouveau_svmm_invalidate_range_start(struct mmu_notifier *mn,
				    const struct mmu_notifier_range *update)
{
	struct nouveau_svmm *svmm =
		container_of(mn, struct nouveau_svmm, notifier);
	unsigned long start = update->start;
	unsigned long limit = update->end;

	if (!mmu_notifier_range_blockable(update))
		return -EAGAIN;

	SVMM_DBG(svmm, "invalidate %016lx-%016lx", start, limit);

	mutex_lock(&svmm->mutex);
	if (unlikely(!svmm->vmm))
		goto out;

	/*
	 * Ignore invalidation callbacks for device private pages since
	 * the invalidation is handled as part of the migration process.
	 */
	if (update->event == MMU_NOTIFY_MIGRATE &&
	    update->migrate_pgmap_owner == svmm->vmm->cli->drm->dev)
		goto out;

	if (limit > svmm->unmanaged.start && start < svmm->unmanaged.limit) {
		if (start < svmm->unmanaged.start) {
			nouveau_svmm_invalidate(svmm, start,
						svmm->unmanaged.limit);
		}
		start = svmm->unmanaged.limit;
	}

	nouveau_svmm_invalidate(svmm, start, limit);

out:
	mutex_unlock(&svmm->mutex);
	return 0;
}

static void nouveau_svmm_free_notifier(struct mmu_notifier *mn)
{
	kfree(container_of(mn, struct nouveau_svmm, notifier));
}

static const struct mmu_notifier_ops nouveau_mn_ops = {
	.invalidate_range_start = nouveau_svmm_invalidate_range_start,
	.free_notifier = nouveau_svmm_free_notifier,
};

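/* Tear down an SVMM: detach it from the GPU VMM and drop the mmu_notifier
 * reference (the structure itself is freed from the notifier callback).
 */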
void
nouveau_svmm_fini(struct nouveau_svmm **psvmm)
{
	struct nouveau_svmm *svmm = *psvmm;
	if (svmm) {
		mutex_lock(&svmm->mutex);
		svmm->vmm = NULL;
		mutex_unlock(&svmm->mutex);
		mmu_notifier_put(&svmm->notifier);
		*psvmm = NULL;
	}
}

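/* DRM_NOUVEAU_SVM_INIT ioctl: create an SVM-capable GPU VMM for the client
 * and register an mmu_notifier to mirror the process address space.
 */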
int
nouveau_svmm_init(struct drm_device *dev, void *data,
		  struct drm_file *file_priv)
{
	struct nouveau_cli *cli = nouveau_cli(file_priv);
	struct nouveau_svmm *svmm;
	struct drm_nouveau_svm_init *args = data;
	int ret;

	/* We need to fail if svm is disabled */
	if (!cli->drm->svm)
		return -ENOSYS;

	/* Allocate tracking for SVM-enabled VMM. */
	if (!(svmm = kzalloc(sizeof(*svmm), GFP_KERNEL)))
		return -ENOMEM;
	svmm->vmm = &cli->svm;
	svmm->unmanaged.start = args->unmanaged_addr;
	svmm->unmanaged.limit = args->unmanaged_addr + args->unmanaged_size;
	mutex_init(&svmm->mutex);

	/* Check that SVM isn't already enabled for the client. */
	mutex_lock(&cli->mutex);
	if (cli->svm.cli) {
		ret = -EBUSY;
		goto out_free;
	}

	/* Allocate a new GPU VMM that can support SVM (managed by the
	 * client, with replayable faults enabled).
	 *
	 * All future channel/memory allocations will make use of this
	 * VMM instead of the standard one.
	 */
	ret = nvif_vmm_ctor(&cli->mmu, "svmVmm",
			    cli->vmm.vmm.object.oclass, true,
			    args->unmanaged_addr, args->unmanaged_size,
			    &(struct gp100_vmm_v0) {
				.fault_replay = true,
			    }, sizeof(struct gp100_vmm_v0), &cli->svm.vmm);
	if (ret)
		goto out_free;

	mmap_write_lock(current->mm);
	svmm->notifier.ops = &nouveau_mn_ops;
	ret = __mmu_notifier_register(&svmm->notifier, current->mm);
	if (ret)
		goto out_mm_unlock;
	/* Note, ownership of svmm transfers to mmu_notifier */

	cli->svm.svmm = svmm;
	cli->svm.cli = cli;
	mmap_write_unlock(current->mm);
	mutex_unlock(&cli->mutex);
	return 0;

out_mm_unlock:
	mmap_write_unlock(current->mm);
out_free:
	mutex_unlock(&cli->mutex);
	kfree(svmm);
	return ret;
}

/* Issue fault replay for GPU to retry accesses that faulted previously. */
static void
nouveau_svm_fault_replay(struct nouveau_svm *svm)
{
	SVM_DBG(svm, "replay");
	WARN_ON(nvif_object_mthd(&svm->drm->client.vmm.vmm.object,
				 GP100_VMM_VN_FAULT_REPLAY,
				 &(struct gp100_vmm_fault_replay_vn) {},
				 sizeof(struct gp100_vmm_fault_replay_vn)));
}

/* Cancel a replayable fault that could not be handled.
 *
 * Cancelling the fault will trigger recovery to reset the engine
 * and kill the offending channel (ie. GPU SIGSEGV).
 */
static void
nouveau_svm_fault_cancel(struct nouveau_svm *svm,
			 u64 inst, u8 hub, u8 gpc, u8 client)
{
	SVM_DBG(svm, "cancel %016llx %d %02x %02x", inst, hub, gpc, client);
	WARN_ON(nvif_object_mthd(&svm->drm->client.vmm.vmm.object,
				 GP100_VMM_VN_FAULT_CANCEL,
				 &(struct gp100_vmm_fault_cancel_v0) {
					.hub = hub,
					.gpc = gpc,
					.client = client,
					.inst = inst,
				 }, sizeof(struct gp100_vmm_fault_cancel_v0)));
}

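/* Cancel a single cached fault entry. */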
static void
nouveau_svm_fault_cancel_fault(struct nouveau_svm *svm,
			       struct nouveau_svm_fault *fault)
{
	nouveau_svm_fault_cancel(svm, fault->inst,
				      fault->hub,
				      fault->gpc,
				      fault->client);
}

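/* Sort comparator: order faults by instance pointer, then address, then
 * access type, so that writes sort before reads/prefetches to the same page.
 */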
static int
nouveau_svm_fault_cmp(const void *a, const void *b)
{
	const struct nouveau_svm_fault *fa = *(struct nouveau_svm_fault **)a;
	const struct nouveau_svm_fault *fb = *(struct nouveau_svm_fault **)b;
	int ret;
	if ((ret = (s64)fa->inst - fb->inst))
		return ret;
	if ((ret = (s64)fa->addr - fb->addr))
		return ret;
	/*XXX: atomic? */
	return (fa->access == 0 || fa->access == 3) -
	       (fb->access == 0 || fb->access == 3);
}

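/* Decode a single hardware fault buffer entry at the given offset into the
 * next slot of the software fault cache, allocating the slot on first use.
 */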
static void
nouveau_svm_fault_cache(struct nouveau_svm *svm,
			struct nouveau_svm_fault_buffer *buffer, u32 offset)
{
	struct nvif_object *memory = &buffer->object;
	const u32 instlo = nvif_rd32(memory, offset + 0x00);
	const u32 insthi = nvif_rd32(memory, offset + 0x04);
	const u32 addrlo = nvif_rd32(memory, offset + 0x08);
	const u32 addrhi = nvif_rd32(memory, offset + 0x0c);
	const u32 timelo = nvif_rd32(memory, offset + 0x10);
	const u32 timehi = nvif_rd32(memory, offset + 0x14);
	const u32 engine = nvif_rd32(memory, offset + 0x18);
	const u32   info = nvif_rd32(memory, offset + 0x1c);
	const u64   inst = (u64)insthi << 32 | instlo;
	const u8     gpc = (info & 0x1f000000) >> 24;
	const u8     hub = (info & 0x00100000) >> 20;
	const u8  client = (info & 0x00007f00) >> 8;
	struct nouveau_svm_fault *fault;

	/* XXX: I think we're supposed to spin waiting. */
	if (WARN_ON(!(info & 0x80000000)))
		return;

	nvif_mask(memory, offset + 0x1c, 0x80000000, 0x00000000);

	if (!buffer->fault[buffer->fault_nr]) {
		fault = kmalloc(sizeof(*fault), GFP_KERNEL);
		if (WARN_ON(!fault)) {
			nouveau_svm_fault_cancel(svm, inst, hub, gpc, client);
			return;
		}
		buffer->fault[buffer->fault_nr] = fault;
	}

	fault = buffer->fault[buffer->fault_nr++];
	fault->inst   = inst;
	fault->addr   = (u64)addrhi << 32 | addrlo;
	fault->time   = (u64)timehi << 32 | timelo;
	fault->engine = engine;
	fault->gpc    = gpc;
	fault->hub    = hub;
	fault->access = (info & 0x000f0000) >> 16;
	fault->client = client;
	fault->fault  = (info & 0x0000001f);

	SVM_DBG(svm, "fault %016llx %016llx %02x",
		fault->inst, fault->addr, fault->access);
}

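/* Per-fault interval notifier, live only while the fault is being serviced. */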
struct svm_notifier {
	struct mmu_interval_notifier notifier;
	struct nouveau_svmm *svmm;
};

static bool nouveau_svm_range_invalidate(struct mmu_interval_notifier *mni,
					 const struct mmu_notifier_range *range,
					 unsigned long cur_seq)
{
	struct svm_notifier *sn =
		container_of(mni, struct svm_notifier, notifier);

	/*
	 * Serializes the update to mni->invalidate_seq done by the caller and
	 * prevents invalidation of the PTE from progressing while HW is being
	 * programmed. This is very hacky and only works because the normal
	 * notifier that does invalidation is always called after the range
	 * notifier.
	 */
	if (mmu_notifier_range_blockable(range))
		mutex_lock(&sn->svmm->mutex);
	else if (!mutex_trylock(&sn->svmm->mutex))
		return false;
	mmu_interval_set_seq(mni, cur_seq);
	mutex_unlock(&sn->svmm->mutex);
	return true;
}

static const struct mmu_interval_notifier_ops nouveau_svm_mni_ops = {
	.invalidate = nouveau_svm_range_invalidate,
};

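/* Convert the hmm_range_fault() result for a single page into the NVIF
 * PFNMAP encoding expected by the GPU VMM.
 */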
static void nouveau_hmm_convert_pfn(struct nouveau_drm *drm,
				    struct hmm_range *range,
				    struct nouveau_pfnmap_args *args)
{
	struct page *page;

	/*
	 * The address prepared here is passed through nvif_object_ioctl()
	 * to an eventual DMA map in something like gp100_vmm_pgt_pfn()
	 *
	 * This is all just encoding the internal hmm representation into a
	 * different nouveau internal representation.
	 */
	if (!(range->hmm_pfns[0] & HMM_PFN_VALID)) {
		args->p.phys[0] = 0;
		return;
	}

	page = hmm_pfn_to_page(range->hmm_pfns[0]);
	/*
	 * Only map compound pages to the GPU if the CPU is also mapping the
	 * page as a compound page. Otherwise, the PTE protections might not be
	 * consistent (e.g., CPU only maps part of a compound page).
	 * Note that the underlying page might still be larger than the
	 * CPU mapping (e.g., a PUD sized compound page partially mapped with
	 * a PMD sized page table entry).
	 */
	if (hmm_pfn_to_map_order(range->hmm_pfns[0])) {
		unsigned long addr = args->p.addr;

		args->p.page = hmm_pfn_to_map_order(range->hmm_pfns[0]) +
				PAGE_SHIFT;
		args->p.size = 1UL << args->p.page;
		args->p.addr &= ~(args->p.size - 1);
		page -= (addr - args->p.addr) >> PAGE_SHIFT;
	}
	if (is_device_private_page(page))
		args->p.phys[0] = nouveau_dmem_page_addr(page) |
				NVIF_VMM_PFNMAP_V0_V |
				NVIF_VMM_PFNMAP_V0_VRAM;
	else
		args->p.phys[0] = page_to_phys(page) |
				NVIF_VMM_PFNMAP_V0_V |
				NVIF_VMM_PFNMAP_V0_HOST;
	if (range->hmm_pfns[0] & HMM_PFN_WRITE)
		args->p.phys[0] |= NVIF_VMM_PFNMAP_V0_W;
}

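/* Fault a single page via hmm_range_fault() and program the resulting
 * mapping into the GPU VMM, retrying if the range is invalidated before
 * the update can be committed under svmm->mutex.
 */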
static int nouveau_range_fault(struct nouveau_svmm *svmm,
			       struct nouveau_drm *drm,
			       struct nouveau_pfnmap_args *args, u32 size,
			       unsigned long hmm_flags,
			       struct svm_notifier *notifier)
{
	unsigned long timeout =
		jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
	/* Have HMM fault pages within the fault window to the GPU. */
	unsigned long hmm_pfns[1];
	struct hmm_range range = {
		.notifier = &notifier->notifier,
		.start = notifier->notifier.interval_tree.start,
		.end = notifier->notifier.interval_tree.last + 1,
		.default_flags = hmm_flags,
		.hmm_pfns = hmm_pfns,
		.dev_private_owner = drm->dev,
	};
	struct mm_struct *mm = notifier->notifier.mm;
	int ret;

	while (true) {
		if (time_after(jiffies, timeout))
			return -EBUSY;

		range.notifier_seq = mmu_interval_read_begin(range.notifier);
		mmap_read_lock(mm);
		ret = hmm_range_fault(&range);
		mmap_read_unlock(mm);
		if (ret) {
			if (ret == -EBUSY)
				continue;
			return ret;
		}

		mutex_lock(&svmm->mutex);
		if (mmu_interval_read_retry(range.notifier,
					    range.notifier_seq)) {
			mutex_unlock(&svmm->mutex);
			continue;
		}
		break;
	}

	nouveau_hmm_convert_pfn(drm, &range, args);

	svmm->vmm->vmm.object.client->super = true;
	ret = nvif_object_ioctl(&svmm->vmm->vmm.object, args, size, NULL);
	svmm->vmm->vmm.object.client->super = false;
	mutex_unlock(&svmm->mutex);

	return ret;
}

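/* Top-level handler for replayable GPU faults, called from the fault
 * buffer's nvif_notify.
 */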
static int
nouveau_svm_fault(struct nvif_notify *notify)
{
	struct nouveau_svm_fault_buffer *buffer =
		container_of(notify, typeof(*buffer), notify);
	struct nouveau_svm *svm =
		container_of(buffer, typeof(*svm), buffer[buffer->id]);
	struct nvif_object *device = &svm->drm->client.device.object;
	struct nouveau_svmm *svmm;
	struct {
		struct nouveau_pfnmap_args i;
		u64 phys[1];
	} args;
	unsigned long hmm_flags;
	u64 inst, start, limit;
	int fi, fn;
	int replay = 0, ret;

	/* Parse available fault buffer entries into a cache, and update
	 * the GET pointer so HW can reuse the entries.
	 */
	SVM_DBG(svm, "fault handler");
	if (buffer->get == buffer->put) {
		buffer->put = nvif_rd32(device, buffer->putaddr);
		buffer->get = nvif_rd32(device, buffer->getaddr);
		if (buffer->get == buffer->put)
			return NVIF_NOTIFY_KEEP;
	}
	buffer->fault_nr = 0;

	SVM_DBG(svm, "get %08x put %08x", buffer->get, buffer->put);
	while (buffer->get != buffer->put) {
		nouveau_svm_fault_cache(svm, buffer, buffer->get * 0x20);
		if (++buffer->get == buffer->entries)
			buffer->get = 0;
	}
	nvif_wr32(device, buffer->getaddr, buffer->get);
	SVM_DBG(svm, "%d fault(s) pending", buffer->fault_nr);

	/* Sort parsed faults by instance pointer to prevent unnecessary
	 * instance to SVMM translations, followed by address and access
	 * type to reduce the amount of work when handling the faults.
	 */
	sort(buffer->fault, buffer->fault_nr, sizeof(*buffer->fault),
	     nouveau_svm_fault_cmp, NULL);

	/* Lookup SVMM structure for each unique instance pointer. */
	mutex_lock(&svm->mutex);
	for (fi = 0, svmm = NULL; fi < buffer->fault_nr; fi++) {
		if (!svmm || buffer->fault[fi]->inst != inst) {
			struct nouveau_ivmm *ivmm =
				nouveau_ivmm_find(svm, buffer->fault[fi]->inst);
			svmm = ivmm ? ivmm->svmm : NULL;
			inst = buffer->fault[fi]->inst;
			SVM_DBG(svm, "inst %016llx -> svm-%p", inst, svmm);
		}
		buffer->fault[fi]->svmm = svmm;
	}
	mutex_unlock(&svm->mutex);

	/* Process list of faults. */
	args.i.i.version = 0;
	args.i.i.type = NVIF_IOCTL_V0_MTHD;
	args.i.m.version = 0;
	args.i.m.method = NVIF_VMM_V0_PFNMAP;
	args.i.p.version = 0;

	for (fi = 0; fn = fi + 1, fi < buffer->fault_nr; fi = fn) {
		struct svm_notifier notifier;
		struct mm_struct *mm;

		/* Cancel any faults from non-SVM channels. */
		if (!(svmm = buffer->fault[fi]->svmm)) {
			nouveau_svm_fault_cancel_fault(svm, buffer->fault[fi]);
			continue;
		}
		SVMM_DBG(svmm, "addr %016llx", buffer->fault[fi]->addr);

		/* We try to group handling of faults within a small
		 * window into a single update.
		 */
		start = buffer->fault[fi]->addr;
		limit = start + PAGE_SIZE;
		if (start < svmm->unmanaged.limit)
			limit = min_t(u64, limit, svmm->unmanaged.start);

		/*
		 * Prepare the GPU-side update of all pages within the
		 * fault window, determining required pages and access
		 * permissions based on pending faults.
		 */
		args.i.p.addr = start;
		args.i.p.page = PAGE_SHIFT;
		args.i.p.size = PAGE_SIZE;
		/*
		 * Determine required permissions based on GPU fault
		 * access flags.
		 * XXX: atomic?
		 */
		switch (buffer->fault[fi]->access) {
		case 0: /* READ. */
			hmm_flags = HMM_PFN_REQ_FAULT;
			break;
		case 3: /* PREFETCH. */
			hmm_flags = 0;
			break;
		default:
			hmm_flags = HMM_PFN_REQ_FAULT | HMM_PFN_REQ_WRITE;
			break;
		}

		mm = svmm->notifier.mm;
		if (!mmget_not_zero(mm)) {
			nouveau_svm_fault_cancel_fault(svm, buffer->fault[fi]);
			continue;
		}

		notifier.svmm = svmm;
		ret = mmu_interval_notifier_insert(&notifier.notifier, mm,
						   args.i.p.addr, args.i.p.size,
						   &nouveau_svm_mni_ops);
		if (!ret) {
			ret = nouveau_range_fault(svmm, svm->drm, &args.i,
				sizeof(args), hmm_flags, &notifier);
			mmu_interval_notifier_remove(&notifier.notifier);
		}
		mmput(mm);

		limit = args.i.p.addr + args.i.p.size;
		for (fn = fi; ++fn < buffer->fault_nr; ) {
			/* It's okay to skip over duplicate addresses from the
			 * same SVMM as faults are ordered by access type such
			 * that only the first one needs to be handled.
			 *
			 * ie. WRITE faults appear first, thus any handling of
			 * pending READ faults will already be satisfied.
			 * But if a large page is mapped, make sure subsequent
			 * fault addresses have sufficient access permission.
			 */
			if (buffer->fault[fn]->svmm != svmm ||
			    buffer->fault[fn]->addr >= limit ||
			    (buffer->fault[fi]->access == 0 /* READ. */ &&
			     !(args.phys[0] & NVIF_VMM_PFNMAP_V0_V)) ||
			    (buffer->fault[fi]->access != 0 /* READ. */ &&
			     buffer->fault[fi]->access != 3 /* PREFETCH. */ &&
			     !(args.phys[0] & NVIF_VMM_PFNMAP_V0_W)))
				break;
		}

		/* If handling failed completely, cancel all faults. */
		if (ret) {
			while (fi < fn) {
				struct nouveau_svm_fault *fault =
					buffer->fault[fi++];

				nouveau_svm_fault_cancel_fault(svm, fault);
			}
		} else
			replay++;
	}

	/* Issue fault replay to the GPU. */
	if (replay)
		nouveau_svm_fault_replay(svm);
	return NVIF_NOTIFY_KEEP;
}

static struct nouveau_pfnmap_args *
nouveau_pfns_to_args(void *pfns)
{
	return container_of(pfns, struct nouveau_pfnmap_args, p.phys);
}

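/* Allocate a PFNMAP argument buffer with room for npages entries, returning
 * a pointer to the embedded PFN array.
 */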
u64 *
nouveau_pfns_alloc(unsigned long npages)
{
	struct nouveau_pfnmap_args *args;

	args = kzalloc(struct_size(args, p.phys, npages), GFP_KERNEL);
	if (!args)
		return NULL;

	args->i.type = NVIF_IOCTL_V0_MTHD;
	args->m.method = NVIF_VMM_V0_PFNMAP;
	args->p.page = PAGE_SHIFT;

	return args->p.phys;
}

void
nouveau_pfns_free(u64 *pfns)
{
	struct nouveau_pfnmap_args *args = nouveau_pfns_to_args(pfns);

	kfree(args);
}

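/* Map npages previously-encoded PFNs into the SVMM's GPU page tables,
 * starting at addr.
 */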
void
nouveau_pfns_map(struct nouveau_svmm *svmm, struct mm_struct *mm,
		 unsigned long addr, u64 *pfns, unsigned long npages)
{
	struct nouveau_pfnmap_args *args = nouveau_pfns_to_args(pfns);
	int ret;

	args->p.addr = addr;
	args->p.size = npages << PAGE_SHIFT;

	mutex_lock(&svmm->mutex);

	svmm->vmm->vmm.object.client->super = true;
	ret = nvif_object_ioctl(&svmm->vmm->vmm.object, args, sizeof(*args) +
				npages * sizeof(args->p.phys[0]), NULL);
	svmm->vmm->vmm.object.client->super = false;

	mutex_unlock(&svmm->mutex);
}

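/* Disable fault buffer notifications (eg. across suspend). */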
static void
nouveau_svm_fault_buffer_fini(struct nouveau_svm *svm, int id)
{
	struct nouveau_svm_fault_buffer *buffer = &svm->buffer[id];
	nvif_notify_put(&buffer->notify);
}

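/* Resynchronise GET/PUT with the hardware and re-enable notifications. */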
static int
nouveau_svm_fault_buffer_init(struct nouveau_svm *svm, int id)
{
	struct nouveau_svm_fault_buffer *buffer = &svm->buffer[id];
	struct nvif_object *device = &svm->drm->client.device.object;
	buffer->get = nvif_rd32(device, buffer->getaddr);
	buffer->put = nvif_rd32(device, buffer->putaddr);
	SVM_DBG(svm, "get %08x put %08x (init)", buffer->get, buffer->put);
	return nvif_notify_get(&buffer->notify);
}

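/* Destroy the fault buffer: free the software fault cache and the
 * notification/buffer objects.
 */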
static void
nouveau_svm_fault_buffer_dtor(struct nouveau_svm *svm, int id)
{
	struct nouveau_svm_fault_buffer *buffer = &svm->buffer[id];
	int i;

	if (buffer->fault) {
		for (i = 0; i < buffer->entries && buffer->fault[i]; i++)
			kfree(buffer->fault[i]);
		kvfree(buffer->fault);
	}

	nouveau_svm_fault_buffer_fini(svm, id);

	nvif_notify_dtor(&buffer->notify);
	nvif_object_dtor(&buffer->object);
}

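/* Allocate and map the hardware fault buffer, hook up the fault handler,
 * and allocate the software fault cache.
 */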
static int
nouveau_svm_fault_buffer_ctor(struct nouveau_svm *svm, s32 oclass, int id)
{
	struct nouveau_svm_fault_buffer *buffer = &svm->buffer[id];
	struct nouveau_drm *drm = svm->drm;
	struct nvif_object *device = &drm->client.device.object;
	struct nvif_clb069_v0 args = {};
	int ret;

	buffer->id = id;

	ret = nvif_object_ctor(device, "svmFaultBuffer", 0, oclass, &args,
			       sizeof(args), &buffer->object);
	if (ret < 0) {
		SVM_ERR(svm, "Fault buffer allocation failed: %d", ret);
		return ret;
	}

	nvif_object_map(&buffer->object, NULL, 0);
	buffer->entries = args.entries;
	buffer->getaddr = args.get;
	buffer->putaddr = args.put;

	ret = nvif_notify_ctor(&buffer->object, "svmFault", nouveau_svm_fault,
			       true, NVB069_V0_NTFY_FAULT, NULL, 0, 0,
			       &buffer->notify);
	if (ret)
		return ret;

	buffer->fault = kvzalloc(sizeof(*buffer->fault) * buffer->entries, GFP_KERNEL);
	if (!buffer->fault)
		return -ENOMEM;

	return nouveau_svm_fault_buffer_init(svm, id);
}

void
nouveau_svm_resume(struct nouveau_drm *drm)
{
	struct nouveau_svm *svm = drm->svm;
	if (svm)
		nouveau_svm_fault_buffer_init(svm, 0);
}

void
nouveau_svm_suspend(struct nouveau_drm *drm)
{
	struct nouveau_svm *svm = drm->svm;
	if (svm)
		nouveau_svm_fault_buffer_fini(svm, 0);
}

void
nouveau_svm_fini(struct nouveau_drm *drm)
{
	struct nouveau_svm *svm = drm->svm;
	if (svm) {
		nouveau_svm_fault_buffer_dtor(svm, 0);
		kfree(drm->svm);
		drm->svm = NULL;
	}
}

void
nouveau_svm_init(struct nouveau_drm *drm)
{
	static const struct nvif_mclass buffers[] = {
		{   VOLTA_FAULT_BUFFER_A, 0 },
		{ MAXWELL_FAULT_BUFFER_A, 0 },
		{}
	};
	struct nouveau_svm *svm;
	int ret;

	/* Disable on Volta and newer until channel recovery is fixed,
	 * otherwise clients will have a trivial way to trash the GPU
	 * for everyone.
	 */
	if (drm->client.device.info.family > NV_DEVICE_INFO_V0_PASCAL)
		return;

	if (!(drm->svm = svm = kzalloc(sizeof(*drm->svm), GFP_KERNEL)))
		return;

	drm->svm->drm = drm;
	mutex_init(&drm->svm->mutex);
	INIT_LIST_HEAD(&drm->svm->inst);

	ret = nvif_mclass(&drm->client.device.object, buffers);
	if (ret < 0) {
		SVM_DBG(svm, "No supported fault buffer class");
		nouveau_svm_fini(drm);
		return;
	}

	ret = nouveau_svm_fault_buffer_ctor(svm, buffers[ret].oclass, 0);
	if (ret) {
		nouveau_svm_fini(drm);
		return;
	}

	SVM_DBG(svm, "Initialised");
}