1// SPDX-License-Identifier: MIT
2/*
3 * Copyright © 2022 Intel Corporation
4 */
5
6#include "i915_selftest.h"
7
8#include "gem/i915_gem_internal.h"
9#include "gem/i915_gem_lmem.h"
10#include "gem/i915_gem_region.h"
11
12#include "gen8_engine_cs.h"
13#include "i915_gem_ww.h"
14#include "intel_engine_regs.h"
15#include "intel_gpu_commands.h"
16#include "intel_context.h"
17#include "intel_gt.h"
18#include "intel_ring.h"
19
20#include "selftests/igt_flush_test.h"
21#include "selftests/i915_random.h"
22
23static void vma_set_qw(struct i915_vma *vma, u64 addr, u64 val)
24{
25	GEM_BUG_ON(addr < i915_vma_offset(vma));
26	GEM_BUG_ON(addr >= i915_vma_offset(vma) + i915_vma_size(vma) + sizeof(val));
27	memset64(page_mask_bits(vma->obj->mm.mapping) +
28		 (addr - i915_vma_offset(vma)), val, 1);
29}
30
31static int
32pte_tlbinv(struct intel_context *ce,
33	   struct i915_vma *va,
34	   struct i915_vma *vb,
35	   u64 align,
36	   void (*tlbinv)(struct i915_address_space *vm, u64 addr, u64 length),
37	   u64 length,
38	   struct rnd_state *prng)
39{
40	const unsigned int pat_index =
41		i915_gem_get_pat_index(ce->vm->i915, I915_CACHE_NONE);
42	struct drm_i915_gem_object *batch;
43	struct drm_mm_node vb_node;
44	struct i915_request *rq;
45	struct i915_vma *vma;
46	u64 addr;
47	int err;
48	u32 *cs;
49
50	batch = i915_gem_object_create_internal(ce->vm->i915, 4096);
51	if (IS_ERR(batch))
52		return PTR_ERR(batch);
53
54	vma = i915_vma_instance(batch, ce->vm, NULL);
55	if (IS_ERR(vma)) {
56		err = PTR_ERR(vma);
57		goto out;
58	}
59
60	err = i915_vma_pin(vma, 0, 0, PIN_USER);
61	if (err)
62		goto out;
63
64	/* Pin va at random but aligned offset after vma */
65	addr = round_up(vma->node.start + vma->node.size, align);
66	/* MI_CONDITIONAL_BATCH_BUFFER_END limits address to 48b */
67	addr = igt_random_offset(prng, addr, min(ce->vm->total, BIT_ULL(48)),
68				 va->size, align);
69	err = i915_vma_pin(va,  0, 0, addr | PIN_OFFSET_FIXED | PIN_USER);
70	if (err) {
71		pr_err("Cannot pin at %llx+%llx\n", addr, va->size);
72		goto out;
73	}
74	GEM_BUG_ON(i915_vma_offset(va) != addr);
75	if (vb != va) {
76		vb_node = vb->node;
77		vb->node = va->node; /* overwrites the _same_ PTE  */
78	}
79
80	/*
81	 * Now choose random dword at the 1st pinned page.
82	 *
83	 * SZ_64K pages on dg1 require that the whole PT be marked
84	 * containing 64KiB entries. So we make sure that vma
85	 * covers the whole PT, despite being randomly aligned to 64KiB
86	 * and restrict our sampling to the 2MiB PT within where
87	 * we know that we will be using 64KiB pages.
88	 */
89	if (align == SZ_64K)
90		addr = round_up(addr, SZ_2M);
91	addr = igt_random_offset(prng, addr, addr + align, 8, 8);
92
93	if (va != vb)
94		pr_info("%s(%s): Sampling %llx, with alignment %llx, using PTE size %x (phys %x, sg %x), invalidate:%llx+%llx\n",
95			ce->engine->name, va->obj->mm.region->name ?: "smem",
96			addr, align, va->resource->page_sizes_gtt,
97			va->page_sizes.phys, va->page_sizes.sg,
98			addr & -length, length);
99
100	cs = i915_gem_object_pin_map_unlocked(batch, I915_MAP_WC);
101	*cs++ = MI_NOOP; /* for later termination */
102	/*
103	 * Sample the target to see if we spot the updated backing store.
104	 * Gen8 VCS compares immediate value with bitwise-and of two
105	 * consecutive DWORDS pointed by addr, other gen/engines compare value
106	 * with DWORD pointed by addr. Moreover we want to exercise DWORD size
107	 * invalidations. To fulfill all these requirements below values
108	 * have been chosen.
109	 */
110	*cs++ = MI_CONDITIONAL_BATCH_BUFFER_END | MI_DO_COMPARE | 2;
111	*cs++ = 0; /* break if *addr == 0 */
112	*cs++ = lower_32_bits(addr);
113	*cs++ = upper_32_bits(addr);
114	vma_set_qw(va, addr, -1);
115	vma_set_qw(vb, addr, 0);
116
117	/* Keep sampling until we get bored */
118	*cs++ = MI_BATCH_BUFFER_START | BIT(8) | 1;
119	*cs++ = lower_32_bits(i915_vma_offset(vma));
120	*cs++ = upper_32_bits(i915_vma_offset(vma));
121
122	i915_gem_object_flush_map(batch);
123
124	rq = i915_request_create(ce);
125	if (IS_ERR(rq)) {
126		err = PTR_ERR(rq);
127		goto out_va;
128	}
129
130	err = rq->engine->emit_bb_start(rq, i915_vma_offset(vma), 0, 0);
131	if (err) {
132		i915_request_add(rq);
133		goto out_va;
134	}
135
136	i915_request_get(rq);
137	i915_request_add(rq);
138
139	/* Short sleep to sanitycheck the batch is spinning before we begin */
140	msleep(10);
141	if (va == vb) {
142		if (!i915_request_completed(rq)) {
143			pr_err("%s(%s): Semaphore sanitycheck failed %llx, with alignment %llx, using PTE size %x (phys %x, sg %x)\n",
144			       ce->engine->name, va->obj->mm.region->name ?: "smem",
145			       addr, align, va->resource->page_sizes_gtt,
146			       va->page_sizes.phys, va->page_sizes.sg);
147			err = -EIO;
148		}
149	} else if (!i915_request_completed(rq)) {
150		struct i915_vma_resource vb_res = {
151			.bi.pages = vb->obj->mm.pages,
152			.bi.page_sizes = vb->obj->mm.page_sizes,
153			.start = i915_vma_offset(vb),
154			.vma_size = i915_vma_size(vb)
155		};
156		unsigned int pte_flags = 0;
157
158		/* Flip the PTE between A and B */
159		if (i915_gem_object_is_lmem(vb->obj))
160			pte_flags |= PTE_LM;
161		ce->vm->insert_entries(ce->vm, &vb_res, pat_index, pte_flags);
162
163		/* Flush the PTE update to concurrent HW */
164		tlbinv(ce->vm, addr & -length, length);
165
166		if (wait_for(i915_request_completed(rq), HZ / 2)) {
167			pr_err("%s: Request did not complete; the COND_BBE did not read the updated PTE\n",
168			       ce->engine->name);
169			err = -EINVAL;
170		}
171	} else {
172		pr_err("Spinner ended unexpectedly\n");
173		err = -EIO;
174	}
175	i915_request_put(rq);
176
177	cs = page_mask_bits(batch->mm.mapping);
178	*cs = MI_BATCH_BUFFER_END;
179	wmb();
180
181out_va:
182	if (vb != va)
183		vb->node = vb_node;
184	i915_vma_unpin(va);
185	if (i915_vma_unbind_unlocked(va))
186		err = -EIO;
187out:
188	i915_gem_object_put(batch);
189	return err;
190}
191
192static struct drm_i915_gem_object *create_lmem(struct intel_gt *gt)
193{
194	struct intel_memory_region *mr = gt->i915->mm.regions[INTEL_REGION_LMEM_0];
195	resource_size_t size = SZ_1G;
196
197	/*
198	 * Allocation of largest possible page size allows to test all types
199	 * of pages. To succeed with both allocations, especially in case of Small
200	 * BAR, try to allocate no more than quarter of mappable memory.
201	 */
202	if (mr && size > mr->io_size / 4)
203		size = mr->io_size / 4;
204
205	return i915_gem_object_create_lmem(gt->i915, size, I915_BO_ALLOC_CONTIGUOUS);
206}
207
208static struct drm_i915_gem_object *create_smem(struct intel_gt *gt)
209{
210	/*
211	 * SZ_64K pages require covering the whole 2M PT (gen8 to tgl/dg1).
212	 * While that does not require the whole 2M block to be contiguous
213	 * it is easier to make it so, since we need that for SZ_2M pagees.
214	 * Since we randomly offset the start of the vma, we need a 4M object
215	 * so that there is a 2M range within it is suitable for SZ_64K PTE.
216	 */
217	return i915_gem_object_create_internal(gt->i915, SZ_4M);
218}
219
220static int
221mem_tlbinv(struct intel_gt *gt,
222	   struct drm_i915_gem_object *(*create_fn)(struct intel_gt *),
223	   void (*tlbinv)(struct i915_address_space *vm, u64 addr, u64 length))
224{
225	unsigned int ppgtt_size = RUNTIME_INFO(gt->i915)->ppgtt_size;
226	struct intel_engine_cs *engine;
227	struct drm_i915_gem_object *A, *B;
228	struct i915_ppgtt *ppgtt;
229	struct i915_vma *va, *vb;
230	enum intel_engine_id id;
231	I915_RND_STATE(prng);
232	void *vaddr;
233	int err;
234
235	/*
236	 * Check that the TLB invalidate is able to revoke an active
237	 * page. We load a page into a spinning COND_BBE loop and then
238	 * remap that page to a new physical address. The old address, and
239	 * so the loop keeps spinning, is retained in the TLB cache until
240	 * we issue an invalidate.
241	 */
242
243	A = create_fn(gt);
244	if (IS_ERR(A))
245		return PTR_ERR(A);
246
247	vaddr = i915_gem_object_pin_map_unlocked(A, I915_MAP_WC);
248	if (IS_ERR(vaddr)) {
249		err = PTR_ERR(vaddr);
250		goto out_a;
251	}
252
253	B = create_fn(gt);
254	if (IS_ERR(B)) {
255		err = PTR_ERR(B);
256		goto out_a;
257	}
258
259	vaddr = i915_gem_object_pin_map_unlocked(B, I915_MAP_WC);
260	if (IS_ERR(vaddr)) {
261		err = PTR_ERR(vaddr);
262		goto out_b;
263	}
264
265	GEM_BUG_ON(A->base.size != B->base.size);
266	if ((A->mm.page_sizes.phys | B->mm.page_sizes.phys) & (A->base.size - 1))
267		pr_warn("Failed to allocate contiguous pages for size %zx\n",
268			A->base.size);
269
270	ppgtt = i915_ppgtt_create(gt, 0);
271	if (IS_ERR(ppgtt)) {
272		err = PTR_ERR(ppgtt);
273		goto out_b;
274	}
275
276	va = i915_vma_instance(A, &ppgtt->vm, NULL);
277	if (IS_ERR(va)) {
278		err = PTR_ERR(va);
279		goto out_vm;
280	}
281
282	vb = i915_vma_instance(B, &ppgtt->vm, NULL);
283	if (IS_ERR(vb)) {
284		err = PTR_ERR(vb);
285		goto out_vm;
286	}
287
288	err = 0;
289	for_each_engine(engine, gt, id) {
290		struct i915_gem_ww_ctx ww;
291		struct intel_context *ce;
292		int bit;
293
294		ce = intel_context_create(engine);
295		if (IS_ERR(ce)) {
296			err = PTR_ERR(ce);
297			break;
298		}
299
300		i915_vm_put(ce->vm);
301		ce->vm = i915_vm_get(&ppgtt->vm);
302
303		for_i915_gem_ww(&ww, err, true)
304			err = intel_context_pin_ww(ce, &ww);
305		if (err)
306			goto err_put;
307
308		for_each_set_bit(bit,
309				 (unsigned long *)&RUNTIME_INFO(gt->i915)->page_sizes,
310				 BITS_PER_TYPE(RUNTIME_INFO(gt->i915)->page_sizes)) {
311			unsigned int len;
312
313			if (BIT_ULL(bit) < i915_vm_obj_min_alignment(va->vm, va->obj))
314				continue;
315
316			/* sanitycheck the semaphore wake up */
317			err = pte_tlbinv(ce, va, va,
318					 BIT_ULL(bit),
319					 NULL, SZ_4K,
320					 &prng);
321			if (err)
322				goto err_unpin;
323
324			for (len = 2; len <= ppgtt_size; len = min(2 * len, ppgtt_size)) {
325				err = pte_tlbinv(ce, va, vb,
326						 BIT_ULL(bit),
327						 tlbinv,
328						 BIT_ULL(len),
329						 &prng);
330				if (err)
331					goto err_unpin;
332				if (len == ppgtt_size)
333					break;
334			}
335		}
336err_unpin:
337		intel_context_unpin(ce);
338err_put:
339		intel_context_put(ce);
340		if (err)
341			break;
342	}
343
344	if (igt_flush_test(gt->i915))
345		err = -EIO;
346
347out_vm:
348	i915_vm_put(&ppgtt->vm);
349out_b:
350	i915_gem_object_put(B);
351out_a:
352	i915_gem_object_put(A);
353	return err;
354}
355
356static void tlbinv_full(struct i915_address_space *vm, u64 addr, u64 length)
357{
358	intel_gt_invalidate_tlb_full(vm->gt, intel_gt_tlb_seqno(vm->gt) | 1);
359}
360
361static int invalidate_full(void *arg)
362{
363	struct intel_gt *gt = arg;
364	int err;
365
366	if (GRAPHICS_VER(gt->i915) < 8)
367		return 0; /* TLB invalidate not implemented */
368
369	err = mem_tlbinv(gt, create_smem, tlbinv_full);
370	if (err == 0)
371		err = mem_tlbinv(gt, create_lmem, tlbinv_full);
372	if (err == -ENODEV || err == -ENXIO)
373		err = 0;
374
375	return err;
376}
377
378int intel_tlb_live_selftests(struct drm_i915_private *i915)
379{
380	static const struct i915_subtest tests[] = {
381		SUBTEST(invalidate_full),
382	};
383	struct intel_gt *gt;
384	unsigned int i;
385
386	for_each_gt(gt, i915, i) {
387		int err;
388
389		if (intel_gt_is_wedged(gt))
390			continue;
391
392		err = intel_gt_live_subtests(tests, gt);
393		if (err)
394			return err;
395	}
396
397	return 0;
398}
399