/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2017-2018 Intel Corporation
 */

#include <linux/prime_numbers.h>

#include "intel_context.h"
#include "intel_engine_heartbeat.h"
#include "intel_engine_pm.h"
#include "intel_gt.h"
#include "intel_gt_requests.h"
#include "intel_ring.h"
#include "selftest_engine_heartbeat.h"

#include "../selftests/i915_random.h"
#include "../i915_selftest.h"

#include "../selftests/igt_flush_test.h"
#include "../selftests/mock_gem_device.h"
#include "selftests/mock_timeline.h"

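/*
 * Map a timeline back to the page backing its HWSP and to the global
 * cacheline index of its seqno slot, so the mock tests below can detect
 * two timelines being handed the same cacheline.
 */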
static struct page *hwsp_page(struct intel_timeline *tl)
{
	struct drm_i915_gem_object *obj = tl->hwsp_ggtt->obj;

	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
	return sg_page(obj->mm.pages->sgl);
}

static unsigned long hwsp_cacheline(struct intel_timeline *tl)
{
	unsigned long address = (unsigned long)page_address(hwsp_page(tl));

	return (address + tl->hwsp_offset) / CACHELINE_BYTES;
}

#define CACHELINES_PER_PAGE (PAGE_SIZE / CACHELINE_BYTES)

struct mock_hwsp_freelist {
	struct intel_gt *gt;
	struct radix_tree_root cachelines;
	struct intel_timeline **history;
	unsigned long count, max;
	struct rnd_state prng;
};

enum {
	SHUFFLE = BIT(0),
};

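/*
 * Swap @tl into the history slot at @idx; if the slot was occupied, drop
 * the old timeline's cacheline from the tracking tree and release it.
 */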
static void __mock_hwsp_record(struct mock_hwsp_freelist *state,
			       unsigned int idx,
			       struct intel_timeline *tl)
{
	tl = xchg(&state->history[idx], tl);
	if (tl) {
		radix_tree_delete(&state->cachelines, hwsp_cacheline(tl));
		intel_timeline_put(tl);
	}
}

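/*
 * Create @count timelines, checking via the radix tree that no two are
 * assigned the same HWSP cacheline, then retire a random number of the
 * tracked entries (optionally shuffling the history first) so that the
 * freed cachelines can be reused on the next pass.
 */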
static int __mock_hwsp_timeline(struct mock_hwsp_freelist *state,
				unsigned int count,
				unsigned int flags)
{
	struct intel_timeline *tl;
	unsigned int idx;

	while (count--) {
		unsigned long cacheline;
		int err;

		tl = intel_timeline_create(state->gt);
		if (IS_ERR(tl))
			return PTR_ERR(tl);

		cacheline = hwsp_cacheline(tl);
		err = radix_tree_insert(&state->cachelines, cacheline, tl);
		if (err) {
			if (err == -EEXIST) {
				pr_err("HWSP cacheline %lu already used; duplicate allocation!\n",
				       cacheline);
			}
			intel_timeline_put(tl);
			return err;
		}

		idx = state->count++ % state->max;
		__mock_hwsp_record(state, idx, tl);
	}

	if (flags & SHUFFLE)
		i915_prandom_shuffle(state->history,
				     sizeof(*state->history),
				     min(state->count, state->max),
				     &state->prng);

	count = i915_prandom_u32_max_state(min(state->count, state->max),
					   &state->prng);
	while (count--) {
		idx = --state->count % state->max;
		__mock_hwsp_record(state, idx, NULL);
	}

	return 0;
}

static int mock_hwsp_freelist(void *arg)
{
	struct mock_hwsp_freelist state;
	struct drm_i915_private *i915;
	const struct {
		const char *name;
		unsigned int flags;
	} phases[] = {
		{ "linear", 0 },
		{ "shuffled", SHUFFLE },
		{ },
	}, *p;
	unsigned int na;
	int err = 0;

	i915 = mock_gem_device();
	if (!i915)
		return -ENOMEM;

	INIT_RADIX_TREE(&state.cachelines, GFP_KERNEL);
	state.prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed);

	state.gt = &i915->gt;

	/*
	 * Create a bunch of timelines and check that their HWSPs do not
	 * overlap. Free some, and try again.
	 */

	state.max = PAGE_SIZE / sizeof(*state.history);
	state.count = 0;
	state.history = kcalloc(state.max, sizeof(*state.history), GFP_KERNEL);
	if (!state.history) {
		err = -ENOMEM;
		goto err_put;
	}

	for (p = phases; p->name; p++) {
		pr_debug("%s(%s)\n", __func__, p->name);
		for_each_prime_number_from(na, 1, 2 * CACHELINES_PER_PAGE) {
			err = __mock_hwsp_timeline(&state, na, p->flags);
			if (err)
				goto out;
		}
	}

out:
	for (na = 0; na < state.max; na++)
		__mock_hwsp_record(&state, na, NULL);
	kfree(state.history);
err_put:
	mock_destroy_device(i915);
	return err;
}

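/*
 * igt_sync: drive the timeline's sync map through a scripted sequence of
 * is_later()/set() queries (struct __igt_sync) across a wide range of
 * context ids, checking the expected result at each step.
 */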
struct __igt_sync {
	const char *name;
	u32 seqno;
	bool expected;
	bool set;
};

static int __igt_sync(struct intel_timeline *tl,
		      u64 ctx,
		      const struct __igt_sync *p,
		      const char *name)
{
	int ret;

	if (__intel_timeline_sync_is_later(tl, ctx, p->seqno) != p->expected) {
		pr_err("%s: %s(ctx=%llu, seqno=%u) expected sync_is_later() to return %s, but it did not\n",
		       name, p->name, ctx, p->seqno, yesno(p->expected));
		return -EINVAL;
	}

	if (p->set) {
		ret = __intel_timeline_sync_set(tl, ctx, p->seqno);
		if (ret)
			return ret;
	}

	return 0;
}

static int igt_sync(void *arg)
{
	const struct __igt_sync pass[] = {
		{ "unset", 0, false, false },
		{ "new", 0, false, true },
		{ "0a", 0, true, true },
		{ "1a", 1, false, true },
		{ "1b", 1, true, true },
		{ "0b", 0, true, false },
		{ "2a", 2, false, true },
		{ "4", 4, false, true },
		{ "INT_MAX", INT_MAX, false, true },
		{ "INT_MAX-1", INT_MAX-1, true, false },
		{ "INT_MAX+1", (u32)INT_MAX+1, false, true },
		{ "INT_MAX", INT_MAX, true, false },
		{ "UINT_MAX", UINT_MAX, false, true },
		{ "wrap", 0, false, true },
		{ "unwrap", UINT_MAX, true, false },
		{},
	}, *p;
	struct intel_timeline tl;
	int order, offset;
	int ret = -ENODEV;

	mock_timeline_init(&tl, 0);
	for (p = pass; p->name; p++) {
		for (order = 1; order < 64; order++) {
			for (offset = -1; offset <= (order > 1); offset++) {
				u64 ctx = BIT_ULL(order) + offset;

				ret = __igt_sync(&tl, ctx, p, "1");
				if (ret)
					goto out;
			}
		}
	}
	mock_timeline_fini(&tl);

	mock_timeline_init(&tl, 0);
	for (order = 1; order < 64; order++) {
		for (offset = -1; offset <= (order > 1); offset++) {
			u64 ctx = BIT_ULL(order) + offset;

			for (p = pass; p->name; p++) {
				ret = __igt_sync(&tl, ctx, p, "2");
				if (ret)
					goto out;
			}
		}
	}

out:
	mock_timeline_fini(&tl);
	return ret;
}

static unsigned int random_engine(struct rnd_state *rnd)
{
	return i915_prandom_u32_max_state(I915_NUM_ENGINES, rnd);
}

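/*
 * bench_sync: rough micro-benchmarks of __intel_timeline_sync_set() and
 * __intel_timeline_sync_is_later() under random, in-order and cyclic
 * access patterns, subtracting the measured prng overhead where the
 * pattern itself depends on random numbers.
 */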
static int bench_sync(void *arg)
{
	struct rnd_state prng;
	struct intel_timeline tl;
	unsigned long end_time, count;
	u64 prng32_1M;
	ktime_t kt;
	int order, last_order;

	mock_timeline_init(&tl, 0);

	/* Lookups from cache are very fast and so the random number generation
	 * and the loop itself become a significant factor in the per-iteration
	 * timings. We try to compensate for this by measuring the overhead of
	 * the prng and subtracting it from the reported results.
	 */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		u32 x;

		/* Make sure the compiler doesn't optimise away the prng call */
		WRITE_ONCE(x, prandom_u32_state(&prng));

		count++;
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	pr_debug("%s: %lu random evaluations, %lluns/prng\n",
		 __func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
	prng32_1M = div64_ul(ktime_to_ns(kt) << 20, count);

	/* Benchmark (only) setting random context ids */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		u64 id = i915_prandom_u64_state(&prng);

		__intel_timeline_sync_set(&tl, id, 0);
		count++;
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
	pr_info("%s: %lu random insertions, %lluns/insert\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	/* Benchmark looking up the exact same context ids as we just set */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	end_time = count;
	kt = ktime_get();
	while (end_time--) {
		u64 id = i915_prandom_u64_state(&prng);

		if (!__intel_timeline_sync_is_later(&tl, id, 0)) {
			mock_timeline_fini(&tl);
			pr_err("Lookup of %llu failed\n", id);
			return -EINVAL;
		}
	}
	kt = ktime_sub(ktime_get(), kt);
	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
	pr_info("%s: %lu random lookups, %lluns/lookup\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	mock_timeline_fini(&tl);
	cond_resched();

	mock_timeline_init(&tl, 0);

	/* Benchmark setting the first N (in order) contexts */
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		__intel_timeline_sync_set(&tl, count++, 0);
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	pr_info("%s: %lu in-order insertions, %lluns/insert\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	/* Benchmark looking up the exact same context ids as we just set */
	end_time = count;
	kt = ktime_get();
	while (end_time--) {
		if (!__intel_timeline_sync_is_later(&tl, end_time, 0)) {
			pr_err("Lookup of %lu failed\n", end_time);
			mock_timeline_fini(&tl);
			return -EINVAL;
		}
	}
	kt = ktime_sub(ktime_get(), kt);
	pr_info("%s: %lu in-order lookups, %lluns/lookup\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));

	mock_timeline_fini(&tl);
	cond_resched();

	mock_timeline_init(&tl, 0);

	/* Benchmark searching for a random context id and maybe changing it */
	prandom_seed_state(&prng, i915_selftest.random_seed);
	count = 0;
	kt = ktime_get();
	end_time = jiffies + HZ/10;
	do {
		u32 id = random_engine(&prng);
		u32 seqno = prandom_u32_state(&prng);

		if (!__intel_timeline_sync_is_later(&tl, id, seqno))
			__intel_timeline_sync_set(&tl, id, seqno);

		count++;
	} while (!time_after(jiffies, end_time));
	kt = ktime_sub(ktime_get(), kt);
	kt = ktime_sub_ns(kt, (count * prng32_1M * 2) >> 20);
	pr_info("%s: %lu repeated insert/lookups, %lluns/op\n",
		__func__, count, (long long)div64_ul(ktime_to_ns(kt), count));
	mock_timeline_fini(&tl);
	cond_resched();

	/* Benchmark searching for a known context id and changing the seqno */
	for (last_order = 1, order = 1; order < 32;
	     ({ int tmp = last_order; last_order = order; order += tmp; })) {
		unsigned int mask = BIT(order) - 1;

		mock_timeline_init(&tl, 0);

		count = 0;
		kt = ktime_get();
		end_time = jiffies + HZ/10;
		do {
			/* Without assuming too many details of the underlying
			 * implementation, try to identify its phase-changes
			 * (if any)!
			 */
			u64 id = (u64)(count & mask) << order;

			__intel_timeline_sync_is_later(&tl, id, 0);
			__intel_timeline_sync_set(&tl, id, 0);

			count++;
		} while (!time_after(jiffies, end_time));
		kt = ktime_sub(ktime_get(), kt);
		pr_info("%s: %lu cyclic/%d insert/lookups, %lluns/op\n",
			__func__, count, order,
			(long long)div64_ul(ktime_to_ns(kt), count));
		mock_timeline_fini(&tl);
		cond_resched();
	}

	return 0;
}

int intel_timeline_mock_selftests(void)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(mock_hwsp_freelist),
		SUBTEST(igt_sync),
		SUBTEST(bench_sync),
	};

	return i915_subtests(tests, NULL);
}

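/*
 * Emit an MI_STORE_DWORD_IMM into @rq's ring to write @value to the GGTT
 * address @addr, using the command layout appropriate to the device's
 * graphics generation.
 */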
static int emit_ggtt_store_dw(struct i915_request *rq, u32 addr, u32 value)
{
	u32 *cs;

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	if (INTEL_GEN(rq->engine->i915) >= 8) {
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = addr;
		*cs++ = 0;
		*cs++ = value;
	} else if (INTEL_GEN(rq->engine->i915) >= 4) {
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = 0;
		*cs++ = addr;
		*cs++ = value;
	} else {
		*cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
		*cs++ = addr;
		*cs++ = value;
		*cs++ = MI_NOOP;
	}

	intel_ring_advance(rq, cs);

	return 0;
}

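/*
 * Submit a kernel-context request on @engine that writes @value into the
 * timeline's HWSP slot, returning the request (with a reference held) so
 * that callers can wait on the write.
 */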
static struct i915_request *
tl_write(struct intel_timeline *tl, struct intel_engine_cs *engine, u32 value)
{
	struct i915_request *rq;
	int err;

	err = intel_timeline_pin(tl, NULL);
	if (err) {
		rq = ERR_PTR(err);
		goto out;
	}

	rq = intel_engine_create_kernel_request(engine);
	if (IS_ERR(rq))
		goto out_unpin;

	i915_request_get(rq);

	err = emit_ggtt_store_dw(rq, tl->hwsp_offset, value);
	i915_request_add(rq);
	if (err) {
		i915_request_put(rq);
		rq = ERR_PTR(err);
	}

out_unpin:
	intel_timeline_unpin(tl);
out:
	if (IS_ERR(rq))
		pr_err("Failed to write to timeline!\n");
	return rq;
}

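/*
 * Create a timeline and sanity check that its breadcrumb slot starts out
 * matching the timeline's software seqno.
 */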
static struct intel_timeline *
checked_intel_timeline_create(struct intel_gt *gt)
{
	struct intel_timeline *tl;

	tl = intel_timeline_create(gt);
	if (IS_ERR(tl))
		return tl;

	if (READ_ONCE(*tl->hwsp_seqno) != tl->seqno) {
		pr_err("Timeline created with incorrect breadcrumb, found %x, expected %x\n",
		       *tl->hwsp_seqno, tl->seqno);
		intel_timeline_put(tl);
		return ERR_PTR(-EINVAL);
	}

	return tl;
}

static int live_hwsp_engine(void *arg)
{
#define NUM_TIMELINES 4096
	struct intel_gt *gt = arg;
	struct intel_timeline **timelines;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned long count, n;
	int err = 0;

	/*
	 * Create a bunch of timelines and check we can write
	 * independently to each of their breadcrumb slots.
	 */

	timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES,
				   sizeof(*timelines),
				   GFP_KERNEL);
	if (!timelines)
		return -ENOMEM;

	count = 0;
	for_each_engine(engine, gt, id) {
		if (!intel_engine_can_store_dword(engine))
			continue;

		intel_engine_pm_get(engine);

		for (n = 0; n < NUM_TIMELINES; n++) {
			struct intel_timeline *tl;
			struct i915_request *rq;

			tl = checked_intel_timeline_create(gt);
			if (IS_ERR(tl)) {
				err = PTR_ERR(tl);
				break;
			}

			rq = tl_write(tl, engine, count);
			if (IS_ERR(rq)) {
				intel_timeline_put(tl);
				err = PTR_ERR(rq);
				break;
			}

			timelines[count++] = tl;
			i915_request_put(rq);
		}

		intel_engine_pm_put(engine);
		if (err)
			break;
	}

	if (igt_flush_test(gt->i915))
		err = -EIO;

	for (n = 0; n < count; n++) {
		struct intel_timeline *tl = timelines[n];

		if (!err && READ_ONCE(*tl->hwsp_seqno) != n) {
			GEM_TRACE_ERR("Invalid seqno:%lu stored in timeline %llu @ %x, found 0x%x\n",
				      n, tl->fence_context, tl->hwsp_offset, *tl->hwsp_seqno);
			GEM_TRACE_DUMP();
			err = -EINVAL;
		}
		intel_timeline_put(tl);
	}

	kvfree(timelines);
	return err;
#undef NUM_TIMELINES
}

static int live_hwsp_alternate(void *arg)
{
#define NUM_TIMELINES 4096
	struct intel_gt *gt = arg;
	struct intel_timeline **timelines;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned long count, n;
	int err = 0;

	/*
	 * Create a bunch of timelines and check we can write
	 * independently to each of their breadcrumb slots with adjacent
	 * engines.
	 */

	timelines = kvmalloc_array(NUM_TIMELINES * I915_NUM_ENGINES,
				   sizeof(*timelines),
				   GFP_KERNEL);
	if (!timelines)
		return -ENOMEM;

	count = 0;
	for (n = 0; n < NUM_TIMELINES; n++) {
		for_each_engine(engine, gt, id) {
			struct intel_timeline *tl;
			struct i915_request *rq;

			if (!intel_engine_can_store_dword(engine))
				continue;

			tl = checked_intel_timeline_create(gt);
			if (IS_ERR(tl)) {
				err = PTR_ERR(tl);
				goto out;
			}

			intel_engine_pm_get(engine);
			rq = tl_write(tl, engine, count);
			intel_engine_pm_put(engine);
			if (IS_ERR(rq)) {
				intel_timeline_put(tl);
				err = PTR_ERR(rq);
				goto out;
			}

			timelines[count++] = tl;
			i915_request_put(rq);
		}
	}

out:
	if (igt_flush_test(gt->i915))
		err = -EIO;

	for (n = 0; n < count; n++) {
		struct intel_timeline *tl = timelines[n];

		if (!err && READ_ONCE(*tl->hwsp_seqno) != n) {
			GEM_TRACE_ERR("Invalid seqno:%lu stored in timeline %llu @ %x, found 0x%x\n",
				      n, tl->fence_context, tl->hwsp_offset, *tl->hwsp_seqno);
			GEM_TRACE_DUMP();
			err = -EINVAL;
		}
		intel_timeline_put(tl);
	}

	kvfree(timelines);
	return err;
#undef NUM_TIMELINES
}

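/*
 * Force a seqno wrap in the middle of a single request and check that the
 * breadcrumb writes land in both the old and the new HWSP cachelines.
 */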
static int live_hwsp_wrap(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct intel_timeline *tl;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Across a seqno wrap, we need to keep the old cacheline alive for
	 * foreign GPU references.
	 */

	tl = intel_timeline_create(gt);
	if (IS_ERR(tl))
		return PTR_ERR(tl);

	if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline)
		goto out_free;

	err = intel_timeline_pin(tl, NULL);
	if (err)
		goto out_free;

	for_each_engine(engine, gt, id) {
		const u32 *hwsp_seqno[2];
		struct i915_request *rq;
		u32 seqno[2];

		if (!intel_engine_can_store_dword(engine))
			continue;

		rq = intel_engine_create_kernel_request(engine);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out;
		}

		tl->seqno = -4u;

		mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
		err = intel_timeline_get_seqno(tl, rq, &seqno[0]);
		mutex_unlock(&tl->mutex);
		if (err) {
			i915_request_add(rq);
			goto out;
		}
		pr_debug("seqno[0]:%08x, hwsp_offset:%08x\n",
			 seqno[0], tl->hwsp_offset);

		err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[0]);
		if (err) {
			i915_request_add(rq);
			goto out;
		}
		hwsp_seqno[0] = tl->hwsp_seqno;

		mutex_lock_nested(&tl->mutex, SINGLE_DEPTH_NESTING);
		err = intel_timeline_get_seqno(tl, rq, &seqno[1]);
		mutex_unlock(&tl->mutex);
		if (err) {
			i915_request_add(rq);
			goto out;
		}
		pr_debug("seqno[1]:%08x, hwsp_offset:%08x\n",
			 seqno[1], tl->hwsp_offset);

		err = emit_ggtt_store_dw(rq, tl->hwsp_offset, seqno[1]);
		if (err) {
			i915_request_add(rq);
			goto out;
		}
		hwsp_seqno[1] = tl->hwsp_seqno;

		/* With wrap should come a new hwsp */
		GEM_BUG_ON(seqno[1] >= seqno[0]);
		GEM_BUG_ON(hwsp_seqno[0] == hwsp_seqno[1]);

		i915_request_add(rq);

		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
			pr_err("Wait for timeline writes timed out!\n");
			err = -EIO;
			goto out;
		}

		if (READ_ONCE(*hwsp_seqno[0]) != seqno[0] ||
		    READ_ONCE(*hwsp_seqno[1]) != seqno[1]) {
			pr_err("Bad timeline values: found (%x, %x), expected (%x, %x)\n",
			       *hwsp_seqno[0], *hwsp_seqno[1],
			       seqno[0], seqno[1]);
			err = -EINVAL;
			goto out;
		}

		intel_gt_retire_requests(gt); /* recycle HWSP */
	}

out:
	if (igt_flush_test(gt->i915))
		err = -EIO;

	intel_timeline_unpin(tl);
out_free:
	intel_timeline_put(tl);
	return err;
}

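/*
 * Rewind the kernel context's timeline so that the next few requests
 * straddle the u32 seqno wrap, then check that requests submitted across
 * the boundary still complete.
 */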
static int live_hwsp_rollover_kernel(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Run the host for long enough, and even the kernel context will
	 * see a seqno rollover.
	 */

	for_each_engine(engine, gt, id) {
		struct intel_context *ce = engine->kernel_context;
		struct intel_timeline *tl = ce->timeline;
		struct i915_request *rq[3] = {};
		int i;

		st_engine_heartbeat_disable(engine);
		if (intel_gt_wait_for_idle(gt, HZ / 2)) {
			err = -EIO;
			goto out;
		}

		GEM_BUG_ON(i915_active_fence_isset(&tl->last_request));
		tl->seqno = 0;
		timeline_rollback(tl);
		timeline_rollback(tl);
		WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno);

		for (i = 0; i < ARRAY_SIZE(rq); i++) {
			struct i915_request *this;

			this = i915_request_create(ce);
			if (IS_ERR(this)) {
				err = PTR_ERR(this);
				goto out;
			}

			pr_debug("%s: create fence.seqno:%d\n",
				 engine->name,
				 lower_32_bits(this->fence.seqno));

			GEM_BUG_ON(rcu_access_pointer(this->timeline) != tl);

			rq[i] = i915_request_get(this);
			i915_request_add(this);
		}

		/* We expected a wrap! */
		GEM_BUG_ON(rq[2]->fence.seqno > rq[0]->fence.seqno);

		if (i915_request_wait(rq[2], 0, HZ / 5) < 0) {
			pr_err("Wait for timeline wrap timed out!\n");
			err = -EIO;
			goto out;
		}

		for (i = 0; i < ARRAY_SIZE(rq); i++) {
			if (!i915_request_completed(rq[i])) {
				pr_err("Pre-wrap request not completed!\n");
				err = -EINVAL;
				goto out;
			}
		}

out:
		for (i = 0; i < ARRAY_SIZE(rq); i++)
			i915_request_put(rq[i]);
		st_engine_heartbeat_enable(engine);
		if (err)
			break;
	}

	if (igt_flush_test(gt->i915))
		err = -EIO;

	return err;
}

static int live_hwsp_rollover_user(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Simulate a long running user context, and force the seqno wrap
	 * on the user's timeline.
	 */

	for_each_engine(engine, gt, id) {
		struct i915_request *rq[3] = {};
		struct intel_timeline *tl;
		struct intel_context *ce;
		int i;

		ce = intel_context_create(engine);
		if (IS_ERR(ce))
			return PTR_ERR(ce);

		err = intel_context_alloc_state(ce);
		if (err)
			goto out;

		tl = ce->timeline;
		if (!tl->has_initial_breadcrumb || !tl->hwsp_cacheline)
			goto out;

		timeline_rollback(tl);
		timeline_rollback(tl);
		WRITE_ONCE(*(u32 *)tl->hwsp_seqno, tl->seqno);

		for (i = 0; i < ARRAY_SIZE(rq); i++) {
			struct i915_request *this;

			this = intel_context_create_request(ce);
			if (IS_ERR(this)) {
				err = PTR_ERR(this);
				goto out;
			}

			pr_debug("%s: create fence.seqno:%d\n",
				 engine->name,
				 lower_32_bits(this->fence.seqno));

			GEM_BUG_ON(rcu_access_pointer(this->timeline) != tl);

			rq[i] = i915_request_get(this);
			i915_request_add(this);
		}

		/* We expected a wrap! */
		GEM_BUG_ON(rq[2]->fence.seqno > rq[0]->fence.seqno);

		if (i915_request_wait(rq[2], 0, HZ / 5) < 0) {
			pr_err("Wait for timeline wrap timed out!\n");
			err = -EIO;
			goto out;
		}

		for (i = 0; i < ARRAY_SIZE(rq); i++) {
			if (!i915_request_completed(rq[i])) {
				pr_err("Pre-wrap request not completed!\n");
				err = -EINVAL;
				goto out;
			}
		}

out:
		for (i = 0; i < ARRAY_SIZE(rq); i++)
			i915_request_put(rq[i]);
		intel_context_put(ce);
		if (err)
			break;
	}

	if (igt_flush_test(gt->i915))
		err = -EIO;

	return err;
}

static int live_hwsp_recycle(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned long count;
	int err = 0;

	/*
	 * Check seqno writes into one timeline at a time. We expect to
	 * recycle the breadcrumb slot between iterations and do not want
	 * to confuse either ourselves or the GPU.
	 */

	count = 0;
	for_each_engine(engine, gt, id) {
		IGT_TIMEOUT(end_time);

		if (!intel_engine_can_store_dword(engine))
			continue;

		intel_engine_pm_get(engine);

		do {
			struct intel_timeline *tl;
			struct i915_request *rq;

			tl = checked_intel_timeline_create(gt);
			if (IS_ERR(tl)) {
				err = PTR_ERR(tl);
				break;
			}

			rq = tl_write(tl, engine, count);
			if (IS_ERR(rq)) {
				intel_timeline_put(tl);
				err = PTR_ERR(rq);
				break;
			}

			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
				pr_err("Wait for timeline writes timed out!\n");
				i915_request_put(rq);
				intel_timeline_put(tl);
				err = -EIO;
				break;
			}

			if (READ_ONCE(*tl->hwsp_seqno) != count) {
				GEM_TRACE_ERR("Invalid seqno:%lu stored in timeline %llu @ %x found 0x%x\n",
					      count, tl->fence_context,
					      tl->hwsp_offset, *tl->hwsp_seqno);
				GEM_TRACE_DUMP();
				err = -EINVAL;
			}

			i915_request_put(rq);
			intel_timeline_put(tl);
			count++;

			if (err)
				break;
		} while (!__igt_timeout(end_time, NULL));

		intel_engine_pm_put(engine);
		if (err)
			break;
	}

	return err;
}

int intel_timeline_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_hwsp_recycle),
		SUBTEST(live_hwsp_engine),
		SUBTEST(live_hwsp_alternate),
		SUBTEST(live_hwsp_wrap),
		SUBTEST(live_hwsp_rollover_kernel),
		SUBTEST(live_hwsp_rollover_user),
	};

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	return intel_gt_live_subtests(tests, &i915->gt);
}