// SPDX-License-Identifier: MIT
/*
 * Copyright © 2018 Intel Corporation
 */

#include <linux/crc32.h>

#include "gem/i915_gem_stolen.h"

#include "i915_memcpy.h"
#include "i915_selftest.h"
#include "selftests/igt_reset.h"
#include "selftests/igt_atomic.h"
#include "selftests/igt_spinner.h"
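
/*
 * Fill every unused page of stolen memory with a known pattern, CRC each
 * page, trigger the requested reset while spinners keep the selected
 * engines busy, and then compare the CRCs. Any unused page that changed
 * was clobbered by the reset; if the damage extends into the unreserved
 * area above I915_GEM_STOLEN_BIAS, report an error as that memory is
 * expected to survive a reset.
 */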
static int
__igt_reset_stolen(struct intel_gt *gt,
		   intel_engine_mask_t mask,
		   const char *msg)
{
	struct i915_ggtt *ggtt = &gt->i915->ggtt;
	const struct resource *dsm = &gt->i915->dsm;
	resource_size_t num_pages, page;
	struct intel_engine_cs *engine;
	intel_wakeref_t wakeref;
	enum intel_engine_id id;
	struct igt_spinner spin;
	long max, count;
	void *tmp;
	u32 *crc;
	int err;
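
	/*
	 * We reuse the GGTT slot reserved for error capture to map each
	 * stolen page in turn, so skip if that slot was never allocated.
	 */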
	if (!drm_mm_node_allocated(&ggtt->error_capture))
		return 0;

	num_pages = resource_size(dsm) >> PAGE_SHIFT;
	if (!num_pages)
		return 0;

	crc = kmalloc_array(num_pages, sizeof(u32), GFP_KERNEL);
	if (!crc)
		return -ENOMEM;

	tmp = kmalloc(PAGE_SIZE, GFP_KERNEL);
	if (!tmp) {
		err = -ENOMEM;
		goto err_crc;
	}

	igt_global_reset_lock(gt);
	wakeref = intel_runtime_pm_get(gt->uncore->rpm);

	err = igt_spinner_init(&spin, gt);
	if (err)
		goto err_lock;
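
	/*
	 * Submit a spinning request on each engine under test so that the
	 * reset below has active work to interrupt.
	 */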
	for_each_engine(engine, gt, id) {
		struct intel_context *ce;
		struct i915_request *rq;

		if (!(mask & engine->mask))
			continue;

		if (!intel_engine_can_store_dword(engine))
			continue;

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			goto err_spin;
		}
		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
		intel_context_put(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_spin;
		}
		i915_request_add(rq);
	}
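
	/*
	 * Scribble a known pattern over every unused page of stolen and
	 * record a CRC of each page before the reset.
	 */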
	for (page = 0; page < num_pages; page++) {
		dma_addr_t dma = (dma_addr_t)dsm->start + (page << PAGE_SHIFT);
		void __iomem *s;
		void *in;

		ggtt->vm.insert_page(&ggtt->vm, dma,
				     ggtt->error_capture.start,
				     I915_CACHE_NONE, 0);
		mb();

		s = io_mapping_map_wc(&ggtt->iomap,
				      ggtt->error_capture.start,
				      PAGE_SIZE);

		if (!__drm_mm_interval_first(&gt->i915->mm.stolen,
					     page << PAGE_SHIFT,
					     ((page + 1) << PAGE_SHIFT) - 1))
			memset32(s, STACK_MAGIC, PAGE_SIZE / sizeof(u32));

		in = s;
		if (i915_memcpy_from_wc(tmp, s, PAGE_SIZE))
			in = tmp;
		crc[page] = crc32_le(0, in, PAGE_SIZE);

		io_mapping_unmap(s);
	}
	mb();
	ggtt->vm.clear_range(&ggtt->vm, ggtt->error_capture.start, PAGE_SIZE);
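
	/* Now trigger the requested reset: the whole GT or individual engines. */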
	if (mask == ALL_ENGINES) {
		intel_gt_reset(gt, mask, NULL);
	} else {
		for_each_engine(engine, gt, id) {
			if (mask & engine->mask)
				intel_engine_reset(engine, NULL);
		}
	}
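
	/*
	 * Recompute the CRC of every page and report any unused page of
	 * stolen whose contents were changed by the reset.
	 */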
	max = -1;
	count = 0;
	for (page = 0; page < num_pages; page++) {
		dma_addr_t dma = (dma_addr_t)dsm->start + (page << PAGE_SHIFT);
		void __iomem *s;
		void *in;
		u32 x;

		ggtt->vm.insert_page(&ggtt->vm, dma,
				     ggtt->error_capture.start,
				     I915_CACHE_NONE, 0);
		mb();

		s = io_mapping_map_wc(&ggtt->iomap,
				      ggtt->error_capture.start,
				      PAGE_SIZE);

		in = s;
		if (i915_memcpy_from_wc(tmp, s, PAGE_SIZE))
			in = tmp;
		x = crc32_le(0, in, PAGE_SIZE);

		if (x != crc[page] &&
		    !__drm_mm_interval_first(&gt->i915->mm.stolen,
					     page << PAGE_SHIFT,
					     ((page + 1) << PAGE_SHIFT) - 1)) {
			pr_debug("unused stolen page %pa modified by GPU reset\n",
				 &page);
			if (count++ == 0)
				igt_hexdump(in, PAGE_SIZE);
			max = page;
		}

		io_mapping_unmap(s);
	}
	mb();
	ggtt->vm.clear_range(&ggtt->vm, ggtt->error_capture.start, PAGE_SIZE);

	if (count > 0) {
		pr_info("%s reset clobbered %ld pages of stolen, last clobber at page %ld\n",
			msg, count, max);
	}
	if (max >= I915_GEM_STOLEN_BIAS >> PAGE_SHIFT) {
		pr_err("%s reset clobbered unreserved area [above %x] of stolen; may cause severe faults\n",
		       msg, I915_GEM_STOLEN_BIAS);
		err = -EINVAL;
	}

err_spin:
	igt_spinner_fini(&spin);

err_lock:
	intel_runtime_pm_put(gt->uncore->rpm, wakeref);
	igt_global_reset_unlock(gt);

	kfree(tmp);
err_crc:
	kfree(crc);
	return err;
}

static int igt_reset_device_stolen(void *arg)
{
	return __igt_reset_stolen(arg, ALL_ENGINES, "device");
}

static int igt_reset_engines_stolen(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err;
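
	/* Check that resetting each engine in turn does not clobber stolen memory */
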
	if (!intel_has_reset_engine(gt))
		return 0;

	for_each_engine(engine, gt, id) {
		err = __igt_reset_stolen(gt, engine->mask, engine->name);
		if (err)
			return err;
	}

	return 0;
}

static int igt_global_reset(void *arg)
{
	struct intel_gt *gt = arg;
	unsigned int reset_count;
	intel_wakeref_t wakeref;
	int err = 0;

	/* Check that we can issue a global GPU reset */

	igt_global_reset_lock(gt);
	wakeref = intel_runtime_pm_get(gt->uncore->rpm);

	reset_count = i915_reset_count(&gt->i915->gpu_error);

	intel_gt_reset(gt, ALL_ENGINES, NULL);

	if (i915_reset_count(&gt->i915->gpu_error) == reset_count) {
		pr_err("No GPU reset recorded!\n");
		err = -EINVAL;
	}

	intel_runtime_pm_put(gt->uncore->rpm, wakeref);
	igt_global_reset_unlock(gt);

	if (intel_gt_is_wedged(gt))
		err = -EIO;

	return err;
}

static int igt_wedged_reset(void *arg)
{
	struct intel_gt *gt = arg;
	intel_wakeref_t wakeref;

	/* Check that we can recover a wedged device with a GPU reset */

	igt_global_reset_lock(gt);
	wakeref = intel_runtime_pm_get(gt->uncore->rpm);

	intel_gt_set_wedged(gt);

	GEM_BUG_ON(!intel_gt_is_wedged(gt));
	intel_gt_reset(gt, ALL_ENGINES, NULL);

	intel_runtime_pm_put(gt->uncore->rpm, wakeref);
	igt_global_reset_unlock(gt);

	return intel_gt_is_wedged(gt) ? -EIO : 0;
}

static int igt_atomic_reset(void *arg)
{
	struct intel_gt *gt = arg;
	const typeof(*igt_atomic_phases) *p;
	int err = 0;

	/* Check that the resets are usable from atomic context */

	intel_gt_pm_get(gt);
	igt_global_reset_lock(gt);

	/* Flush any requests before we get started and check basics */
	if (!igt_force_reset(gt))
		goto unlock;
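
	/* Attempt a full GT reset from inside each type of atomic section. */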
	for (p = igt_atomic_phases; p->name; p++) {
		intel_engine_mask_t awake;

		GEM_TRACE("__intel_gt_reset under %s\n", p->name);

		awake = reset_prepare(gt);
		p->critical_section_begin();

		err = __intel_gt_reset(gt, ALL_ENGINES);

		p->critical_section_end();
		reset_finish(gt, awake);

		if (err) {
			pr_err("__intel_gt_reset failed under %s\n", p->name);
			break;
		}
	}

	/* As we poke around the guts, do a full reset before continuing. */
	igt_force_reset(gt);

unlock:
	igt_global_reset_unlock(gt);
	intel_gt_pm_put(gt);

	return err;
}

static int igt_atomic_engine_reset(void *arg)
{
	struct intel_gt *gt = arg;
	const typeof(*igt_atomic_phases) *p;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/* Check that the resets are usable from atomic context */

	if (!intel_has_reset_engine(gt))
		return 0;
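
	/* Engine resets are handled by the GuC when it owns submission */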
	if (intel_uc_uses_guc_submission(&gt->uc))
		return 0;

	intel_gt_pm_get(gt);
	igt_global_reset_lock(gt);

	/* Flush any requests before we get started and check basics */
	if (!igt_force_reset(gt))
		goto out_unlock;

	for_each_engine(engine, gt, id) {
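		/* Keep the submission tasklet from running while we reset the engine */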
		tasklet_disable(&engine->execlists.tasklet);
		intel_engine_pm_get(engine);

		for (p = igt_atomic_phases; p->name; p++) {
			GEM_TRACE("intel_engine_reset(%s) under %s\n",
				  engine->name, p->name);

			p->critical_section_begin();
			err = intel_engine_reset(engine, NULL);
			p->critical_section_end();

			if (err) {
				pr_err("intel_engine_reset(%s) failed under %s\n",
				       engine->name, p->name);
				break;
			}
		}

		intel_engine_pm_put(engine);
		tasklet_enable(&engine->execlists.tasklet);
		if (err)
			break;
	}

	/* As we poke around the guts, do a full reset before continuing. */
	igt_force_reset(gt);

out_unlock:
	igt_global_reset_unlock(gt);
	intel_gt_pm_put(gt);

	return err;
}

int intel_reset_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_global_reset), /* attempt to recover GPU first */
		SUBTEST(igt_reset_device_stolen),
		SUBTEST(igt_reset_engines_stolen),
		SUBTEST(igt_wedged_reset),
		SUBTEST(igt_atomic_reset),
		SUBTEST(igt_atomic_engine_reset),
	};
	struct intel_gt *gt = &i915->gt;

	if (!intel_has_gpu_reset(gt))
		return 0;

	if (intel_gt_is_wedged(gt))
		return -EIO; /* we're long past hope of a successful reset */

	return intel_gt_live_subtests(tests, gt);
}