1// SPDX-License-Identifier: MIT
2/*
3 * Copyright © 2020 Intel Corporation
4 */
5
6#include <linux/pm_qos.h>
7#include <linux/sort.h>
8
9#include "intel_engine_heartbeat.h"
10#include "intel_engine_pm.h"
11#include "intel_gpu_commands.h"
12#include "intel_gt_clock_utils.h"
13#include "intel_gt_pm.h"
14#include "intel_rc6.h"
15#include "selftest_engine_heartbeat.h"
16#include "selftest_rps.h"
17#include "selftests/igt_flush_test.h"
18#include "selftests/igt_spinner.h"
19#include "selftests/librapl.h"
20
/*
 * Try to isolate the impact of cstates from determining frequency response.
 *
 * The frequency tests in this file pass this value to
 * cpu_latency_qos_add_request() whenever it is >= 0.
 */
#define CPU_LATENCY 0 /* -1 to disable pm_qos, 0 to disable cstates */
23
/*
 * No-op work handler.
 *
 * The tests in this file temporarily install it as rps->work.func, saving
 * the previous handler and restoring it before they return.
 */
static void dummy_rps_work(struct work_struct *wrk)
{
	/* Intentionally empty */
}
27
28static int cmp_u64(const void *A, const void *B)
29{
30	const u64 *a = A, *b = B;
31
32	if (*a < *b)
33		return -1;
34	else if (*a > *b)
35		return 1;
36	else
37		return 0;
38}
39
40static int cmp_u32(const void *A, const void *B)
41{
42	const u32 *a = A, *b = B;
43
44	if (*a < *b)
45		return -1;
46	else if (*a > *b)
47		return 1;
48	else
49		return 0;
50}
51
52static struct i915_vma *
53create_spin_counter(struct intel_engine_cs *engine,
54		    struct i915_address_space *vm,
55		    bool srm,
56		    u32 **cancel,
57		    u32 **counter)
58{
59	enum {
60		COUNT,
61		INC,
62		__NGPR__,
63	};
64#define CS_GPR(x) GEN8_RING_CS_GPR(engine->mmio_base, x)
65	struct drm_i915_gem_object *obj;
66	struct i915_vma *vma;
67	unsigned long end;
68	u32 *base, *cs;
69	int loop, i;
70	int err;
71
72	obj = i915_gem_object_create_internal(vm->i915, 64 << 10);
73	if (IS_ERR(obj))
74		return ERR_CAST(obj);
75
76	end = obj->base.size / sizeof(u32) - 1;
77
78	vma = i915_vma_instance(obj, vm, NULL);
79	if (IS_ERR(vma)) {
80		err = PTR_ERR(vma);
81		goto err_put;
82	}
83
84	err = i915_vma_pin(vma, 0, 0, PIN_USER);
85	if (err)
86		goto err_unlock;
87
88	i915_vma_lock(vma);
89
90	base = i915_gem_object_pin_map(obj, I915_MAP_WC);
91	if (IS_ERR(base)) {
92		err = PTR_ERR(base);
93		goto err_unpin;
94	}
95	cs = base;
96
97	*cs++ = MI_LOAD_REGISTER_IMM(__NGPR__ * 2);
98	for (i = 0; i < __NGPR__; i++) {
99		*cs++ = i915_mmio_reg_offset(CS_GPR(i));
100		*cs++ = 0;
101		*cs++ = i915_mmio_reg_offset(CS_GPR(i)) + 4;
102		*cs++ = 0;
103	}
104
105	*cs++ = MI_LOAD_REGISTER_IMM(1);
106	*cs++ = i915_mmio_reg_offset(CS_GPR(INC));
107	*cs++ = 1;
108
109	loop = cs - base;
110
111	/* Unroll the loop to avoid MI_BB_START stalls impacting measurements */
112	for (i = 0; i < 1024; i++) {
113		*cs++ = MI_MATH(4);
114		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(COUNT));
115		*cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(INC));
116		*cs++ = MI_MATH_ADD;
117		*cs++ = MI_MATH_STORE(MI_MATH_REG(COUNT), MI_MATH_REG_ACCU);
118
119		if (srm) {
120			*cs++ = MI_STORE_REGISTER_MEM_GEN8;
121			*cs++ = i915_mmio_reg_offset(CS_GPR(COUNT));
122			*cs++ = lower_32_bits(vma->node.start + end * sizeof(*cs));
123			*cs++ = upper_32_bits(vma->node.start + end * sizeof(*cs));
124		}
125	}
126
127	*cs++ = MI_BATCH_BUFFER_START_GEN8;
128	*cs++ = lower_32_bits(vma->node.start + loop * sizeof(*cs));
129	*cs++ = upper_32_bits(vma->node.start + loop * sizeof(*cs));
130	GEM_BUG_ON(cs - base > end);
131
132	i915_gem_object_flush_map(obj);
133
134	*cancel = base + loop;
135	*counter = srm ? memset32(base + end, 0, 1) : NULL;
136	return vma;
137
138err_unpin:
139	i915_vma_unpin(vma);
140err_unlock:
141	i915_vma_unlock(vma);
142err_put:
143	i915_gem_object_put(obj);
144	return ERR_PTR(err);
145}
146
147static u8 wait_for_freq(struct intel_rps *rps, u8 freq, int timeout_ms)
148{
149	u8 history[64], i;
150	unsigned long end;
151	int sleep;
152
153	i = 0;
154	memset(history, freq, sizeof(history));
155	sleep = 20;
156
157	/* The PCU does not change instantly, but drifts towards the goal? */
158	end = jiffies + msecs_to_jiffies(timeout_ms);
159	do {
160		u8 act;
161
162		act = read_cagf(rps);
163		if (time_after(jiffies, end))
164			return act;
165
166		/* Target acquired */
167		if (act == freq)
168			return act;
169
170		/* Any change within the last N samples? */
171		if (!memchr_inv(history, act, sizeof(history)))
172			return act;
173
174		history[i] = act;
175		i = (i + 1) % ARRAY_SIZE(history);
176
177		usleep_range(sleep, 2 * sleep);
178		sleep *= 2;
179		if (sleep > timeout_ms * 20)
180			sleep = timeout_ms * 20;
181	} while (1);
182}
183
184static u8 rps_set_check(struct intel_rps *rps, u8 freq)
185{
186	mutex_lock(&rps->lock);
187	GEM_BUG_ON(!intel_rps_is_active(rps));
188	intel_rps_set(rps, freq);
189	GEM_BUG_ON(rps->last_freq != freq);
190	mutex_unlock(&rps->lock);
191
192	return wait_for_freq(rps, freq, 50);
193}
194
195static void show_pstate_limits(struct intel_rps *rps)
196{
197	struct drm_i915_private *i915 = rps_to_i915(rps);
198
199	if (IS_BROXTON(i915)) {
200		pr_info("P_STATE_CAP[%x]: 0x%08x\n",
201			i915_mmio_reg_offset(BXT_RP_STATE_CAP),
202			intel_uncore_read(rps_to_uncore(rps),
203					  BXT_RP_STATE_CAP));
204	} else if (IS_GEN(i915, 9)) {
205		pr_info("P_STATE_LIMITS[%x]: 0x%08x\n",
206			i915_mmio_reg_offset(GEN9_RP_STATE_LIMITS),
207			intel_uncore_read(rps_to_uncore(rps),
208					  GEN9_RP_STATE_LIMITS));
209	}
210}
211
/*
 * Selftest: compare the RPS evaluation-interval counter against walltime.
 *
 * While a spinner keeps an engine busy, GEN6_RP_CUR_UP_EI is sampled over
 * five udelay(1000) windows together with ktime_get(). The filtered cycle
 * count converted to ns must lie within [0.8, 1.25] of the filtered
 * walltime, and the walltime converted to PM intervals must lie within the
 * same range of the cycle count. Only the first engine that can store a
 * dword is exercised.
 *
 * @arg: the struct intel_gt under test
 *
 * Returns 0 on success or when the test is skipped (RPS not enabled, or the
 * counter is not ticking), a negative errno otherwise.
 */
int live_rps_clock_interval(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	if (!intel_rps_is_enabled(rps))
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	/* Swap in a no-op worker for the test; restored before returning */
	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	intel_gt_pm_get(gt);
	intel_rps_disable(&gt->rps);

	intel_gt_check_clock_frequency(gt);

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		u32 cycles;
		u64 dt;

		if (!intel_engine_can_store_dword(engine))
			continue;

		st_engine_heartbeat_disable(engine);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			st_engine_heartbeat_enable(engine);
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			pr_err("%s: RPS spinner did not start\n",
			       engine->name);
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			intel_gt_set_wedged(engine->gt);
			err = -EIO;
			break;
		}

		intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);

		/* Start counting from zero */
		intel_uncore_write_fw(gt->uncore, GEN6_RP_CUR_UP_EI, 0);

		/* Set the evaluation interval to infinity! */
		intel_uncore_write_fw(gt->uncore,
				      GEN6_RP_UP_EI, 0xffffffff);
		intel_uncore_write_fw(gt->uncore,
				      GEN6_RP_UP_THRESHOLD, 0xffffffff);

		intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL,
				      GEN6_RP_ENABLE | GEN6_RP_UP_BUSY_AVG);

		/* Wait up to 10ms for the counter to become non-zero */
		if (wait_for(intel_uncore_read_fw(gt->uncore,
						  GEN6_RP_CUR_UP_EI),
			     10)) {
			/* Just skip the test; assume lack of HW support */
			pr_notice("%s: rps evaluation interval not ticking\n",
				  engine->name);
			err = -ENODEV;
		} else {
			ktime_t dt_[5];
			u32 cycles_[5];
			int i;

			for (i = 0; i < 5; i++) {
				preempt_disable();

				/*
				 * Store the negated start reading so that
				 * adding the end reading leaves the delta.
				 */
				dt_[i] = ktime_get();
				cycles_[i] = -intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);

				udelay(1000);

				dt_[i] = ktime_sub(ktime_get(), dt_[i]);
				cycles_[i] += intel_uncore_read_fw(gt->uncore, GEN6_RP_CUR_UP_EI);

				preempt_enable();
			}

			/* Use the median of both cycle/dt; close enough */
			sort(cycles_, 5, sizeof(*cycles_), cmp_u32, NULL);
			cycles = (cycles_[1] + 2 * cycles_[2] + cycles_[3]) / 4;
			sort(dt_, 5, sizeof(*dt_), cmp_u64, NULL);
			dt = div_u64(dt_[1] + 2 * dt_[2] + dt_[3], 4);
		}

		intel_uncore_write_fw(gt->uncore, GEN6_RP_CONTROL, 0);
		intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);

		igt_spinner_end(&spin);
		st_engine_heartbeat_enable(engine);

		/* cycles and dt are only assigned on the err == 0 path */
		if (err == 0) {
			u64 time = intel_gt_pm_interval_to_ns(gt, cycles);
			u32 expected =
				intel_gt_ns_to_pm_interval(gt, dt);

			pr_info("%s: rps counted %d C0 cycles [%lldns] in %lldns [%d cycles], using GT clock frequency of %uKHz\n",
				engine->name, cycles, time, dt, expected,
				gt->clock_frequency / 1000);

			/* Fail unless 0.8 * dt <= time <= 1.25 * dt */
			if (10 * time < 8 * dt ||
			    8 * time > 10 * dt) {
				pr_err("%s: rps clock time does not match walltime!\n",
				       engine->name);
				err = -EINVAL;
			}

			/* Same tolerance, compared in units of PM intervals */
			if (10 * expected < 8 * cycles ||
			    8 * expected > 10 * cycles) {
				pr_err("%s: walltime does not match rps clock ticks!\n",
				       engine->name);
				err = -EINVAL;
			}
		}

		if (igt_flush_test(gt->i915))
			err = -EIO;

		break; /* once is enough */
	}

	intel_rps_enable(&gt->rps);
	intel_gt_pm_put(gt);

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (err == -ENODEV) /* skipped, don't report a fail */
		err = 0;

	return err;
}
363
364int live_rps_control(void *arg)
365{
366	struct intel_gt *gt = arg;
367	struct intel_rps *rps = &gt->rps;
368	void (*saved_work)(struct work_struct *wrk);
369	struct intel_engine_cs *engine;
370	enum intel_engine_id id;
371	struct igt_spinner spin;
372	int err = 0;
373
374	/*
375	 * Check that the actual frequency matches our requested frequency,
376	 * to verify our control mechanism. We have to be careful that the
377	 * PCU may throttle the GPU in which case the actual frequency used
378	 * will be lowered than requested.
379	 */
380
381	if (!intel_rps_is_enabled(rps))
382		return 0;
383
384	if (IS_CHERRYVIEW(gt->i915)) /* XXX fragile PCU */
385		return 0;
386
387	if (igt_spinner_init(&spin, gt))
388		return -ENOMEM;
389
390	intel_gt_pm_wait_for_idle(gt);
391	saved_work = rps->work.func;
392	rps->work.func = dummy_rps_work;
393
394	intel_gt_pm_get(gt);
395	for_each_engine(engine, gt, id) {
396		struct i915_request *rq;
397		ktime_t min_dt, max_dt;
398		int f, limit;
399		int min, max;
400
401		if (!intel_engine_can_store_dword(engine))
402			continue;
403
404		st_engine_heartbeat_disable(engine);
405
406		rq = igt_spinner_create_request(&spin,
407						engine->kernel_context,
408						MI_NOOP);
409		if (IS_ERR(rq)) {
410			err = PTR_ERR(rq);
411			break;
412		}
413
414		i915_request_add(rq);
415
416		if (!igt_wait_for_spinner(&spin, rq)) {
417			pr_err("%s: RPS spinner did not start\n",
418			       engine->name);
419			igt_spinner_end(&spin);
420			st_engine_heartbeat_enable(engine);
421			intel_gt_set_wedged(engine->gt);
422			err = -EIO;
423			break;
424		}
425
426		if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
427			pr_err("%s: could not set minimum frequency [%x], only %x!\n",
428			       engine->name, rps->min_freq, read_cagf(rps));
429			igt_spinner_end(&spin);
430			st_engine_heartbeat_enable(engine);
431			show_pstate_limits(rps);
432			err = -EINVAL;
433			break;
434		}
435
436		for (f = rps->min_freq + 1; f < rps->max_freq; f++) {
437			if (rps_set_check(rps, f) < f)
438				break;
439		}
440
441		limit = rps_set_check(rps, f);
442
443		if (rps_set_check(rps, rps->min_freq) != rps->min_freq) {
444			pr_err("%s: could not restore minimum frequency [%x], only %x!\n",
445			       engine->name, rps->min_freq, read_cagf(rps));
446			igt_spinner_end(&spin);
447			st_engine_heartbeat_enable(engine);
448			show_pstate_limits(rps);
449			err = -EINVAL;
450			break;
451		}
452
453		max_dt = ktime_get();
454		max = rps_set_check(rps, limit);
455		max_dt = ktime_sub(ktime_get(), max_dt);
456
457		min_dt = ktime_get();
458		min = rps_set_check(rps, rps->min_freq);
459		min_dt = ktime_sub(ktime_get(), min_dt);
460
461		igt_spinner_end(&spin);
462		st_engine_heartbeat_enable(engine);
463
464		pr_info("%s: range:[%x:%uMHz, %x:%uMHz] limit:[%x:%uMHz], %x:%x response %lluns:%lluns\n",
465			engine->name,
466			rps->min_freq, intel_gpu_freq(rps, rps->min_freq),
467			rps->max_freq, intel_gpu_freq(rps, rps->max_freq),
468			limit, intel_gpu_freq(rps, limit),
469			min, max, ktime_to_ns(min_dt), ktime_to_ns(max_dt));
470
471		if (limit == rps->min_freq) {
472			pr_err("%s: GPU throttled to minimum!\n",
473			       engine->name);
474			show_pstate_limits(rps);
475			err = -ENODEV;
476			break;
477		}
478
479		if (igt_flush_test(gt->i915)) {
480			err = -EIO;
481			break;
482		}
483	}
484	intel_gt_pm_put(gt);
485
486	igt_spinner_fini(&spin);
487
488	intel_gt_pm_wait_for_idle(gt);
489	rps->work.func = saved_work;
490
491	return err;
492}
493
494static void show_pcu_config(struct intel_rps *rps)
495{
496	struct drm_i915_private *i915 = rps_to_i915(rps);
497	unsigned int max_gpu_freq, min_gpu_freq;
498	intel_wakeref_t wakeref;
499	int gpu_freq;
500
501	if (!HAS_LLC(i915))
502		return;
503
504	min_gpu_freq = rps->min_freq;
505	max_gpu_freq = rps->max_freq;
506	if (INTEL_GEN(i915) >= 9) {
507		/* Convert GT frequency to 50 HZ units */
508		min_gpu_freq /= GEN9_FREQ_SCALER;
509		max_gpu_freq /= GEN9_FREQ_SCALER;
510	}
511
512	wakeref = intel_runtime_pm_get(rps_to_uncore(rps)->rpm);
513
514	pr_info("%5s  %5s  %5s\n", "GPU", "eCPU", "eRing");
515	for (gpu_freq = min_gpu_freq; gpu_freq <= max_gpu_freq; gpu_freq++) {
516		int ia_freq = gpu_freq;
517
518		sandybridge_pcode_read(i915,
519				       GEN6_PCODE_READ_MIN_FREQ_TABLE,
520				       &ia_freq, NULL);
521
522		pr_info("%5d  %5d  %5d\n",
523			gpu_freq * 50,
524			((ia_freq >> 0) & 0xff) * 100,
525			((ia_freq >> 8) & 0xff) * 100);
526	}
527
528	intel_runtime_pm_put(rps_to_uncore(rps)->rpm, wakeref);
529}
530
531static u64 __measure_frequency(u32 *cntr, int duration_ms)
532{
533	u64 dc, dt;
534
535	dt = ktime_get();
536	dc = READ_ONCE(*cntr);
537	usleep_range(1000 * duration_ms, 2000 * duration_ms);
538	dc = READ_ONCE(*cntr) - dc;
539	dt = ktime_get() - dt;
540
541	return div64_u64(1000 * 1000 * dc, dt);
542}
543
544static u64 measure_frequency_at(struct intel_rps *rps, u32 *cntr, int *freq)
545{
546	u64 x[5];
547	int i;
548
549	*freq = rps_set_check(rps, *freq);
550	for (i = 0; i < 5; i++)
551		x[i] = __measure_frequency(cntr, 2);
552	*freq = (*freq + read_cagf(rps)) / 2;
553
554	/* A simple triangle filter for better result stability */
555	sort(x, 5, sizeof(*x), cmp_u64, NULL);
556	return div_u64(x[1] + 2 * x[2] + x[3], 4);
557}
558
559static u64 __measure_cs_frequency(struct intel_engine_cs *engine,
560				  int duration_ms)
561{
562	u64 dc, dt;
563
564	dt = ktime_get();
565	dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0));
566	usleep_range(1000 * duration_ms, 2000 * duration_ms);
567	dc = intel_uncore_read_fw(engine->uncore, CS_GPR(0)) - dc;
568	dt = ktime_get() - dt;
569
570	return div64_u64(1000 * 1000 * dc, dt);
571}
572
573static u64 measure_cs_frequency_at(struct intel_rps *rps,
574				   struct intel_engine_cs *engine,
575				   int *freq)
576{
577	u64 x[5];
578	int i;
579
580	*freq = rps_set_check(rps, *freq);
581	for (i = 0; i < 5; i++)
582		x[i] = __measure_cs_frequency(engine, 2);
583	*freq = (*freq + read_cagf(rps)) / 2;
584
585	/* A simple triangle filter for better result stability */
586	sort(x, 5, sizeof(*x), cmp_u64, NULL);
587	return div_u64(x[1] + 2 * x[2] + x[3], 4);
588}
589
590static bool scaled_within(u64 x, u64 y, u32 f_n, u32 f_d)
591{
592	return f_d * x > f_n * y && f_n * x < f_d * y;
593}
594
/*
 * Selftest: check that the command streamer speeds up with the GPU clock.
 *
 * For each engine a counting batch from create_spin_counter() (without
 * SRM) is started, and the rate at which CS_GPR(0) advances is measured
 * over mmio at the minimum and at the maximum frequency. The ratio of the
 * two rates, normalised by the frequencies, must lie between 2/3 and 3/2.
 *
 * @arg: the struct intel_gt under test
 *
 * Returns 0 on success or when skipped (RPS not enabled, or gen < 8), a
 * negative errno otherwise. A scaling failure is reported as -EINTR.
 */
int live_rps_frequency_cs(void *arg)
{
	void (*saved_work)(struct work_struct *wrk);
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	struct intel_engine_cs *engine;
	struct pm_qos_request qos;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * The premise is that the GPU does change frequency at our behest.
	 * Let's check there is a correspondence between the requested
	 * frequency, the actual frequency, and the observed clock rate.
	 */

	if (!intel_rps_is_enabled(rps))
		return 0;

	if (INTEL_GEN(gt->i915) < 8) /* for CS simplicity */
		return 0;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_add_request(&qos, CPU_LATENCY);

	/* Swap in a no-op worker for the test; restored before returning */
	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct i915_vma *vma;
		u32 *cancel, *cntr;
		struct {
			u64 count;
			int freq;
		} min, max;

		st_engine_heartbeat_disable(engine);

		/* srm == false: cntr comes back NULL and is not used here */
		vma = create_spin_counter(engine,
					  engine->kernel_context->vm, false,
					  &cancel, &cntr);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			st_engine_heartbeat_enable(engine);
			break;
		}

		rq = intel_engine_create_kernel_request(engine);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_vma;
		}

		/* The request is added even if building it failed part-way */
		err = i915_request_await_object(rq, vma->obj, false);
		if (!err)
			err = i915_vma_move_to_active(vma, rq, 0);
		if (!err)
			err = rq->engine->emit_bb_start(rq,
							vma->node.start,
							PAGE_SIZE, 0);
		i915_request_add(rq);
		if (err)
			goto err_vma;

		/* Wait up to 10ms for the counter register to become non-zero */
		if (wait_for(intel_uncore_read(engine->uncore, CS_GPR(0)),
			     10)) {
			/* NOTE(review): err is still 0 here; this is only logged */
			pr_err("%s: timed loop did not start\n",
			       engine->name);
			goto err_vma;
		}

		min.freq = rps->min_freq;
		min.count = measure_cs_frequency_at(rps, engine, &min.freq);

		max.freq = rps->max_freq;
		max.count = measure_cs_frequency_at(rps, engine, &max.freq);

		pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
			engine->name,
			min.count, intel_gpu_freq(rps, min.freq),
			max.count, intel_gpu_freq(rps, max.freq),
			(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
						     max.freq * min.count));

		/* Cross-multiplied so that perfect scaling makes both sides equal */
		if (!scaled_within(max.freq * min.count,
				   min.freq * max.count,
				   2, 3)) {
			int f;

			pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
			       engine->name,
			       max.freq * min.count,
			       min.freq * max.count);
			show_pcu_config(rps);

			/* Diagnostics: log the measured rate at each frequency step */
			for (f = min.freq + 1; f <= rps->max_freq; f++) {
				int act = f;
				u64 count;

				count = measure_cs_frequency_at(rps, engine, &act);
				if (act < f)
					break;

				pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
					engine->name,
					act, intel_gpu_freq(rps, act), count,
					(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
								     act * min.count));

				f = act; /* may skip ahead [pcu granularity] */
			}

			err = -EINTR; /* ignore error, continue on with test */
		}

err_vma:
		/* Terminate the loop, then undo what create_spin_counter() left held */
		*cancel = MI_BATCH_BUFFER_END;
		i915_gem_object_flush_map(vma->obj);
		i915_gem_object_unpin_map(vma->obj);
		i915_vma_unpin(vma);
		i915_vma_unlock(vma);
		i915_vma_put(vma);

		st_engine_heartbeat_enable(engine);
		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_remove_request(&qos);

	return err;
}
735
/*
 * Selftest: check that the command streamer speeds up with the GPU clock,
 * observing the counter through memory.
 *
 * For each engine a counting batch from create_spin_counter() (with SRM)
 * is started, and the rate at which the counter dword advances is measured
 * at the minimum and at the maximum frequency. The ratio of the two rates,
 * normalised by the frequencies, must lie between 1/2 and 2.
 *
 * @arg: the struct intel_gt under test
 *
 * Returns 0 on success or when skipped (RPS not enabled, or gen < 8), a
 * negative errno otherwise. A scaling failure is reported as -EINTR.
 */
int live_rps_frequency_srm(void *arg)
{
	void (*saved_work)(struct work_struct *wrk);
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	struct intel_engine_cs *engine;
	struct pm_qos_request qos;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * The premise is that the GPU does change frequency at our behest.
	 * Let's check there is a correspondence between the requested
	 * frequency, the actual frequency, and the observed clock rate.
	 */

	if (!intel_rps_is_enabled(rps))
		return 0;

	if (INTEL_GEN(gt->i915) < 8) /* for CS simplicity */
		return 0;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_add_request(&qos, CPU_LATENCY);

	/* Swap in a no-op worker for the test; restored before returning */
	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct i915_vma *vma;
		u32 *cancel, *cntr;
		struct {
			u64 count;
			int freq;
		} min, max;

		st_engine_heartbeat_disable(engine);

		/* srm == true: cntr points at the dword the batch stores into */
		vma = create_spin_counter(engine,
					  engine->kernel_context->vm, true,
					  &cancel, &cntr);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			st_engine_heartbeat_enable(engine);
			break;
		}

		rq = intel_engine_create_kernel_request(engine);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_vma;
		}

		/* The request is added even if building it failed part-way */
		err = i915_request_await_object(rq, vma->obj, false);
		if (!err)
			err = i915_vma_move_to_active(vma, rq, 0);
		if (!err)
			err = rq->engine->emit_bb_start(rq,
							vma->node.start,
							PAGE_SIZE, 0);
		i915_request_add(rq);
		if (err)
			goto err_vma;

		/* Wait up to 10ms for the counter dword to become non-zero */
		if (wait_for(READ_ONCE(*cntr), 10)) {
			/* NOTE(review): err is still 0 here; this is only logged */
			pr_err("%s: timed loop did not start\n",
			       engine->name);
			goto err_vma;
		}

		min.freq = rps->min_freq;
		min.count = measure_frequency_at(rps, cntr, &min.freq);

		max.freq = rps->max_freq;
		max.count = measure_frequency_at(rps, cntr, &max.freq);

		pr_info("%s: min:%lluKHz @ %uMHz, max:%lluKHz @ %uMHz [%d%%]\n",
			engine->name,
			min.count, intel_gpu_freq(rps, min.freq),
			max.count, intel_gpu_freq(rps, max.freq),
			(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * max.count,
						     max.freq * min.count));

		/* Cross-multiplied so that perfect scaling makes both sides equal */
		if (!scaled_within(max.freq * min.count,
				   min.freq * max.count,
				   1, 2)) {
			int f;

			pr_err("%s: CS did not scale with frequency! scaled min:%llu, max:%llu\n",
			       engine->name,
			       max.freq * min.count,
			       min.freq * max.count);
			show_pcu_config(rps);

			/* Diagnostics: log the measured rate at each frequency step */
			for (f = min.freq + 1; f <= rps->max_freq; f++) {
				int act = f;
				u64 count;

				count = measure_frequency_at(rps, cntr, &act);
				if (act < f)
					break;

				pr_info("%s: %x:%uMHz: %lluKHz [%d%%]\n",
					engine->name,
					act, intel_gpu_freq(rps, act), count,
					(int)DIV64_U64_ROUND_CLOSEST(100 * min.freq * count,
								     act * min.count));

				f = act; /* may skip ahead [pcu granularity] */
			}

			err = -EINTR; /* ignore error, continue on with test */
		}

err_vma:
		/* Terminate the loop, then undo what create_spin_counter() left held */
		*cancel = MI_BATCH_BUFFER_END;
		i915_gem_object_flush_map(vma->obj);
		i915_gem_object_unpin_map(vma->obj);
		i915_vma_unpin(vma);
		i915_vma_unlock(vma);
		i915_vma_put(vma);

		st_engine_heartbeat_enable(engine);
		if (igt_flush_test(gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	if (CPU_LATENCY >= 0)
		cpu_latency_qos_remove_request(&qos);

	return err;
}
875
876static void sleep_for_ei(struct intel_rps *rps, int timeout_us)
877{
878	/* Flush any previous EI */
879	usleep_range(timeout_us, 2 * timeout_us);
880
881	/* Reset the interrupt status */
882	rps_disable_interrupts(rps);
883	GEM_BUG_ON(rps->pm_iir);
884	rps_enable_interrupts(rps);
885
886	/* And then wait for the timeout, for real this time */
887	usleep_range(2 * timeout_us, 3 * timeout_us);
888}
889
890static int __rps_up_interrupt(struct intel_rps *rps,
891			      struct intel_engine_cs *engine,
892			      struct igt_spinner *spin)
893{
894	struct intel_uncore *uncore = engine->uncore;
895	struct i915_request *rq;
896	u32 timeout;
897
898	if (!intel_engine_can_store_dword(engine))
899		return 0;
900
901	rps_set_check(rps, rps->min_freq);
902
903	rq = igt_spinner_create_request(spin, engine->kernel_context, MI_NOOP);
904	if (IS_ERR(rq))
905		return PTR_ERR(rq);
906
907	i915_request_get(rq);
908	i915_request_add(rq);
909
910	if (!igt_wait_for_spinner(spin, rq)) {
911		pr_err("%s: RPS spinner did not start\n",
912		       engine->name);
913		i915_request_put(rq);
914		intel_gt_set_wedged(engine->gt);
915		return -EIO;
916	}
917
918	if (!intel_rps_is_active(rps)) {
919		pr_err("%s: RPS not enabled on starting spinner\n",
920		       engine->name);
921		igt_spinner_end(spin);
922		i915_request_put(rq);
923		return -EINVAL;
924	}
925
926	if (!(rps->pm_events & GEN6_PM_RP_UP_THRESHOLD)) {
927		pr_err("%s: RPS did not register UP interrupt\n",
928		       engine->name);
929		i915_request_put(rq);
930		return -EINVAL;
931	}
932
933	if (rps->last_freq != rps->min_freq) {
934		pr_err("%s: RPS did not program min frequency\n",
935		       engine->name);
936		i915_request_put(rq);
937		return -EINVAL;
938	}
939
940	timeout = intel_uncore_read(uncore, GEN6_RP_UP_EI);
941	timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
942	timeout = DIV_ROUND_UP(timeout, 1000);
943
944	sleep_for_ei(rps, timeout);
945	GEM_BUG_ON(i915_request_completed(rq));
946
947	igt_spinner_end(spin);
948	i915_request_put(rq);
949
950	if (rps->cur_freq != rps->min_freq) {
951		pr_err("%s: Frequency unexpectedly changed [up], now %d!\n",
952		       engine->name, intel_rps_read_actual_frequency(rps));
953		return -EINVAL;
954	}
955
956	if (!(rps->pm_iir & GEN6_PM_RP_UP_THRESHOLD)) {
957		pr_err("%s: UP interrupt not recorded for spinner, pm_iir:%x, prev_up:%x, up_threshold:%x, up_ei:%x\n",
958		       engine->name, rps->pm_iir,
959		       intel_uncore_read(uncore, GEN6_RP_PREV_UP),
960		       intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
961		       intel_uncore_read(uncore, GEN6_RP_UP_EI));
962		return -EINVAL;
963	}
964
965	return 0;
966}
967
968static int __rps_down_interrupt(struct intel_rps *rps,
969				struct intel_engine_cs *engine)
970{
971	struct intel_uncore *uncore = engine->uncore;
972	u32 timeout;
973
974	rps_set_check(rps, rps->max_freq);
975
976	if (!(rps->pm_events & GEN6_PM_RP_DOWN_THRESHOLD)) {
977		pr_err("%s: RPS did not register DOWN interrupt\n",
978		       engine->name);
979		return -EINVAL;
980	}
981
982	if (rps->last_freq != rps->max_freq) {
983		pr_err("%s: RPS did not program max frequency\n",
984		       engine->name);
985		return -EINVAL;
986	}
987
988	timeout = intel_uncore_read(uncore, GEN6_RP_DOWN_EI);
989	timeout = intel_gt_pm_interval_to_ns(engine->gt, timeout);
990	timeout = DIV_ROUND_UP(timeout, 1000);
991
992	sleep_for_ei(rps, timeout);
993
994	if (rps->cur_freq != rps->max_freq) {
995		pr_err("%s: Frequency unexpectedly changed [down], now %d!\n",
996		       engine->name,
997		       intel_rps_read_actual_frequency(rps));
998		return -EINVAL;
999	}
1000
1001	if (!(rps->pm_iir & (GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT))) {
1002		pr_err("%s: DOWN interrupt not recorded for idle, pm_iir:%x, prev_down:%x, down_threshold:%x, down_ei:%x [prev_up:%x, up_threshold:%x, up_ei:%x]\n",
1003		       engine->name, rps->pm_iir,
1004		       intel_uncore_read(uncore, GEN6_RP_PREV_DOWN),
1005		       intel_uncore_read(uncore, GEN6_RP_DOWN_THRESHOLD),
1006		       intel_uncore_read(uncore, GEN6_RP_DOWN_EI),
1007		       intel_uncore_read(uncore, GEN6_RP_PREV_UP),
1008		       intel_uncore_read(uncore, GEN6_RP_UP_THRESHOLD),
1009		       intel_uncore_read(uncore, GEN6_RP_UP_EI));
1010		return -EINVAL;
1011	}
1012
1013	return 0;
1014}
1015
/*
 * Selftest: check that RPS UP and DOWN interrupts are being received.
 *
 * For each engine, and for each event type present in rps->pm_events, the
 * matching helper is run: __rps_up_interrupt() with a busy spinner, and
 * __rps_down_interrupt() with RC6 disabled. The first failure ends the
 * test.
 *
 * @arg: the struct intel_gt under test
 *
 * Returns 0 on success or when RPS has no interrupt support (skipped),
 * -ENODEV if no PM events are registered, another negative errno otherwise.
 */
int live_rps_interrupt(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	u32 pm_events;
	int err = 0;

	/*
	 * First, let's check whether or not we are receiving interrupts.
	 */

	if (!intel_rps_has_interrupts(rps))
		return 0;

	/* Sample the registered events while holding a GT wakeref */
	intel_gt_pm_get(gt);
	pm_events = rps->pm_events;
	intel_gt_pm_put(gt);
	if (!pm_events) {
		pr_err("No RPS PM events registered, but RPS is enabled?\n");
		return -ENODEV;
	}

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	/* Swap in a no-op worker for the test; restored before returning */
	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		/* Keep the engine busy with a spinner; expect an UP! */
		if (pm_events & GEN6_PM_RP_UP_THRESHOLD) {
			intel_gt_pm_wait_for_idle(engine->gt);
			GEM_BUG_ON(intel_rps_is_active(rps));

			st_engine_heartbeat_disable(engine);

			err = __rps_up_interrupt(rps, engine, &spin);

			st_engine_heartbeat_enable(engine);
			if (err)
				goto out;

			intel_gt_pm_wait_for_idle(engine->gt);
		}

		/* Keep the engine awake but idle and check for DOWN */
		if (pm_events & GEN6_PM_RP_DOWN_THRESHOLD) {
			st_engine_heartbeat_disable(engine);
			intel_rc6_disable(&gt->rc6);

			err = __rps_down_interrupt(rps, engine);

			intel_rc6_enable(&gt->rc6);
			st_engine_heartbeat_enable(engine);
			if (err)
				goto out;
		}
	}

out:
	/* A failed flush overrides any earlier error with -EIO */
	if (igt_flush_test(gt->i915))
		err = -EIO;

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	return err;
}
1091
1092static u64 __measure_power(int duration_ms)
1093{
1094	u64 dE, dt;
1095
1096	dt = ktime_get();
1097	dE = librapl_energy_uJ();
1098	usleep_range(1000 * duration_ms, 2000 * duration_ms);
1099	dE = librapl_energy_uJ() - dE;
1100	dt = ktime_get() - dt;
1101
1102	return div64_u64(1000 * 1000 * dE, dt);
1103}
1104
1105static u64 measure_power_at(struct intel_rps *rps, int *freq)
1106{
1107	u64 x[5];
1108	int i;
1109
1110	*freq = rps_set_check(rps, *freq);
1111	for (i = 0; i < 5; i++)
1112		x[i] = __measure_power(5);
1113	*freq = (*freq + read_cagf(rps)) / 2;
1114
1115	/* A simple triangle filter for better result stability */
1116	sort(x, 5, sizeof(*x), cmp_u64, NULL);
1117	return div_u64(x[1] + 2 * x[2] + x[3], 4);
1118}
1119
/*
 * Selftest: check that running at a lower frequency draws less power.
 *
 * For each engine that can store a dword, a spinner keeps the engine busy
 * while the power is measured at the maximum and then at the minimum
 * frequency. The test fails if 11 * min.power > 10 * max.power. An engine
 * is skipped with a notice if the minimum frequency achieved is not below
 * 90% of the maximum achieved.
 *
 * @arg: the struct intel_gt under test
 *
 * Returns 0 on success or when skipped (RPS not enabled, or
 * librapl_energy_uJ() reads zero), a negative errno otherwise.
 */
int live_rps_power(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_rps *rps = &gt->rps;
	void (*saved_work)(struct work_struct *wrk);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_spinner spin;
	int err = 0;

	/*
	 * Our fundamental assumption is that running at lower frequency
	 * actually saves power. Let's see if our RAPL measurement support
	 * that theory.
	 */

	if (!intel_rps_is_enabled(rps))
		return 0;

	/* A zero reading is treated as "no energy counter available" */
	if (!librapl_energy_uJ())
		return 0;

	if (igt_spinner_init(&spin, gt))
		return -ENOMEM;

	/* Swap in a no-op worker for the test; restored before returning */
	intel_gt_pm_wait_for_idle(gt);
	saved_work = rps->work.func;
	rps->work.func = dummy_rps_work;

	for_each_engine(engine, gt, id) {
		struct i915_request *rq;
		struct {
			u64 power;
			int freq;
		} min, max;

		if (!intel_engine_can_store_dword(engine))
			continue;

		st_engine_heartbeat_disable(engine);

		rq = igt_spinner_create_request(&spin,
						engine->kernel_context,
						MI_NOOP);
		if (IS_ERR(rq)) {
			st_engine_heartbeat_enable(engine);
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);

		if (!igt_wait_for_spinner(&spin, rq)) {
			pr_err("%s: RPS spinner did not start\n",
			       engine->name);
			igt_spinner_end(&spin);
			st_engine_heartbeat_enable(engine);
			intel_gt_set_wedged(engine->gt);
			err = -EIO;
			break;
		}

		/* measure_power_at() updates .freq to what was actually observed */
		max.freq = rps->max_freq;
		max.power = measure_power_at(rps, &max.freq);

		min.freq = rps->min_freq;
		min.power = measure_power_at(rps, &min.freq);

		igt_spinner_end(&spin);
		st_engine_heartbeat_enable(engine);

		pr_info("%s: min:%llumW @ %uMHz, max:%llumW @ %uMHz\n",
			engine->name,
			min.power, intel_gpu_freq(rps, min.freq),
			max.power, intel_gpu_freq(rps, max.freq));

		/* Frequencies too close together to compare: skip this engine */
		if (10 * min.freq >= 9 * max.freq) {
			pr_notice("Could not control frequency, ran at [%d:%uMHz, %d:%uMhz]\n",
				  min.freq, intel_gpu_freq(rps, min.freq),
				  max.freq, intel_gpu_freq(rps, max.freq));
			continue;
		}

		if (11 * min.power > 10 * max.power) {
			pr_err("%s: did not conserve power when setting lower frequency!\n",
			       engine->name);
			err = -EINVAL;
			break;
		}

		if (igt_flush_test(gt->i915)) {
			err = -EIO;
			break;
		}
	}

	igt_spinner_fini(&spin);

	intel_gt_pm_wait_for_idle(gt);
	rps->work.func = saved_work;

	return err;
}
1223
1224int live_rps_dynamic(void *arg)
1225{
1226	struct intel_gt *gt = arg;
1227	struct intel_rps *rps = &gt->rps;
1228	struct intel_engine_cs *engine;
1229	enum intel_engine_id id;
1230	struct igt_spinner spin;
1231	int err = 0;
1232
1233	/*
1234	 * We've looked at the bascs, and have established that we
1235	 * can change the clock frequency and that the HW will generate
1236	 * interrupts based on load. Now we check how we integrate those
1237	 * moving parts into dynamic reclocking based on load.
1238	 */
1239
1240	if (!intel_rps_is_enabled(rps))
1241		return 0;
1242
1243	if (igt_spinner_init(&spin, gt))
1244		return -ENOMEM;
1245
1246	if (intel_rps_has_interrupts(rps))
1247		pr_info("RPS has interrupt support\n");
1248	if (intel_rps_uses_timer(rps))
1249		pr_info("RPS has timer support\n");
1250
1251	for_each_engine(engine, gt, id) {
1252		struct i915_request *rq;
1253		struct {
1254			ktime_t dt;
1255			u8 freq;
1256		} min, max;
1257
1258		if (!intel_engine_can_store_dword(engine))
1259			continue;
1260
1261		intel_gt_pm_wait_for_idle(gt);
1262		GEM_BUG_ON(intel_rps_is_active(rps));
1263		rps->cur_freq = rps->min_freq;
1264
1265		intel_engine_pm_get(engine);
1266		intel_rc6_disable(&gt->rc6);
1267		GEM_BUG_ON(rps->last_freq != rps->min_freq);
1268
1269		rq = igt_spinner_create_request(&spin,
1270						engine->kernel_context,
1271						MI_NOOP);
1272		if (IS_ERR(rq)) {
1273			err = PTR_ERR(rq);
1274			goto err;
1275		}
1276
1277		i915_request_add(rq);
1278
1279		max.dt = ktime_get();
1280		max.freq = wait_for_freq(rps, rps->max_freq, 500);
1281		max.dt = ktime_sub(ktime_get(), max.dt);
1282
1283		igt_spinner_end(&spin);
1284
1285		min.dt = ktime_get();
1286		min.freq = wait_for_freq(rps, rps->min_freq, 2000);
1287		min.dt = ktime_sub(ktime_get(), min.dt);
1288
1289		pr_info("%s: dynamically reclocked to %u:%uMHz while busy in %lluns, and %u:%uMHz while idle in %lluns\n",
1290			engine->name,
1291			max.freq, intel_gpu_freq(rps, max.freq),
1292			ktime_to_ns(max.dt),
1293			min.freq, intel_gpu_freq(rps, min.freq),
1294			ktime_to_ns(min.dt));
1295		if (min.freq >= max.freq) {
1296			pr_err("%s: dynamic reclocking of spinner failed\n!",
1297			       engine->name);
1298			err = -EINVAL;
1299		}
1300
1301err:
1302		intel_rc6_enable(&gt->rc6);
1303		intel_engine_pm_put(engine);
1304
1305		if (igt_flush_test(gt->i915))
1306			err = -EIO;
1307		if (err)
1308			break;
1309	}
1310
1311	igt_spinner_fini(&spin);
1312
1313	return err;
1314}
1315