// SPDX-License-Identifier: MIT
/*
 * Copyright © 2023 Intel Corporation
 */

#include "i915_drv.h"
#include "i915_perf_oa_regs.h"
#include "intel_engine_pm.h"
#include "intel_gt.h"
#include "intel_gt_mcr.h"
#include "intel_gt_pm.h"
#include "intel_gt_print.h"
#include "intel_gt_regs.h"
#include "intel_tlb.h"

/*
 * HW architecture suggests a typical invalidation time of 40us,
 * with pessimistic cases up to 100us and a recommendation to
 * cap at 1ms. We go a bit higher just in case.
 */
#define TLB_INVAL_TIMEOUT_US 100
#define TLB_INVAL_TIMEOUT_MS 4

/*
 * On Xe_HP the TLB invalidation registers are located at the same MMIO offsets
 * but are now considered MCR registers.  Since they exist within a GAM range,
 * the primary instance of the register rolls up the status from each unit.
 */
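/*
 * Poll until the engine's TLB invalidation "done" bits clear, using the
 * MCR helper for multicast registers and the legacy MMIO helper otherwise.
 */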
static int wait_for_invalidate(struct intel_engine_cs *engine)
{
	if (engine->tlb_inv.mcr)
		return intel_gt_mcr_wait_for_reg(engine->gt,
						 engine->tlb_inv.reg.mcr_reg,
						 engine->tlb_inv.done,
						 0,
						 TLB_INVAL_TIMEOUT_US,
						 TLB_INVAL_TIMEOUT_MS);
	else
		return __intel_wait_for_register_fw(engine->gt->uncore,
						    engine->tlb_inv.reg.reg,
						    engine->tlb_inv.done,
						    0,
						    TLB_INVAL_TIMEOUT_US,
						    TLB_INVAL_TIMEOUT_MS,
						    NULL);
}

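/*
 * Write the TLB invalidation request register of every awake engine under
 * forcewake and the uncore lock (to serialise against GT reset), then wait
 * for each engine to report completion.
 */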
static void mmio_invalidate_full(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_uncore *uncore = gt->uncore;
	struct intel_engine_cs *engine;
	intel_engine_mask_t awake, tmp;
	enum intel_engine_id id;
	unsigned long flags;

	if (GRAPHICS_VER(i915) < 8)
		return;

	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);

	intel_gt_mcr_lock(gt, &flags);
	spin_lock(&uncore->lock); /* serialise invalidate with GT reset */

	awake = 0;
	for_each_engine(engine, gt, id) {
		if (!intel_engine_pm_is_awake(engine))
			continue;

		if (engine->tlb_inv.mcr)
			intel_gt_mcr_multicast_write_fw(gt,
							engine->tlb_inv.reg.mcr_reg,
							engine->tlb_inv.request);
		else
			intel_uncore_write_fw(uncore,
					      engine->tlb_inv.reg.reg,
					      engine->tlb_inv.request);

		awake |= engine->mask;
	}

	GT_TRACE(gt, "invalidated engines %08x\n", awake);

	/* Wa_2207587034:tgl,dg1,rkl,adl-s,adl-p */
	if (awake &&
	    (IS_TIGERLAKE(i915) ||
	     IS_DG1(i915) ||
	     IS_ROCKETLAKE(i915) ||
	     IS_ALDERLAKE_S(i915) ||
	     IS_ALDERLAKE_P(i915)))
		intel_uncore_write_fw(uncore, GEN12_OA_TLB_INV_CR, 1);

	spin_unlock(&uncore->lock);
	intel_gt_mcr_unlock(gt, flags);

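	/*
	 * Wait for completion only after dropping the locks; each poll
	 * below may take up to TLB_INVAL_TIMEOUT_MS.
	 */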
	for_each_engine_masked(engine, gt, awake, tmp) {
		if (wait_for_invalidate(engine))
			gt_err_ratelimited(gt,
					   "%s TLB invalidation did not complete in %ums!\n",
					   engine->name, TLB_INVAL_TIMEOUT_MS);
	}

	/*
	 * Use delayed put since a) we mostly expect a flurry of TLB
	 * invalidations so it is good to avoid paying the forcewake cost and
	 * b) it works around a bug in Icelake which cannot cope with too rapid
	 * transitions.
	 */
	intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL);
}

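/*
 * Full invalidations bump gt->tlb.seqno by two (write_seqcount_invalidate()),
 * while pending requests are tracked as odd seqnos (see
 * intel_gt_next_invalidate_tlb_full()). Rounding @seqno up to even thus
 * yields the counter value reached once the next full barrier completes.
 */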
static bool tlb_seqno_passed(const struct intel_gt *gt, u32 seqno)
{
	u32 cur = intel_gt_tlb_seqno(gt);

	/* Only skip if a *full* TLB invalidate barrier has passed */
	return (s32)(cur - ALIGN(seqno, 2)) > 0;
}

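/**
 * intel_gt_invalidate_tlb_full - do full TLB invalidation
 * @gt: GT structure
 * @seqno: sequence number for which the invalidation was requested
 *
 * Perform a full TLB invalidation unless @seqno has already been covered
 * by an earlier barrier, the GT is wedged, or (in selftests) the GT was
 * never woken. A sketch of typical use, assuming the
 * intel_gt_next_invalidate_tlb_full() helper from intel_tlb.h:
 *
 *	u32 seqno = intel_gt_next_invalidate_tlb_full(gt);
 *	... tear down the mapping ...
 *	intel_gt_invalidate_tlb_full(gt, seqno);
 *
 * Deferring the call lets a burst of unbinds share a single invalidation.
 */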
void intel_gt_invalidate_tlb_full(struct intel_gt *gt, u32 seqno)
{
	intel_wakeref_t wakeref;

	if (I915_SELFTEST_ONLY(gt->awake == -ENODEV))
		return;

	if (intel_gt_is_wedged(gt))
		return;

	if (tlb_seqno_passed(gt, seqno))
		return;

	with_intel_gt_pm_if_awake(gt, wakeref) {
		mutex_lock(&gt->tlb.invalidate_lock);
		if (tlb_seqno_passed(gt, seqno))
			goto unlock;

		mmio_invalidate_full(gt);

		write_seqcount_invalidate(&gt->tlb.seqno);
unlock:
		mutex_unlock(&gt->tlb.invalidate_lock);
	}
}

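/*
 * Set up the mutex and seqcount used to order and batch full TLB
 * invalidations for this GT.
 */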
void intel_gt_init_tlb(struct intel_gt *gt)
{
	mutex_init(&gt->tlb.invalidate_lock);
	seqcount_mutex_init(&gt->tlb.seqno, &gt->tlb.invalidate_lock);
}

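/* Tear down the state set up by intel_gt_init_tlb(). */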
void intel_gt_fini_tlb(struct intel_gt *gt)
{
	mutex_destroy(&gt->tlb.invalidate_lock);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_tlb.c"
#endif