162306a36Sopenharmony_ci// SPDX-License-Identifier: MIT
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Copyright © 2023 Intel Corporation
462306a36Sopenharmony_ci */
562306a36Sopenharmony_ci
662306a36Sopenharmony_ci#include "i915_drv.h"
762306a36Sopenharmony_ci#include "i915_perf_oa_regs.h"
862306a36Sopenharmony_ci#include "intel_engine_pm.h"
962306a36Sopenharmony_ci#include "intel_gt.h"
1062306a36Sopenharmony_ci#include "intel_gt_mcr.h"
1162306a36Sopenharmony_ci#include "intel_gt_pm.h"
1262306a36Sopenharmony_ci#include "intel_gt_print.h"
1362306a36Sopenharmony_ci#include "intel_gt_regs.h"
1462306a36Sopenharmony_ci#include "intel_tlb.h"
1562306a36Sopenharmony_ci
/*
 * The HW architecture suggests a typical invalidation time of 40us,
 * with pessimistic cases of up to 100us, and recommends capping the
 * wait at 1ms. We go a bit higher just in case.
 *
 * Both values are handed to the register-wait helpers called from
 * wait_for_invalidate() (the _US value first, then the _MS value), and
 * TLB_INVAL_TIMEOUT_MS is also the figure printed when a wait fails.
 */
#define TLB_INVAL_TIMEOUT_US 100
#define TLB_INVAL_TIMEOUT_MS 4
2362306a36Sopenharmony_ci
2462306a36Sopenharmony_ci/*
2562306a36Sopenharmony_ci * On Xe_HP the TLB invalidation registers are located at the same MMIO offsets
2662306a36Sopenharmony_ci * but are now considered MCR registers.  Since they exist within a GAM range,
2762306a36Sopenharmony_ci * the primary instance of the register rolls up the status from each unit.
2862306a36Sopenharmony_ci */
2962306a36Sopenharmony_cistatic int wait_for_invalidate(struct intel_engine_cs *engine)
3062306a36Sopenharmony_ci{
3162306a36Sopenharmony_ci	if (engine->tlb_inv.mcr)
3262306a36Sopenharmony_ci		return intel_gt_mcr_wait_for_reg(engine->gt,
3362306a36Sopenharmony_ci						 engine->tlb_inv.reg.mcr_reg,
3462306a36Sopenharmony_ci						 engine->tlb_inv.done,
3562306a36Sopenharmony_ci						 0,
3662306a36Sopenharmony_ci						 TLB_INVAL_TIMEOUT_US,
3762306a36Sopenharmony_ci						 TLB_INVAL_TIMEOUT_MS);
3862306a36Sopenharmony_ci	else
3962306a36Sopenharmony_ci		return __intel_wait_for_register_fw(engine->gt->uncore,
4062306a36Sopenharmony_ci						    engine->tlb_inv.reg.reg,
4162306a36Sopenharmony_ci						    engine->tlb_inv.done,
4262306a36Sopenharmony_ci						    0,
4362306a36Sopenharmony_ci						    TLB_INVAL_TIMEOUT_US,
4462306a36Sopenharmony_ci						    TLB_INVAL_TIMEOUT_MS,
4562306a36Sopenharmony_ci						    NULL);
4662306a36Sopenharmony_ci}
4762306a36Sopenharmony_ci
4862306a36Sopenharmony_cistatic void mmio_invalidate_full(struct intel_gt *gt)
4962306a36Sopenharmony_ci{
5062306a36Sopenharmony_ci	struct drm_i915_private *i915 = gt->i915;
5162306a36Sopenharmony_ci	struct intel_uncore *uncore = gt->uncore;
5262306a36Sopenharmony_ci	struct intel_engine_cs *engine;
5362306a36Sopenharmony_ci	intel_engine_mask_t awake, tmp;
5462306a36Sopenharmony_ci	enum intel_engine_id id;
5562306a36Sopenharmony_ci	unsigned long flags;
5662306a36Sopenharmony_ci
5762306a36Sopenharmony_ci	if (GRAPHICS_VER(i915) < 8)
5862306a36Sopenharmony_ci		return;
5962306a36Sopenharmony_ci
6062306a36Sopenharmony_ci	intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
6162306a36Sopenharmony_ci
6262306a36Sopenharmony_ci	intel_gt_mcr_lock(gt, &flags);
6362306a36Sopenharmony_ci	spin_lock(&uncore->lock); /* serialise invalidate with GT reset */
6462306a36Sopenharmony_ci
6562306a36Sopenharmony_ci	awake = 0;
6662306a36Sopenharmony_ci	for_each_engine(engine, gt, id) {
6762306a36Sopenharmony_ci		if (!intel_engine_pm_is_awake(engine))
6862306a36Sopenharmony_ci			continue;
6962306a36Sopenharmony_ci
7062306a36Sopenharmony_ci		if (engine->tlb_inv.mcr)
7162306a36Sopenharmony_ci			intel_gt_mcr_multicast_write_fw(gt,
7262306a36Sopenharmony_ci							engine->tlb_inv.reg.mcr_reg,
7362306a36Sopenharmony_ci							engine->tlb_inv.request);
7462306a36Sopenharmony_ci		else
7562306a36Sopenharmony_ci			intel_uncore_write_fw(uncore,
7662306a36Sopenharmony_ci					      engine->tlb_inv.reg.reg,
7762306a36Sopenharmony_ci					      engine->tlb_inv.request);
7862306a36Sopenharmony_ci
7962306a36Sopenharmony_ci		awake |= engine->mask;
8062306a36Sopenharmony_ci	}
8162306a36Sopenharmony_ci
8262306a36Sopenharmony_ci	GT_TRACE(gt, "invalidated engines %08x\n", awake);
8362306a36Sopenharmony_ci
8462306a36Sopenharmony_ci	/* Wa_2207587034:tgl,dg1,rkl,adl-s,adl-p */
8562306a36Sopenharmony_ci	if (awake &&
8662306a36Sopenharmony_ci	    (IS_TIGERLAKE(i915) ||
8762306a36Sopenharmony_ci	     IS_DG1(i915) ||
8862306a36Sopenharmony_ci	     IS_ROCKETLAKE(i915) ||
8962306a36Sopenharmony_ci	     IS_ALDERLAKE_S(i915) ||
9062306a36Sopenharmony_ci	     IS_ALDERLAKE_P(i915)))
9162306a36Sopenharmony_ci		intel_uncore_write_fw(uncore, GEN12_OA_TLB_INV_CR, 1);
9262306a36Sopenharmony_ci
9362306a36Sopenharmony_ci	spin_unlock(&uncore->lock);
9462306a36Sopenharmony_ci	intel_gt_mcr_unlock(gt, flags);
9562306a36Sopenharmony_ci
9662306a36Sopenharmony_ci	for_each_engine_masked(engine, gt, awake, tmp) {
9762306a36Sopenharmony_ci		if (wait_for_invalidate(engine))
9862306a36Sopenharmony_ci			gt_err_ratelimited(gt,
9962306a36Sopenharmony_ci					   "%s TLB invalidation did not complete in %ums!\n",
10062306a36Sopenharmony_ci					   engine->name, TLB_INVAL_TIMEOUT_MS);
10162306a36Sopenharmony_ci	}
10262306a36Sopenharmony_ci
10362306a36Sopenharmony_ci	/*
10462306a36Sopenharmony_ci	 * Use delayed put since a) we mostly expect a flurry of TLB
10562306a36Sopenharmony_ci	 * invalidations so it is good to avoid paying the forcewake cost and
10662306a36Sopenharmony_ci	 * b) it works around a bug in Icelake which cannot cope with too rapid
10762306a36Sopenharmony_ci	 * transitions.
10862306a36Sopenharmony_ci	 */
10962306a36Sopenharmony_ci	intel_uncore_forcewake_put_delayed(uncore, FORCEWAKE_ALL);
11062306a36Sopenharmony_ci}
11162306a36Sopenharmony_ci
11262306a36Sopenharmony_cistatic bool tlb_seqno_passed(const struct intel_gt *gt, u32 seqno)
11362306a36Sopenharmony_ci{
11462306a36Sopenharmony_ci	u32 cur = intel_gt_tlb_seqno(gt);
11562306a36Sopenharmony_ci
11662306a36Sopenharmony_ci	/* Only skip if a *full* TLB invalidate barrier has passed */
11762306a36Sopenharmony_ci	return (s32)(cur - ALIGN(seqno, 2)) > 0;
11862306a36Sopenharmony_ci}
11962306a36Sopenharmony_ci
12062306a36Sopenharmony_civoid intel_gt_invalidate_tlb_full(struct intel_gt *gt, u32 seqno)
12162306a36Sopenharmony_ci{
12262306a36Sopenharmony_ci	intel_wakeref_t wakeref;
12362306a36Sopenharmony_ci
12462306a36Sopenharmony_ci	if (I915_SELFTEST_ONLY(gt->awake == -ENODEV))
12562306a36Sopenharmony_ci		return;
12662306a36Sopenharmony_ci
12762306a36Sopenharmony_ci	if (intel_gt_is_wedged(gt))
12862306a36Sopenharmony_ci		return;
12962306a36Sopenharmony_ci
13062306a36Sopenharmony_ci	if (tlb_seqno_passed(gt, seqno))
13162306a36Sopenharmony_ci		return;
13262306a36Sopenharmony_ci
13362306a36Sopenharmony_ci	with_intel_gt_pm_if_awake(gt, wakeref) {
13462306a36Sopenharmony_ci		mutex_lock(&gt->tlb.invalidate_lock);
13562306a36Sopenharmony_ci		if (tlb_seqno_passed(gt, seqno))
13662306a36Sopenharmony_ci			goto unlock;
13762306a36Sopenharmony_ci
13862306a36Sopenharmony_ci		mmio_invalidate_full(gt);
13962306a36Sopenharmony_ci
14062306a36Sopenharmony_ci		write_seqcount_invalidate(&gt->tlb.seqno);
14162306a36Sopenharmony_ciunlock:
14262306a36Sopenharmony_ci		mutex_unlock(&gt->tlb.invalidate_lock);
14362306a36Sopenharmony_ci	}
14462306a36Sopenharmony_ci}
14562306a36Sopenharmony_ci
/*
 * Set up the TLB invalidation state of @gt: initialise
 * gt->tlb.invalidate_lock and then gt->tlb.seqno, passing that same mutex
 * to the seqcount initialiser.
 */
void intel_gt_init_tlb(struct intel_gt *gt)
{
	mutex_init(&gt->tlb.invalidate_lock);
	/* The seqcount is initialised against the mutex set up just above. */
	seqcount_mutex_init(&gt->tlb.seqno, &gt->tlb.invalidate_lock);
}
15162306a36Sopenharmony_ci
/*
 * Tear down the TLB invalidation state of @gt by destroying
 * gt->tlb.invalidate_lock, the mutex initialised in intel_gt_init_tlb().
 */
void intel_gt_fini_tlb(struct intel_gt *gt)
{
	mutex_destroy(&gt->tlb.invalidate_lock);
}
15662306a36Sopenharmony_ci
15762306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
15862306a36Sopenharmony_ci#include "selftest_tlb.c"
15962306a36Sopenharmony_ci#endif
160