/*
 * GTT virtualization
 *
 * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Zhi Wang <zhi.a.wang@intel.com>
 *    Zhenyu Wang <zhenyuw@linux.intel.com>
 *    Xiao Zheng <xiao.zheng@intel.com>
 *
 * Contributors:
 *    Min He <min.he@intel.com>
 *    Bing Niu <bing.niu@intel.com>
 *
 */

#include "i915_drv.h"
#include "gvt.h"
#include "i915_pvinfo.h"
#include "trace.h"

#if defined(VERBOSE_DEBUG)
#define gvt_vdbg_mm(fmt, args...) gvt_dbg_mm(fmt, ##args)
#else
#define gvt_vdbg_mm(fmt, args...)
#endif

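/*
 * Knobs for the out-of-sync (OOS) shadow page optimization implemented
 * below: enable_out_of_sync gates the whole OOS path, while
 * preallocated_oos_pages is the number of OOS page slots set aside up
 * front (the pool itself is created elsewhere in this file).
 */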
static bool enable_out_of_sync = false;
static int preallocated_oos_pages = 8192;

/*
 * Validate a GM address and the related range size: the whole range must
 * fall within either the vGPU's aperture or its hidden GM space.
 */
bool intel_gvt_ggtt_validate_range(struct intel_vgpu *vgpu, u64 addr, u32 size)
{
	if (size == 0)
		return vgpu_gmadr_is_valid(vgpu, addr);

	if (vgpu_gmadr_is_aperture(vgpu, addr) &&
	    vgpu_gmadr_is_aperture(vgpu, addr + size - 1))
		return true;
	else if (vgpu_gmadr_is_hidden(vgpu, addr) &&
		 vgpu_gmadr_is_hidden(vgpu, addr + size - 1))
		return true;

	gvt_dbg_mm("Invalid ggtt range at 0x%llx, size: 0x%x\n",
		     addr, size);
	return false;
}

/* translate a guest gmadr to host gmadr */
int intel_gvt_ggtt_gmadr_g2h(struct intel_vgpu *vgpu, u64 g_addr, u64 *h_addr)
{
	struct drm_i915_private *i915 = vgpu->gvt->gt->i915;

	if (drm_WARN(&i915->drm, !vgpu_gmadr_is_valid(vgpu, g_addr),
		     "invalid guest gmadr %llx\n", g_addr))
		return -EACCES;

	if (vgpu_gmadr_is_aperture(vgpu, g_addr))
		*h_addr = vgpu_aperture_gmadr_base(vgpu)
			  + (g_addr - vgpu_aperture_offset(vgpu));
	else
		*h_addr = vgpu_hidden_gmadr_base(vgpu)
			  + (g_addr - vgpu_hidden_offset(vgpu));
	return 0;
}

/* translate a host gmadr to guest gmadr */
int intel_gvt_ggtt_gmadr_h2g(struct intel_vgpu *vgpu, u64 h_addr, u64 *g_addr)
{
	struct drm_i915_private *i915 = vgpu->gvt->gt->i915;

	if (drm_WARN(&i915->drm, !gvt_gmadr_is_valid(vgpu->gvt, h_addr),
		     "invalid host gmadr %llx\n", h_addr))
		return -EACCES;

	if (gvt_gmadr_is_aperture(vgpu->gvt, h_addr))
		*g_addr = vgpu_aperture_gmadr_base(vgpu)
			+ (h_addr - gvt_aperture_gmadr_base(vgpu->gvt));
	else
		*g_addr = vgpu_hidden_gmadr_base(vgpu)
			+ (h_addr - gvt_hidden_gmadr_base(vgpu->gvt));
	return 0;
}

int intel_gvt_ggtt_index_g2h(struct intel_vgpu *vgpu, unsigned long g_index,
			     unsigned long *h_index)
{
	u64 h_addr;
	int ret;

	ret = intel_gvt_ggtt_gmadr_g2h(vgpu, g_index << I915_GTT_PAGE_SHIFT,
				       &h_addr);
	if (ret)
		return ret;

	*h_index = h_addr >> I915_GTT_PAGE_SHIFT;
	return 0;
}

int intel_gvt_ggtt_h2g_index(struct intel_vgpu *vgpu, unsigned long h_index,
			     unsigned long *g_index)
{
	u64 g_addr;
	int ret;

	ret = intel_gvt_ggtt_gmadr_h2g(vgpu, h_index << I915_GTT_PAGE_SHIFT,
				       &g_addr);
	if (ret)
		return ret;

	*g_index = g_addr >> I915_GTT_PAGE_SHIFT;
	return 0;
}

#define gtt_type_is_entry(type) \
	(type > GTT_TYPE_INVALID && type < GTT_TYPE_PPGTT_ENTRY \
	 && type != GTT_TYPE_PPGTT_PTE_ENTRY \
	 && type != GTT_TYPE_PPGTT_ROOT_ENTRY)

#define gtt_type_is_pt(type) \
	(type >= GTT_TYPE_PPGTT_PTE_PT && type < GTT_TYPE_MAX)

#define gtt_type_is_pte_pt(type) \
	(type == GTT_TYPE_PPGTT_PTE_PT)

#define gtt_type_is_root_pointer(type) \
	(gtt_type_is_entry(type) && type > GTT_TYPE_PPGTT_ROOT_ENTRY)

#define gtt_init_entry(e, t, p, v) do { \
	(e)->type = t; \
	(e)->pdev = p; \
	memcpy(&(e)->val64, &v, sizeof(v)); \
} while (0)

/*
 * Mappings between GTT_TYPE* enumerations.
 * The following information can be looked up for a given type:
 * - type of the next level page table
 * - type of an entry inside this level of page table
 * - type of the entry when PSE is set
 *
 * If the given type doesn't carry that kind of information, e.g. asking
 * for the PSE type of an L4 root entry, or for the next-level page table
 * type of a PTE page table (an L4 root entry has no PSE bit, and a PTE
 * page table has no next level), GTT_TYPE_INVALID is returned. This is
 * useful when traversing a page table.
 */
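/*
 * For example, the GTT_TYPE_PPGTT_PDE_PT row of gtt_type_table below says
 * that such a page table holds GTT_TYPE_PPGTT_PDE_ENTRY entries, that its
 * next level is a GTT_TYPE_PPGTT_PTE_PT, and that an entry with PSE set is
 * a GTT_TYPE_PPGTT_PTE_2M_ENTRY.
 */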

struct gtt_type_table_entry {
	int entry_type;
	int pt_type;
	int next_pt_type;
	int pse_entry_type;
};

#define GTT_TYPE_TABLE_ENTRY(type, e_type, cpt_type, npt_type, pse_type) \
	[type] = { \
		.entry_type = e_type, \
		.pt_type = cpt_type, \
		.next_pt_type = npt_type, \
		.pse_entry_type = pse_type, \
	}

static struct gtt_type_table_entry gtt_type_table[] = {
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_ROOT_L4_ENTRY,
			GTT_TYPE_PPGTT_ROOT_L4_ENTRY,
			GTT_TYPE_INVALID,
			GTT_TYPE_PPGTT_PML4_PT,
			GTT_TYPE_INVALID),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PML4_PT,
			GTT_TYPE_PPGTT_PML4_ENTRY,
			GTT_TYPE_PPGTT_PML4_PT,
			GTT_TYPE_PPGTT_PDP_PT,
			GTT_TYPE_INVALID),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PML4_ENTRY,
			GTT_TYPE_PPGTT_PML4_ENTRY,
			GTT_TYPE_PPGTT_PML4_PT,
			GTT_TYPE_PPGTT_PDP_PT,
			GTT_TYPE_INVALID),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDP_PT,
			GTT_TYPE_PPGTT_PDP_ENTRY,
			GTT_TYPE_PPGTT_PDP_PT,
			GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_PPGTT_PTE_1G_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_ROOT_L3_ENTRY,
			GTT_TYPE_PPGTT_ROOT_L3_ENTRY,
			GTT_TYPE_INVALID,
			GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_PPGTT_PTE_1G_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDP_ENTRY,
			GTT_TYPE_PPGTT_PDP_ENTRY,
			GTT_TYPE_PPGTT_PDP_PT,
			GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_PPGTT_PTE_1G_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_PPGTT_PDE_ENTRY,
			GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_PPGTT_PTE_PT,
			GTT_TYPE_PPGTT_PTE_2M_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PDE_ENTRY,
			GTT_TYPE_PPGTT_PDE_ENTRY,
			GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_PPGTT_PTE_PT,
			GTT_TYPE_PPGTT_PTE_2M_ENTRY),
	/* We take IPS bit as 'PSE' for PTE level. */
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_PT,
			GTT_TYPE_PPGTT_PTE_4K_ENTRY,
			GTT_TYPE_PPGTT_PTE_PT,
			GTT_TYPE_INVALID,
			GTT_TYPE_PPGTT_PTE_64K_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_4K_ENTRY,
			GTT_TYPE_PPGTT_PTE_4K_ENTRY,
			GTT_TYPE_PPGTT_PTE_PT,
			GTT_TYPE_INVALID,
			GTT_TYPE_PPGTT_PTE_64K_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_64K_ENTRY,
			GTT_TYPE_PPGTT_PTE_4K_ENTRY,
			GTT_TYPE_PPGTT_PTE_PT,
			GTT_TYPE_INVALID,
			GTT_TYPE_PPGTT_PTE_64K_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_2M_ENTRY,
			GTT_TYPE_PPGTT_PDE_ENTRY,
			GTT_TYPE_PPGTT_PDE_PT,
			GTT_TYPE_INVALID,
			GTT_TYPE_PPGTT_PTE_2M_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_PPGTT_PTE_1G_ENTRY,
			GTT_TYPE_PPGTT_PDP_ENTRY,
			GTT_TYPE_PPGTT_PDP_PT,
			GTT_TYPE_INVALID,
			GTT_TYPE_PPGTT_PTE_1G_ENTRY),
	GTT_TYPE_TABLE_ENTRY(GTT_TYPE_GGTT_PTE,
			GTT_TYPE_GGTT_PTE,
			GTT_TYPE_INVALID,
			GTT_TYPE_INVALID,
			GTT_TYPE_INVALID),
};

static inline int get_next_pt_type(int type)
{
	return gtt_type_table[type].next_pt_type;
}

static inline int get_pt_type(int type)
{
	return gtt_type_table[type].pt_type;
}

static inline int get_entry_type(int type)
{
	return gtt_type_table[type].entry_type;
}

static inline int get_pse_type(int type)
{
	return gtt_type_table[type].pse_entry_type;
}

static u64 read_pte64(struct i915_ggtt *ggtt, unsigned long index)
{
	void __iomem *addr = (gen8_pte_t __iomem *)ggtt->gsm + index;

	return readq(addr);
}

static void ggtt_invalidate(struct intel_gt *gt)
{
	mmio_hw_access_pre(gt);
	intel_uncore_write(gt->uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
	mmio_hw_access_post(gt);
}

static void write_pte64(struct i915_ggtt *ggtt, unsigned long index, u64 pte)
{
	void __iomem *addr = (gen8_pte_t __iomem *)ggtt->gsm + index;

	writeq(pte, addr);
}
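
/*
 * read_pte64() and write_pte64() above access the host GGTT directly
 * through the CPU mapping of the GSM (ggtt->gsm), one 64-bit PTE per
 * index, while ggtt_invalidate() kicks GFX_FLSH_CNTL so the hardware
 * observes the updated entries.
 */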

static inline int gtt_get_entry64(void *pt,
		struct intel_gvt_gtt_entry *e,
		unsigned long index, bool hypervisor_access, unsigned long gpa,
		struct intel_vgpu *vgpu)
{
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	int ret;

	if (WARN_ON(info->gtt_entry_size != 8))
		return -EINVAL;

	if (hypervisor_access) {
		ret = intel_gvt_hypervisor_read_gpa(vgpu, gpa +
				(index << info->gtt_entry_size_shift),
				&e->val64, 8);
		if (WARN_ON(ret))
			return ret;
	} else if (!pt) {
		e->val64 = read_pte64(vgpu->gvt->gt->ggtt, index);
	} else {
		e->val64 = *((u64 *)pt + index);
	}
	return 0;
}

static inline int gtt_set_entry64(void *pt,
		struct intel_gvt_gtt_entry *e,
		unsigned long index, bool hypervisor_access, unsigned long gpa,
		struct intel_vgpu *vgpu)
{
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	int ret;

	if (WARN_ON(info->gtt_entry_size != 8))
		return -EINVAL;

	if (hypervisor_access) {
		ret = intel_gvt_hypervisor_write_gpa(vgpu, gpa +
				(index << info->gtt_entry_size_shift),
				&e->val64, 8);
		if (WARN_ON(ret))
			return ret;
	} else if (!pt) {
		write_pte64(vgpu->gvt->gt->ggtt, index, e->val64);
	} else {
		*((u64 *)pt + index) = e->val64;
	}
	return 0;
}

#define GTT_HAW 46

#define ADDR_1G_MASK	GENMASK_ULL(GTT_HAW - 1, 30)
#define ADDR_2M_MASK	GENMASK_ULL(GTT_HAW - 1, 21)
#define ADDR_64K_MASK	GENMASK_ULL(GTT_HAW - 1, 16)
#define ADDR_4K_MASK	GENMASK_ULL(GTT_HAW - 1, 12)

#define GTT_SPTE_FLAG_MASK GENMASK_ULL(62, 52)
#define GTT_SPTE_FLAG_64K_SPLITED BIT(52) /* split 64K gtt entry */

#define GTT_64K_PTE_STRIDE 16
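
/*
 * The ADDR_*_MASK macros above select the PFN bits of a gen8 PTE for each
 * page size, up to the 46-bit hardware address width (GTT_HAW). 64K pages
 * never get a dedicated shadow entry; they are handled as runs of
 * GTT_64K_PTE_STRIDE (16) consecutive 4K slots.
 */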

static unsigned long gen8_gtt_get_pfn(struct intel_gvt_gtt_entry *e)
{
	unsigned long pfn;

	if (e->type == GTT_TYPE_PPGTT_PTE_1G_ENTRY)
		pfn = (e->val64 & ADDR_1G_MASK) >> PAGE_SHIFT;
	else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY)
		pfn = (e->val64 & ADDR_2M_MASK) >> PAGE_SHIFT;
	else if (e->type == GTT_TYPE_PPGTT_PTE_64K_ENTRY)
		pfn = (e->val64 & ADDR_64K_MASK) >> PAGE_SHIFT;
	else
		pfn = (e->val64 & ADDR_4K_MASK) >> PAGE_SHIFT;
	return pfn;
}

static void gen8_gtt_set_pfn(struct intel_gvt_gtt_entry *e, unsigned long pfn)
{
	if (e->type == GTT_TYPE_PPGTT_PTE_1G_ENTRY) {
		e->val64 &= ~ADDR_1G_MASK;
		pfn &= (ADDR_1G_MASK >> PAGE_SHIFT);
	} else if (e->type == GTT_TYPE_PPGTT_PTE_2M_ENTRY) {
		e->val64 &= ~ADDR_2M_MASK;
		pfn &= (ADDR_2M_MASK >> PAGE_SHIFT);
	} else if (e->type == GTT_TYPE_PPGTT_PTE_64K_ENTRY) {
		e->val64 &= ~ADDR_64K_MASK;
		pfn &= (ADDR_64K_MASK >> PAGE_SHIFT);
	} else {
		e->val64 &= ~ADDR_4K_MASK;
		pfn &= (ADDR_4K_MASK >> PAGE_SHIFT);
	}

	e->val64 |= (pfn << PAGE_SHIFT);
}

static bool gen8_gtt_test_pse(struct intel_gvt_gtt_entry *e)
{
	return !!(e->val64 & _PAGE_PSE);
}

static void gen8_gtt_clear_pse(struct intel_gvt_gtt_entry *e)
{
	if (gen8_gtt_test_pse(e)) {
		switch (e->type) {
		case GTT_TYPE_PPGTT_PTE_2M_ENTRY:
			e->val64 &= ~_PAGE_PSE;
			e->type = GTT_TYPE_PPGTT_PDE_ENTRY;
			break;
		case GTT_TYPE_PPGTT_PTE_1G_ENTRY:
			e->type = GTT_TYPE_PPGTT_PDP_ENTRY;
			e->val64 &= ~_PAGE_PSE;
			break;
		default:
			WARN_ON(1);
		}
	}
}

static bool gen8_gtt_test_ips(struct intel_gvt_gtt_entry *e)
{
	if (GEM_WARN_ON(e->type != GTT_TYPE_PPGTT_PDE_ENTRY))
		return false;

	return !!(e->val64 & GEN8_PDE_IPS_64K);
}

static void gen8_gtt_clear_ips(struct intel_gvt_gtt_entry *e)
{
	if (GEM_WARN_ON(e->type != GTT_TYPE_PPGTT_PDE_ENTRY))
		return;

	e->val64 &= ~GEN8_PDE_IPS_64K;
}

static bool gen8_gtt_test_present(struct intel_gvt_gtt_entry *e)
{
	/*
	 * i915 writes PDP root pointer registers without the present bit
	 * set, and that still works, so root pointer entries need to be
	 * treated specially here.
	 */
	if (e->type == GTT_TYPE_PPGTT_ROOT_L3_ENTRY
			|| e->type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY)
		return (e->val64 != 0);
	else
		return (e->val64 & _PAGE_PRESENT);
}

static void gtt_entry_clear_present(struct intel_gvt_gtt_entry *e)
{
	e->val64 &= ~_PAGE_PRESENT;
}

static void gtt_entry_set_present(struct intel_gvt_gtt_entry *e)
{
	e->val64 |= _PAGE_PRESENT;
}

static bool gen8_gtt_test_64k_splited(struct intel_gvt_gtt_entry *e)
{
	return !!(e->val64 & GTT_SPTE_FLAG_64K_SPLITED);
}

static void gen8_gtt_set_64k_splited(struct intel_gvt_gtt_entry *e)
{
	e->val64 |= GTT_SPTE_FLAG_64K_SPLITED;
}

static void gen8_gtt_clear_64k_splited(struct intel_gvt_gtt_entry *e)
{
	e->val64 &= ~GTT_SPTE_FLAG_64K_SPLITED;
}
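
/*
 * The *_64k_splited helpers above use GTT_SPTE_FLAG_64K_SPLITED, which
 * lives in the ignored bits (62:52) of a shadow PTE and marks 4K shadow
 * entries created by splitting a guest 64K entry, so that they can be
 * torn down as a group later.
 */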

/*
 * Per-platform GMA routines.
 */
static unsigned long gma_to_ggtt_pte_index(unsigned long gma)
{
	unsigned long x = (gma >> I915_GTT_PAGE_SHIFT);

	trace_gma_index(__func__, gma, x);
	return x;
}

#define DEFINE_PPGTT_GMA_TO_INDEX(prefix, ename, exp) \
static unsigned long prefix##_gma_to_##ename##_index(unsigned long gma) \
{ \
	unsigned long x = (exp); \
	trace_gma_index(__func__, gma, x); \
	return x; \
}

DEFINE_PPGTT_GMA_TO_INDEX(gen8, pte, (gma >> 12 & 0x1ff));
DEFINE_PPGTT_GMA_TO_INDEX(gen8, pde, (gma >> 21 & 0x1ff));
DEFINE_PPGTT_GMA_TO_INDEX(gen8, l3_pdp, (gma >> 30 & 0x3));
DEFINE_PPGTT_GMA_TO_INDEX(gen8, l4_pdp, (gma >> 30 & 0x1ff));
DEFINE_PPGTT_GMA_TO_INDEX(gen8, pml4, (gma >> 39 & 0x1ff));
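
/*
 * Together these helpers decompose a graphics memory address (GMA) into
 * page table indexes: bits 12-20 select the PTE, bits 21-29 the PDE,
 * bits 30-31 (3-level) or 30-38 (4-level) the PDP entry, and bits 39-47
 * the PML4 entry.
 */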

static struct intel_gvt_gtt_pte_ops gen8_gtt_pte_ops = {
	.get_entry = gtt_get_entry64,
	.set_entry = gtt_set_entry64,
	.clear_present = gtt_entry_clear_present,
	.set_present = gtt_entry_set_present,
	.test_present = gen8_gtt_test_present,
	.test_pse = gen8_gtt_test_pse,
	.clear_pse = gen8_gtt_clear_pse,
	.clear_ips = gen8_gtt_clear_ips,
	.test_ips = gen8_gtt_test_ips,
	.clear_64k_splited = gen8_gtt_clear_64k_splited,
	.set_64k_splited = gen8_gtt_set_64k_splited,
	.test_64k_splited = gen8_gtt_test_64k_splited,
	.get_pfn = gen8_gtt_get_pfn,
	.set_pfn = gen8_gtt_set_pfn,
};

static struct intel_gvt_gtt_gma_ops gen8_gtt_gma_ops = {
	.gma_to_ggtt_pte_index = gma_to_ggtt_pte_index,
	.gma_to_pte_index = gen8_gma_to_pte_index,
	.gma_to_pde_index = gen8_gma_to_pde_index,
	.gma_to_l3_pdp_index = gen8_gma_to_l3_pdp_index,
	.gma_to_l4_pdp_index = gen8_gma_to_l4_pdp_index,
	.gma_to_pml4_index = gen8_gma_to_pml4_index,
};

/* Update entry type per pse and ips bit. */
static void update_entry_type_for_real(struct intel_gvt_gtt_pte_ops *pte_ops,
	struct intel_gvt_gtt_entry *entry, bool ips)
{
	switch (entry->type) {
	case GTT_TYPE_PPGTT_PDE_ENTRY:
	case GTT_TYPE_PPGTT_PDP_ENTRY:
		if (pte_ops->test_pse(entry))
			entry->type = get_pse_type(entry->type);
		break;
	case GTT_TYPE_PPGTT_PTE_4K_ENTRY:
		if (ips)
			entry->type = get_pse_type(entry->type);
		break;
	default:
		GEM_BUG_ON(!gtt_type_is_entry(entry->type));
	}

	GEM_BUG_ON(entry->type == GTT_TYPE_INVALID);
}

/*
 * MM helpers.
 */
static void _ppgtt_get_root_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index,
		bool guest)
{
	struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;

	GEM_BUG_ON(mm->type != INTEL_GVT_MM_PPGTT);

	entry->type = mm->ppgtt_mm.root_entry_type;
	pte_ops->get_entry(guest ? mm->ppgtt_mm.guest_pdps :
			   mm->ppgtt_mm.shadow_pdps,
			   entry, index, false, 0, mm->vgpu);
	update_entry_type_for_real(pte_ops, entry, false);
}

static inline void ppgtt_get_guest_root_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index)
{
	_ppgtt_get_root_entry(mm, entry, index, true);
}

static inline void ppgtt_get_shadow_root_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index)
{
	_ppgtt_get_root_entry(mm, entry, index, false);
}

static void _ppgtt_set_root_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index,
		bool guest)
{
	struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;

	pte_ops->set_entry(guest ? mm->ppgtt_mm.guest_pdps :
			   mm->ppgtt_mm.shadow_pdps,
			   entry, index, false, 0, mm->vgpu);
}

static inline void ppgtt_set_guest_root_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index)
{
	_ppgtt_set_root_entry(mm, entry, index, true);
}

static inline void ppgtt_set_shadow_root_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index)
{
	_ppgtt_set_root_entry(mm, entry, index, false);
}

static void ggtt_get_guest_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index)
{
	struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;

	GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);

	entry->type = GTT_TYPE_GGTT_PTE;
	pte_ops->get_entry(mm->ggtt_mm.virtual_ggtt, entry, index,
			   false, 0, mm->vgpu);
}

static void ggtt_set_guest_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index)
{
	struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;

	GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);

	pte_ops->set_entry(mm->ggtt_mm.virtual_ggtt, entry, index,
			   false, 0, mm->vgpu);
}

static void ggtt_get_host_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index)
{
	struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;

	GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);

	pte_ops->get_entry(NULL, entry, index, false, 0, mm->vgpu);
}

static void ggtt_set_host_entry(struct intel_vgpu_mm *mm,
		struct intel_gvt_gtt_entry *entry, unsigned long index)
{
	struct intel_gvt_gtt_pte_ops *pte_ops = mm->vgpu->gvt->gtt.pte_ops;
	unsigned long offset = index;

	GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT);

	if (vgpu_gmadr_is_aperture(mm->vgpu, index << I915_GTT_PAGE_SHIFT)) {
		offset -= (vgpu_aperture_gmadr_base(mm->vgpu) >> PAGE_SHIFT);
		mm->ggtt_mm.host_ggtt_aperture[offset] = entry->val64;
	} else if (vgpu_gmadr_is_hidden(mm->vgpu, index << I915_GTT_PAGE_SHIFT)) {
		offset -= (vgpu_hidden_gmadr_base(mm->vgpu) >> PAGE_SHIFT);
		mm->ggtt_mm.host_ggtt_hidden[offset] = entry->val64;
	}

	pte_ops->set_entry(NULL, entry, index, false, 0, mm->vgpu);
}
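
/*
 * Besides programming the real GGTT, ggtt_set_host_entry() above also keeps
 * a copy of the written PTE in host_ggtt_aperture/host_ggtt_hidden, indexed
 * by the offset into the vGPU's aperture or hidden range.
 */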

/*
 * PPGTT shadow page table helpers.
 */
static inline int ppgtt_spt_get_entry(
		struct intel_vgpu_ppgtt_spt *spt,
		void *page_table, int type,
		struct intel_gvt_gtt_entry *e, unsigned long index,
		bool guest)
{
	struct intel_gvt *gvt = spt->vgpu->gvt;
	struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
	int ret;

	e->type = get_entry_type(type);

	if (WARN(!gtt_type_is_entry(e->type), "invalid entry type\n"))
		return -EINVAL;

	ret = ops->get_entry(page_table, e, index, guest,
			spt->guest_page.gfn << I915_GTT_PAGE_SHIFT,
			spt->vgpu);
	if (ret)
		return ret;

	update_entry_type_for_real(ops, e, guest ?
				   spt->guest_page.pde_ips : false);

	gvt_vdbg_mm("read ppgtt entry, spt type %d, entry type %d, index %lu, value %llx\n",
		    type, e->type, index, e->val64);
	return 0;
}

static inline int ppgtt_spt_set_entry(
		struct intel_vgpu_ppgtt_spt *spt,
		void *page_table, int type,
		struct intel_gvt_gtt_entry *e, unsigned long index,
		bool guest)
{
	struct intel_gvt *gvt = spt->vgpu->gvt;
	struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;

	if (WARN(!gtt_type_is_entry(e->type), "invalid entry type\n"))
		return -EINVAL;

	gvt_vdbg_mm("set ppgtt entry, spt type %d, entry type %d, index %lu, value %llx\n",
		    type, e->type, index, e->val64);

	return ops->set_entry(page_table, e, index, guest,
			spt->guest_page.gfn << I915_GTT_PAGE_SHIFT,
			spt->vgpu);
}

#define ppgtt_get_guest_entry(spt, e, index) \
	ppgtt_spt_get_entry(spt, NULL, \
		spt->guest_page.type, e, index, true)

#define ppgtt_set_guest_entry(spt, e, index) \
	ppgtt_spt_set_entry(spt, NULL, \
		spt->guest_page.type, e, index, true)

#define ppgtt_get_shadow_entry(spt, e, index) \
	ppgtt_spt_get_entry(spt, spt->shadow_page.vaddr, \
		spt->shadow_page.type, e, index, false)

#define ppgtt_set_shadow_entry(spt, e, index) \
	ppgtt_spt_set_entry(spt, spt->shadow_page.vaddr, \
		spt->shadow_page.type, e, index, false)

static void *alloc_spt(gfp_t gfp_mask)
{
	struct intel_vgpu_ppgtt_spt *spt;

	spt = kzalloc(sizeof(*spt), gfp_mask);
	if (!spt)
		return NULL;

	spt->shadow_page.page = alloc_page(gfp_mask);
	if (!spt->shadow_page.page) {
		kfree(spt);
		return NULL;
	}
	return spt;
}

static void free_spt(struct intel_vgpu_ppgtt_spt *spt)
{
	__free_page(spt->shadow_page.page);
	kfree(spt);
}

static int detach_oos_page(struct intel_vgpu *vgpu,
		struct intel_vgpu_oos_page *oos_page);

static void ppgtt_free_spt(struct intel_vgpu_ppgtt_spt *spt)
{
	struct device *kdev = &spt->vgpu->gvt->gt->i915->drm.pdev->dev;

	trace_spt_free(spt->vgpu->id, spt, spt->guest_page.type);

	dma_unmap_page(kdev, spt->shadow_page.mfn << I915_GTT_PAGE_SHIFT, 4096,
		       PCI_DMA_BIDIRECTIONAL);

	radix_tree_delete(&spt->vgpu->gtt.spt_tree, spt->shadow_page.mfn);

	if (spt->guest_page.gfn) {
		if (spt->guest_page.oos_page)
			detach_oos_page(spt->vgpu, spt->guest_page.oos_page);

		intel_vgpu_unregister_page_track(spt->vgpu, spt->guest_page.gfn);
	}

	list_del_init(&spt->post_shadow_list);
	free_spt(spt);
}

static void ppgtt_free_all_spt(struct intel_vgpu *vgpu)
{
	struct intel_vgpu_ppgtt_spt *spt, *spn;
	struct radix_tree_iter iter;
	LIST_HEAD(all_spt);
	void __rcu **slot;

	rcu_read_lock();
	radix_tree_for_each_slot(slot, &vgpu->gtt.spt_tree, &iter, 0) {
		spt = radix_tree_deref_slot(slot);
		list_move(&spt->post_shadow_list, &all_spt);
	}
	rcu_read_unlock();

	list_for_each_entry_safe(spt, spn, &all_spt, post_shadow_list)
		ppgtt_free_spt(spt);
}

static int ppgtt_handle_guest_write_page_table_bytes(
		struct intel_vgpu_ppgtt_spt *spt,
		u64 pa, void *p_data, int bytes);

static int ppgtt_write_protection_handler(
		struct intel_vgpu_page_track *page_track,
		u64 gpa, void *data, int bytes)
{
	struct intel_vgpu_ppgtt_spt *spt = page_track->priv_data;

	if (bytes != 4 && bytes != 8)
		return -EINVAL;

	return ppgtt_handle_guest_write_page_table_bytes(spt, gpa, data, bytes);
}

/* Find a spt by guest gfn. */
static struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_gfn(
		struct intel_vgpu *vgpu, unsigned long gfn)
{
	struct intel_vgpu_page_track *track;

	track = intel_vgpu_find_page_track(vgpu, gfn);
	if (track && track->handler == ppgtt_write_protection_handler)
		return track->priv_data;

	return NULL;
}

/* Find the spt by shadow page mfn. */
static inline struct intel_vgpu_ppgtt_spt *intel_vgpu_find_spt_by_mfn(
		struct intel_vgpu *vgpu, unsigned long mfn)
{
	return radix_tree_lookup(&vgpu->gtt.spt_tree, mfn);
}

static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt);

/* Allocate shadow page table without guest page. */
static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt(
		struct intel_vgpu *vgpu, enum intel_gvt_gtt_type type)
{
	struct device *kdev = &vgpu->gvt->gt->i915->drm.pdev->dev;
	struct intel_vgpu_ppgtt_spt *spt = NULL;
	dma_addr_t daddr;
	int ret;

retry:
	spt = alloc_spt(GFP_KERNEL | __GFP_ZERO);
	if (!spt) {
		if (reclaim_one_ppgtt_mm(vgpu->gvt))
			goto retry;

		gvt_vgpu_err("fail to allocate ppgtt shadow page\n");
		return ERR_PTR(-ENOMEM);
	}

	spt->vgpu = vgpu;
	atomic_set(&spt->refcount, 1);
	INIT_LIST_HEAD(&spt->post_shadow_list);

	/*
	 * Init shadow_page.
	 */
	spt->shadow_page.type = type;
	daddr = dma_map_page(kdev, spt->shadow_page.page,
			     0, 4096, PCI_DMA_BIDIRECTIONAL);
	if (dma_mapping_error(kdev, daddr)) {
		gvt_vgpu_err("fail to map dma addr\n");
		ret = -EINVAL;
		goto err_free_spt;
	}
	spt->shadow_page.vaddr = page_address(spt->shadow_page.page);
	spt->shadow_page.mfn = daddr >> I915_GTT_PAGE_SHIFT;

	ret = radix_tree_insert(&vgpu->gtt.spt_tree, spt->shadow_page.mfn, spt);
	if (ret)
		goto err_unmap_dma;

	return spt;

err_unmap_dma:
	dma_unmap_page(kdev, daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
err_free_spt:
	free_spt(spt);
	return ERR_PTR(ret);
}

/* Allocate shadow page table associated with specific gfn. */
static struct intel_vgpu_ppgtt_spt *ppgtt_alloc_spt_gfn(
		struct intel_vgpu *vgpu, enum intel_gvt_gtt_type type,
		unsigned long gfn, bool guest_pde_ips)
{
	struct intel_vgpu_ppgtt_spt *spt;
	int ret;

	spt = ppgtt_alloc_spt(vgpu, type);
	if (IS_ERR(spt))
		return spt;

	/*
	 * Init guest_page.
	 */
	ret = intel_vgpu_register_page_track(vgpu, gfn,
			ppgtt_write_protection_handler, spt);
	if (ret) {
		ppgtt_free_spt(spt);
		return ERR_PTR(ret);
	}

	spt->guest_page.type = type;
	spt->guest_page.gfn = gfn;
	spt->guest_page.pde_ips = guest_pde_ips;

	trace_spt_alloc(vgpu->id, spt, type, spt->shadow_page.mfn, gfn);

	return spt;
}

#define pt_entry_size_shift(spt) \
	((spt)->vgpu->gvt->device_info.gtt_entry_size_shift)

#define pt_entries(spt) \
	(I915_GTT_PAGE_SIZE >> pt_entry_size_shift(spt))

#define for_each_present_guest_entry(spt, e, i) \
	for (i = 0; i < pt_entries(spt); \
	     i += spt->guest_page.pde_ips ? GTT_64K_PTE_STRIDE : 1) \
		if (!ppgtt_get_guest_entry(spt, e, i) && \
		    spt->vgpu->gvt->gtt.pte_ops->test_present(e))

#define for_each_present_shadow_entry(spt, e, i) \
	for (i = 0; i < pt_entries(spt); \
	     i += spt->shadow_page.pde_ips ? GTT_64K_PTE_STRIDE : 1) \
		if (!ppgtt_get_shadow_entry(spt, e, i) && \
		    spt->vgpu->gvt->gtt.pte_ops->test_present(e))

#define for_each_shadow_entry(spt, e, i) \
	for (i = 0; i < pt_entries(spt); \
	     i += (spt->shadow_page.pde_ips ? GTT_64K_PTE_STRIDE : 1)) \
		if (!ppgtt_get_shadow_entry(spt, e, i))
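
/*
 * The iterators above step by GTT_64K_PTE_STRIDE when the corresponding
 * PDE has IPS set, since only every 16th PTE is meaningful for 64K pages,
 * and the "present" variants additionally skip non-present entries.
 */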

static inline void ppgtt_get_spt(struct intel_vgpu_ppgtt_spt *spt)
{
	int v = atomic_read(&spt->refcount);

	trace_spt_refcount(spt->vgpu->id, "inc", spt, v, (v + 1));
	atomic_inc(&spt->refcount);
}

static inline int ppgtt_put_spt(struct intel_vgpu_ppgtt_spt *spt)
{
	int v = atomic_read(&spt->refcount);

	trace_spt_refcount(spt->vgpu->id, "dec", spt, v, (v - 1));
	return atomic_dec_return(&spt->refcount);
}

static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt);

static int ppgtt_invalidate_spt_by_shadow_entry(struct intel_vgpu *vgpu,
		struct intel_gvt_gtt_entry *e)
{
	struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	struct intel_vgpu_ppgtt_spt *s;
	enum intel_gvt_gtt_type cur_pt_type;

	GEM_BUG_ON(!gtt_type_is_pt(get_next_pt_type(e->type)));

	if (e->type != GTT_TYPE_PPGTT_ROOT_L3_ENTRY
		&& e->type != GTT_TYPE_PPGTT_ROOT_L4_ENTRY) {
		cur_pt_type = get_next_pt_type(e->type);

		if (!gtt_type_is_pt(cur_pt_type) ||
				!gtt_type_is_pt(cur_pt_type + 1)) {
			drm_WARN(&i915->drm, 1,
				 "Invalid page table type, cur_pt_type is: %d\n",
				 cur_pt_type);
			return -EINVAL;
		}

		cur_pt_type += 1;

		if (ops->get_pfn(e) ==
			vgpu->gtt.scratch_pt[cur_pt_type].page_mfn)
			return 0;
	}
	s = intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(e));
	if (!s) {
		gvt_vgpu_err("fail to find shadow page: mfn: 0x%lx\n",
				ops->get_pfn(e));
		return -ENXIO;
	}
	return ppgtt_invalidate_spt(s);
}

static inline void ppgtt_invalidate_pte(struct intel_vgpu_ppgtt_spt *spt,
		struct intel_gvt_gtt_entry *entry)
{
	struct intel_vgpu *vgpu = spt->vgpu;
	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	unsigned long pfn;
	int type;

	pfn = ops->get_pfn(entry);
	type = spt->shadow_page.type;

	/* Uninitialized spte or unshadowed spte. */
	if (!pfn || pfn == vgpu->gtt.scratch_pt[type].page_mfn)
		return;

	intel_gvt_hypervisor_dma_unmap_guest_page(vgpu, pfn << PAGE_SHIFT);
}

static int ppgtt_invalidate_spt(struct intel_vgpu_ppgtt_spt *spt)
{
	struct intel_vgpu *vgpu = spt->vgpu;
	struct intel_gvt_gtt_entry e;
	unsigned long index;
	int ret;

	trace_spt_change(spt->vgpu->id, "die", spt,
			spt->guest_page.gfn, spt->shadow_page.type);

	if (ppgtt_put_spt(spt) > 0)
		return 0;

	for_each_present_shadow_entry(spt, &e, index) {
		switch (e.type) {
		case GTT_TYPE_PPGTT_PTE_4K_ENTRY:
			gvt_vdbg_mm("invalidate 4K entry\n");
			ppgtt_invalidate_pte(spt, &e);
			break;
		case GTT_TYPE_PPGTT_PTE_64K_ENTRY:
			/* We don't set up 64K shadow entries so far. */
			WARN(1, "suspicious 64K gtt entry\n");
			continue;
		case GTT_TYPE_PPGTT_PTE_2M_ENTRY:
			gvt_vdbg_mm("invalidate 2M entry\n");
			continue;
		case GTT_TYPE_PPGTT_PTE_1G_ENTRY:
			WARN(1, "GVT doesn't support 1GB page\n");
			continue;
		case GTT_TYPE_PPGTT_PML4_ENTRY:
		case GTT_TYPE_PPGTT_PDP_ENTRY:
		case GTT_TYPE_PPGTT_PDE_ENTRY:
			gvt_vdbg_mm("invalidate PML4/PDP/PDE entry\n");
			ret = ppgtt_invalidate_spt_by_shadow_entry(
					spt->vgpu, &e);
			if (ret)
				goto fail;
			break;
		default:
			GEM_BUG_ON(1);
		}
	}

	trace_spt_change(spt->vgpu->id, "release", spt,
			 spt->guest_page.gfn, spt->shadow_page.type);
	ppgtt_free_spt(spt);
	return 0;
fail:
	gvt_vgpu_err("fail: shadow page %p shadow entry 0x%llx type %d\n",
			spt, e.val64, e.type);
	return ret;
}

static bool vgpu_ips_enabled(struct intel_vgpu *vgpu)
{
	struct drm_i915_private *dev_priv = vgpu->gvt->gt->i915;

	if (INTEL_GEN(dev_priv) == 9 || INTEL_GEN(dev_priv) == 10) {
		u32 ips = vgpu_vreg_t(vgpu, GEN8_GAMW_ECO_DEV_RW_IA) &
			GAMW_ECO_ENABLE_64K_IPS_FIELD;

		return ips == GAMW_ECO_ENABLE_64K_IPS_FIELD;
	} else if (INTEL_GEN(dev_priv) >= 11) {
		/* 64K paging is now controlled only by the IPS bit in the PTE. */
		return true;
	} else
		return false;
}

static int ppgtt_populate_spt(struct intel_vgpu_ppgtt_spt *spt);

static struct intel_vgpu_ppgtt_spt *ppgtt_populate_spt_by_guest_entry(
		struct intel_vgpu *vgpu, struct intel_gvt_gtt_entry *we)
{
	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	struct intel_vgpu_ppgtt_spt *spt = NULL;
	bool ips = false;
	int ret;

	GEM_BUG_ON(!gtt_type_is_pt(get_next_pt_type(we->type)));

	if (we->type == GTT_TYPE_PPGTT_PDE_ENTRY)
		ips = vgpu_ips_enabled(vgpu) && ops->test_ips(we);

	spt = intel_vgpu_find_spt_by_gfn(vgpu, ops->get_pfn(we));
	if (spt) {
		ppgtt_get_spt(spt);

		if (ips != spt->guest_page.pde_ips) {
			spt->guest_page.pde_ips = ips;

			gvt_dbg_mm("reshadow PDE since ips changed\n");
			clear_page(spt->shadow_page.vaddr);
			ret = ppgtt_populate_spt(spt);
			if (ret) {
				ppgtt_put_spt(spt);
				goto err;
			}
		}
	} else {
		int type = get_next_pt_type(we->type);

		if (!gtt_type_is_pt(type)) {
			ret = -EINVAL;
			goto err;
		}

		spt = ppgtt_alloc_spt_gfn(vgpu, type, ops->get_pfn(we), ips);
		if (IS_ERR(spt)) {
			ret = PTR_ERR(spt);
			goto err;
		}

		ret = intel_vgpu_enable_page_track(vgpu, spt->guest_page.gfn);
		if (ret)
			goto err_free_spt;

		ret = ppgtt_populate_spt(spt);
		if (ret)
			goto err_free_spt;

		trace_spt_change(vgpu->id, "new", spt, spt->guest_page.gfn,
				 spt->shadow_page.type);
	}
	return spt;

err_free_spt:
	ppgtt_free_spt(spt);
	spt = NULL;
err:
	gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
		     spt, we->val64, we->type);
	return ERR_PTR(ret);
}

static inline void ppgtt_generate_shadow_entry(struct intel_gvt_gtt_entry *se,
		struct intel_vgpu_ppgtt_spt *s, struct intel_gvt_gtt_entry *ge)
{
	struct intel_gvt_gtt_pte_ops *ops = s->vgpu->gvt->gtt.pte_ops;

	se->type = ge->type;
	se->val64 = ge->val64;

	/* We always split 64KB pages, so clear IPS in the shadow PDE. */
	if (se->type == GTT_TYPE_PPGTT_PDE_ENTRY)
		ops->clear_ips(se);

	ops->set_pfn(se, s->shadow_page.mfn);
}

/**
 * is_2MB_gtt_possible - check if 2MB huge GTT shadowing is possible
 * @vgpu: target vgpu
 * @entry: target pfn's gtt entry
 *
 * Return 1 if 2MB huge gtt shadowing is possible, 0 if it is not,
 * negative error code if an error is found.
 */
static int is_2MB_gtt_possible(struct intel_vgpu *vgpu,
	struct intel_gvt_gtt_entry *entry)
{
	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	unsigned long pfn;

	if (!HAS_PAGE_SIZES(vgpu->gvt->gt->i915, I915_GTT_PAGE_SIZE_2M))
		return 0;

	pfn = intel_gvt_hypervisor_gfn_to_mfn(vgpu, ops->get_pfn(entry));
	if (pfn == INTEL_GVT_INVALID_ADDR)
		return -EINVAL;

	return PageTransHuge(pfn_to_page(pfn));
}

static int split_2MB_gtt_entry(struct intel_vgpu *vgpu,
	struct intel_vgpu_ppgtt_spt *spt, unsigned long index,
	struct intel_gvt_gtt_entry *se)
{
	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	struct intel_vgpu_ppgtt_spt *sub_spt;
	struct intel_gvt_gtt_entry sub_se;
	unsigned long start_gfn;
	dma_addr_t dma_addr;
	unsigned long sub_index;
	int ret;

	gvt_dbg_mm("Split 2M gtt entry, index %lu\n", index);

	start_gfn = ops->get_pfn(se);

	sub_spt = ppgtt_alloc_spt(vgpu, GTT_TYPE_PPGTT_PTE_PT);
	if (IS_ERR(sub_spt))
		return PTR_ERR(sub_spt);

	for_each_shadow_entry(sub_spt, &sub_se, sub_index) {
		ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu,
				start_gfn + sub_index, PAGE_SIZE, &dma_addr);
		if (ret)
			goto err;
		sub_se.val64 = se->val64;

		/* Copy the PAT field from the PDE. */
		sub_se.val64 &= ~_PAGE_PAT;
		sub_se.val64 |= (se->val64 & _PAGE_PAT_LARGE) >> 5;

		ops->set_pfn(&sub_se, dma_addr >> PAGE_SHIFT);
		ppgtt_set_shadow_entry(sub_spt, &sub_se, sub_index);
	}

	/* Clear the dirty field. */
	se->val64 &= ~_PAGE_DIRTY;

	ops->clear_pse(se);
	ops->clear_ips(se);
	ops->set_pfn(se, sub_spt->shadow_page.mfn);
	ppgtt_set_shadow_entry(spt, se, index);
	return 0;
err:
	/* Cancel the existing address mappings of the DMA addr. */
	for_each_present_shadow_entry(sub_spt, &sub_se, sub_index) {
		gvt_vdbg_mm("invalidate 4K entry\n");
		ppgtt_invalidate_pte(sub_spt, &sub_se);
	}
	/* Release the newly allocated spt. */
	trace_spt_change(sub_spt->vgpu->id, "release", sub_spt,
		sub_spt->guest_page.gfn, sub_spt->shadow_page.type);
	ppgtt_free_spt(sub_spt);
	return ret;
}

static int split_64KB_gtt_entry(struct intel_vgpu *vgpu,
	struct intel_vgpu_ppgtt_spt *spt, unsigned long index,
	struct intel_gvt_gtt_entry *se)
{
	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	struct intel_gvt_gtt_entry entry = *se;
	unsigned long start_gfn;
	dma_addr_t dma_addr;
	int i, ret;

	gvt_vdbg_mm("Split 64K gtt entry, index %lu\n", index);

	GEM_BUG_ON(index % GTT_64K_PTE_STRIDE);

	start_gfn = ops->get_pfn(se);

	entry.type = GTT_TYPE_PPGTT_PTE_4K_ENTRY;
	ops->set_64k_splited(&entry);

	for (i = 0; i < GTT_64K_PTE_STRIDE; i++) {
		ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu,
					start_gfn + i, PAGE_SIZE, &dma_addr);
		if (ret)
			return ret;

		ops->set_pfn(&entry, dma_addr >> PAGE_SHIFT);
		ppgtt_set_shadow_entry(spt, &entry, index + i);
	}
	return 0;
}

static int ppgtt_populate_shadow_entry(struct intel_vgpu *vgpu,
	struct intel_vgpu_ppgtt_spt *spt, unsigned long index,
	struct intel_gvt_gtt_entry *ge)
{
	struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops;
	struct intel_gvt_gtt_entry se = *ge;
	unsigned long gfn, page_size = PAGE_SIZE;
	dma_addr_t dma_addr;
	int ret;

	if (!pte_ops->test_present(ge))
		return 0;

	gfn = pte_ops->get_pfn(ge);

	switch (ge->type) {
	case GTT_TYPE_PPGTT_PTE_4K_ENTRY:
		gvt_vdbg_mm("shadow 4K gtt entry\n");
		break;
	case GTT_TYPE_PPGTT_PTE_64K_ENTRY:
		gvt_vdbg_mm("shadow 64K gtt entry\n");
		/*
		 * The layout of a 64K page is special: the page size is
		 * controlled by the upper PDE. To keep it simple, we always
		 * split a 64K page into smaller 4K pages in the shadow PT.
		 */
		return split_64KB_gtt_entry(vgpu, spt, index, &se);
	case GTT_TYPE_PPGTT_PTE_2M_ENTRY:
		gvt_vdbg_mm("shadow 2M gtt entry\n");
		ret = is_2MB_gtt_possible(vgpu, ge);
		if (ret == 0)
			return split_2MB_gtt_entry(vgpu, spt, index, &se);
		else if (ret < 0)
			return ret;
		page_size = I915_GTT_PAGE_SIZE_2M;
		break;
	case GTT_TYPE_PPGTT_PTE_1G_ENTRY:
		gvt_vgpu_err("GVT doesn't support 1GB entry\n");
		return -EINVAL;
	default:
		GEM_BUG_ON(1);
	}

	/* direct shadow */
	ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, gfn, page_size,
						      &dma_addr);
	if (ret)
		return -ENXIO;

	pte_ops->set_pfn(&se, dma_addr >> PAGE_SHIFT);
	ppgtt_set_shadow_entry(spt, &se, index);
	return 0;
}

static int ppgtt_populate_spt(struct intel_vgpu_ppgtt_spt *spt)
{
	struct intel_vgpu *vgpu = spt->vgpu;
	struct intel_gvt *gvt = vgpu->gvt;
	struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
	struct intel_vgpu_ppgtt_spt *s;
	struct intel_gvt_gtt_entry se, ge;
	unsigned long gfn, i;
	int ret;

	trace_spt_change(spt->vgpu->id, "born", spt,
			 spt->guest_page.gfn, spt->shadow_page.type);

	for_each_present_guest_entry(spt, &ge, i) {
		if (gtt_type_is_pt(get_next_pt_type(ge.type))) {
			s = ppgtt_populate_spt_by_guest_entry(vgpu, &ge);
			if (IS_ERR(s)) {
				ret = PTR_ERR(s);
				goto fail;
			}
			ppgtt_get_shadow_entry(spt, &se, i);
			ppgtt_generate_shadow_entry(&se, s, &ge);
			ppgtt_set_shadow_entry(spt, &se, i);
		} else {
			gfn = ops->get_pfn(&ge);
			if (!intel_gvt_hypervisor_is_valid_gfn(vgpu, gfn)) {
				ops->set_pfn(&se, gvt->gtt.scratch_mfn);
				ppgtt_set_shadow_entry(spt, &se, i);
				continue;
			}

			ret = ppgtt_populate_shadow_entry(vgpu, spt, i, &ge);
			if (ret)
				goto fail;
		}
	}
	return 0;
fail:
	gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
			spt, ge.val64, ge.type);
	return ret;
}

static int ppgtt_handle_guest_entry_removal(struct intel_vgpu_ppgtt_spt *spt,
		struct intel_gvt_gtt_entry *se, unsigned long index)
{
	struct intel_vgpu *vgpu = spt->vgpu;
	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	int ret;

	trace_spt_guest_change(spt->vgpu->id, "remove", spt,
			       spt->shadow_page.type, se->val64, index);

	gvt_vdbg_mm("destroy old shadow entry, type %d, index %lu, value %llx\n",
		    se->type, index, se->val64);

	if (!ops->test_present(se))
		return 0;

	if (ops->get_pfn(se) ==
	    vgpu->gtt.scratch_pt[spt->shadow_page.type].page_mfn)
		return 0;

	if (gtt_type_is_pt(get_next_pt_type(se->type))) {
		struct intel_vgpu_ppgtt_spt *s =
			intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(se));
		if (!s) {
			gvt_vgpu_err("fail to find guest page\n");
			ret = -ENXIO;
			goto fail;
		}
		ret = ppgtt_invalidate_spt(s);
		if (ret)
			goto fail;
	} else {
		/* We don't set up 64K shadow entries so far. */
		WARN(se->type == GTT_TYPE_PPGTT_PTE_64K_ENTRY,
		     "suspicious 64K entry\n");
		ppgtt_invalidate_pte(spt, se);
	}

	return 0;
fail:
	gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d\n",
			spt, se->val64, se->type);
	return ret;
}

static int ppgtt_handle_guest_entry_add(struct intel_vgpu_ppgtt_spt *spt,
		struct intel_gvt_gtt_entry *we, unsigned long index)
{
	struct intel_vgpu *vgpu = spt->vgpu;
	struct intel_gvt_gtt_entry m;
	struct intel_vgpu_ppgtt_spt *s;
	int ret;

	trace_spt_guest_change(spt->vgpu->id, "add", spt, spt->shadow_page.type,
			       we->val64, index);

	gvt_vdbg_mm("add shadow entry: type %d, index %lu, value %llx\n",
		    we->type, index, we->val64);

	if (gtt_type_is_pt(get_next_pt_type(we->type))) {
		s = ppgtt_populate_spt_by_guest_entry(vgpu, we);
		if (IS_ERR(s)) {
			ret = PTR_ERR(s);
			goto fail;
		}
		ppgtt_get_shadow_entry(spt, &m, index);
		ppgtt_generate_shadow_entry(&m, s, we);
		ppgtt_set_shadow_entry(spt, &m, index);
	} else {
		ret = ppgtt_populate_shadow_entry(vgpu, spt, index, we);
		if (ret)
			goto fail;
	}
	return 0;
fail:
	gvt_vgpu_err("fail: spt %p guest entry 0x%llx type %d\n",
		spt, we->val64, we->type);
	return ret;
}

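/*
 * Out-of-sync (OOS) page handling: a write-heavy guest PTE page can be
 * taken out of write protection and shadowed lazily. Its last-seen guest
 * contents are cached in oos_page->mem; sync_oos_page() then re-shadows
 * only the entries that changed before the page is write-protected again.
 */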
static int sync_oos_page(struct intel_vgpu *vgpu,
		struct intel_vgpu_oos_page *oos_page)
{
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	struct intel_gvt *gvt = vgpu->gvt;
	struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
	struct intel_vgpu_ppgtt_spt *spt = oos_page->spt;
	struct intel_gvt_gtt_entry old, new;
	int index;
	int ret;

	trace_oos_change(vgpu->id, "sync", oos_page->id,
			 spt, spt->guest_page.type);

	old.type = new.type = get_entry_type(spt->guest_page.type);
	old.val64 = new.val64 = 0;

	for (index = 0; index < (I915_GTT_PAGE_SIZE >>
				info->gtt_entry_size_shift); index++) {
		ops->get_entry(oos_page->mem, &old, index, false, 0, vgpu);
		ops->get_entry(NULL, &new, index, true,
			       spt->guest_page.gfn << PAGE_SHIFT, vgpu);

		if (old.val64 == new.val64
			&& !test_and_clear_bit(index, spt->post_shadow_bitmap))
			continue;

		trace_oos_sync(vgpu->id, oos_page->id,
				spt, spt->guest_page.type,
				new.val64, index);

		ret = ppgtt_populate_shadow_entry(vgpu, spt, index, &new);
		if (ret)
			return ret;

		ops->set_entry(oos_page->mem, &new, index, false, 0, vgpu);
	}

	spt->guest_page.write_cnt = 0;
	list_del_init(&spt->post_shadow_list);
	return 0;
}

static int detach_oos_page(struct intel_vgpu *vgpu,
		struct intel_vgpu_oos_page *oos_page)
{
	struct intel_gvt *gvt = vgpu->gvt;
	struct intel_vgpu_ppgtt_spt *spt = oos_page->spt;

	trace_oos_change(vgpu->id, "detach", oos_page->id,
			 spt, spt->guest_page.type);

	spt->guest_page.write_cnt = 0;
	spt->guest_page.oos_page = NULL;
	oos_page->spt = NULL;

	list_del_init(&oos_page->vm_list);
	list_move_tail(&oos_page->list, &gvt->gtt.oos_page_free_list_head);

	return 0;
}

static int attach_oos_page(struct intel_vgpu_oos_page *oos_page,
		struct intel_vgpu_ppgtt_spt *spt)
{
	struct intel_gvt *gvt = spt->vgpu->gvt;
	int ret;

	ret = intel_gvt_hypervisor_read_gpa(spt->vgpu,
			spt->guest_page.gfn << I915_GTT_PAGE_SHIFT,
			oos_page->mem, I915_GTT_PAGE_SIZE);
	if (ret)
		return ret;

	oos_page->spt = spt;
	spt->guest_page.oos_page = oos_page;

	list_move_tail(&oos_page->list, &gvt->gtt.oos_page_use_list_head);

	trace_oos_change(spt->vgpu->id, "attach", oos_page->id,
			 spt, spt->guest_page.type);
	return 0;
}

static int ppgtt_set_guest_page_sync(struct intel_vgpu_ppgtt_spt *spt)
{
	struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page;
	int ret;

	ret = intel_vgpu_enable_page_track(spt->vgpu, spt->guest_page.gfn);
	if (ret)
		return ret;

	trace_oos_change(spt->vgpu->id, "set page sync", oos_page->id,
			 spt, spt->guest_page.type);

	list_del_init(&oos_page->vm_list);
	return sync_oos_page(spt->vgpu, oos_page);
}

static int ppgtt_allocate_oos_page(struct intel_vgpu_ppgtt_spt *spt)
{
	struct intel_gvt *gvt = spt->vgpu->gvt;
	struct intel_gvt_gtt *gtt = &gvt->gtt;
	struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page;
	int ret;

	WARN(oos_page, "shadow PPGTT page already has an oos page\n");

	if (list_empty(&gtt->oos_page_free_list_head)) {
		oos_page = container_of(gtt->oos_page_use_list_head.next,
			struct intel_vgpu_oos_page, list);
		ret = ppgtt_set_guest_page_sync(oos_page->spt);
		if (ret)
			return ret;
		ret = detach_oos_page(spt->vgpu, oos_page);
		if (ret)
			return ret;
	} else
		oos_page = container_of(gtt->oos_page_free_list_head.next,
			struct intel_vgpu_oos_page, list);
	return attach_oos_page(oos_page, spt);
}

static int ppgtt_set_guest_page_oos(struct intel_vgpu_ppgtt_spt *spt)
{
	struct intel_vgpu_oos_page *oos_page = spt->guest_page.oos_page;

	if (WARN(!oos_page, "shadow PPGTT page should have an oos page\n"))
		return -EINVAL;

	trace_oos_change(spt->vgpu->id, "set page out of sync", oos_page->id,
			 spt, spt->guest_page.type);

	list_add_tail(&oos_page->vm_list, &spt->vgpu->gtt.oos_page_list_head);
	return intel_vgpu_disable_page_track(spt->vgpu, spt->guest_page.gfn);
}

/**
 * intel_vgpu_sync_oos_pages - sync all the out-of-sync shadow pages for a vGPU
 * @vgpu: a vGPU
 *
 * This function is called before submitting a guest workload to the host,
 * to sync all the out-of-sync shadow pages for the vGPU.
 *
 * Returns:
 * Zero on success, negative error code if failed.
 */
int intel_vgpu_sync_oos_pages(struct intel_vgpu *vgpu)
{
	struct list_head *pos, *n;
	struct intel_vgpu_oos_page *oos_page;
	int ret;

	if (!enable_out_of_sync)
		return 0;

	list_for_each_safe(pos, n, &vgpu->gtt.oos_page_list_head) {
		oos_page = container_of(pos,
				struct intel_vgpu_oos_page, vm_list);
		ret = ppgtt_set_guest_page_sync(oos_page->spt);
		if (ret)
			return ret;
	}
	return 0;
}

/*
 * The heart of PPGTT shadow page table.
 */
static int ppgtt_handle_guest_write_page_table(
		struct intel_vgpu_ppgtt_spt *spt,
		struct intel_gvt_gtt_entry *we, unsigned long index)
{
	struct intel_vgpu *vgpu = spt->vgpu;
	int type = spt->shadow_page.type;
	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	struct intel_gvt_gtt_entry old_se;
	int new_present;
	int i, ret;

	new_present = ops->test_present(we);

	/*
	 * Add the new entry first and then remove the old one, so that the
	 * ppgtt table stays valid during the window between the add and the
	 * removal.
	 */
	ppgtt_get_shadow_entry(spt, &old_se, index);

	if (new_present) {
		ret = ppgtt_handle_guest_entry_add(spt, we, index);
		if (ret)
			goto fail;
	}

	ret = ppgtt_handle_guest_entry_removal(spt, &old_se, index);
	if (ret)
		goto fail;

	if (!new_present) {
		/* For 64KB split entries, we need to clear them all. */
		if (ops->test_64k_splited(&old_se) &&
		    !(index % GTT_64K_PTE_STRIDE)) {
			gvt_vdbg_mm("remove split 64K shadow entries\n");
			for (i = 0; i < GTT_64K_PTE_STRIDE; i++) {
				ops->clear_64k_splited(&old_se);
				ops->set_pfn(&old_se,
					vgpu->gtt.scratch_pt[type].page_mfn);
				ppgtt_set_shadow_entry(spt, &old_se, index + i);
			}
		} else if (old_se.type == GTT_TYPE_PPGTT_PTE_2M_ENTRY ||
			   old_se.type == GTT_TYPE_PPGTT_PTE_1G_ENTRY) {
			ops->clear_pse(&old_se);
			ops->set_pfn(&old_se,
				     vgpu->gtt.scratch_pt[type].page_mfn);
			ppgtt_set_shadow_entry(spt, &old_se, index);
		} else {
			ops->set_pfn(&old_se,
				     vgpu->gtt.scratch_pt[type].page_mfn);
			ppgtt_set_shadow_entry(spt, &old_se, index);
		}
	}

	return 0;
fail:
	gvt_vgpu_err("fail: shadow page %p guest entry 0x%llx type %d.\n",
			spt, we->val64, we->type);
	return ret;
}

static inline bool can_do_out_of_sync(struct intel_vgpu_ppgtt_spt *spt)
{
	return enable_out_of_sync
		&& gtt_type_is_pte_pt(spt->guest_page.type)
		&& spt->guest_page.write_cnt >= 2;
}
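
/*
 * Only leaf PTE pages qualify for the OOS optimization, and only after the
 * guest has written them at least twice; rarely written page tables stay
 * under ordinary write protection.
 */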

static void ppgtt_set_post_shadow(struct intel_vgpu_ppgtt_spt *spt,
		unsigned long index)
{
	set_bit(index, spt->post_shadow_bitmap);
	if (!list_empty(&spt->post_shadow_list))
		return;

	list_add_tail(&spt->post_shadow_list,
			&spt->vgpu->gtt.post_shadow_list_head);
}

/**
 * intel_vgpu_flush_post_shadow - flush the post shadow transactions
 * @vgpu: a vGPU
 *
 * This function is called before submitting a guest workload to the host,
 * to flush all the post shadows for a vGPU.
 *
 * Returns:
 * Zero on success, negative error code if failed.
 */
int intel_vgpu_flush_post_shadow(struct intel_vgpu *vgpu)
{
	struct list_head *pos, *n;
	struct intel_vgpu_ppgtt_spt *spt;
	struct intel_gvt_gtt_entry ge;
	unsigned long index;
	int ret;

	list_for_each_safe(pos, n, &vgpu->gtt.post_shadow_list_head) {
		spt = container_of(pos, struct intel_vgpu_ppgtt_spt,
				post_shadow_list);

		for_each_set_bit(index, spt->post_shadow_bitmap,
				GTT_ENTRY_NUM_IN_ONE_PAGE) {
			ppgtt_get_guest_entry(spt, &ge, index);

			ret = ppgtt_handle_guest_write_page_table(spt,
							&ge, index);
			if (ret)
				return ret;
			clear_bit(index, spt->post_shadow_bitmap);
		}
		list_del_init(&spt->post_shadow_list);
	}
	return 0;
}

static int ppgtt_handle_guest_write_page_table_bytes(
		struct intel_vgpu_ppgtt_spt *spt,
		u64 pa, void *p_data, int bytes)
{
	struct intel_vgpu *vgpu = spt->vgpu;
	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
	struct intel_gvt_gtt_entry we, se;
	unsigned long index;
	int ret;

	index = (pa & (PAGE_SIZE - 1)) >> info->gtt_entry_size_shift;

	ppgtt_get_guest_entry(spt, &we, index);

	/*
	 * For a page table that holds 64K gtt entries, only PTE#0, PTE#16,
	 * PTE#32, ... PTE#496 are used. Writes to the unused PTEs should be
	 * ignored.
	 */
	if (we.type == GTT_TYPE_PPGTT_PTE_64K_ENTRY &&
	    (index % GTT_64K_PTE_STRIDE)) {
		gvt_vdbg_mm("Ignore write to unused PTE entry, index %lu\n",
			    index);
		return 0;
	}

	if (bytes == info->gtt_entry_size) {
		ret = ppgtt_handle_guest_write_page_table(spt, &we, index);
		if (ret)
			return ret;
	} else {
		if (!test_bit(index, spt->post_shadow_bitmap)) {
			int type = spt->shadow_page.type;

			ppgtt_get_shadow_entry(spt, &se, index);
			ret = ppgtt_handle_guest_entry_removal(spt, &se, index);
			if (ret)
				return ret;
			ops->set_pfn(&se, vgpu->gtt.scratch_pt[type].page_mfn);
			ppgtt_set_shadow_entry(spt, &se, index);
		}
		ppgtt_set_post_shadow(spt, index);
	}

	if (!enable_out_of_sync)
		return 0;

	spt->guest_page.write_cnt++;

	if (spt->guest_page.oos_page)
		ops->set_entry(spt->guest_page.oos_page->mem, &we, index,
				false, 0, vgpu);

	if (can_do_out_of_sync(spt)) {
		if (!spt->guest_page.oos_page)
			ppgtt_allocate_oos_page(spt);

		ret = ppgtt_set_guest_page_oos(spt);
		if (ret < 0)
			return ret;
	}
	return 0;
}
1797
1798static void invalidate_ppgtt_mm(struct intel_vgpu_mm *mm)
1799{
1800	struct intel_vgpu *vgpu = mm->vgpu;
1801	struct intel_gvt *gvt = vgpu->gvt;
1802	struct intel_gvt_gtt *gtt = &gvt->gtt;
1803	struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops;
1804	struct intel_gvt_gtt_entry se;
1805	int index;
1806
1807	if (!mm->ppgtt_mm.shadowed)
1808		return;
1809
1810	for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.shadow_pdps); index++) {
1811		ppgtt_get_shadow_root_entry(mm, &se, index);
1812
1813		if (!ops->test_present(&se))
1814			continue;
1815
1816		ppgtt_invalidate_spt_by_shadow_entry(vgpu, &se);
1817		se.val64 = 0;
1818		ppgtt_set_shadow_root_entry(mm, &se, index);
1819
1820		trace_spt_guest_change(vgpu->id, "destroy root pointer",
1821				       NULL, se.type, se.val64, index);
1822	}
1823
1824	mm->ppgtt_mm.shadowed = false;
1825}
1826
1827
1828static int shadow_ppgtt_mm(struct intel_vgpu_mm *mm)
1829{
1830	struct intel_vgpu *vgpu = mm->vgpu;
1831	struct intel_gvt *gvt = vgpu->gvt;
1832	struct intel_gvt_gtt *gtt = &gvt->gtt;
1833	struct intel_gvt_gtt_pte_ops *ops = gtt->pte_ops;
1834	struct intel_vgpu_ppgtt_spt *spt;
1835	struct intel_gvt_gtt_entry ge, se;
1836	int index, ret;
1837
1838	if (mm->ppgtt_mm.shadowed)
1839		return 0;
1840
1841	mm->ppgtt_mm.shadowed = true;
1842
1843	for (index = 0; index < ARRAY_SIZE(mm->ppgtt_mm.guest_pdps); index++) {
1844		ppgtt_get_guest_root_entry(mm, &ge, index);
1845
1846		if (!ops->test_present(&ge))
1847			continue;
1848
1849		trace_spt_guest_change(vgpu->id, __func__, NULL,
1850				       ge.type, ge.val64, index);
1851
1852		spt = ppgtt_populate_spt_by_guest_entry(vgpu, &ge);
1853		if (IS_ERR(spt)) {
1854			gvt_vgpu_err("fail to populate guest root pointer\n");
1855			ret = PTR_ERR(spt);
1856			goto fail;
1857		}
1858		ppgtt_generate_shadow_entry(&se, spt, &ge);
1859		ppgtt_set_shadow_root_entry(mm, &se, index);
1860
1861		trace_spt_guest_change(vgpu->id, "populate root pointer",
1862				       NULL, se.type, se.val64, index);
1863	}
1864
1865	return 0;
1866fail:
1867	invalidate_ppgtt_mm(mm);
1868	return ret;
1869}
1870
1871static struct intel_vgpu_mm *vgpu_alloc_mm(struct intel_vgpu *vgpu)
1872{
1873	struct intel_vgpu_mm *mm;
1874
1875	mm = kzalloc(sizeof(*mm), GFP_KERNEL);
1876	if (!mm)
1877		return NULL;
1878
1879	mm->vgpu = vgpu;
1880	kref_init(&mm->ref);
1881	atomic_set(&mm->pincount, 0);
1882
1883	return mm;
1884}
1885
1886static void vgpu_free_mm(struct intel_vgpu_mm *mm)
1887{
1888	kfree(mm);
1889}
1890
1891/**
1892 * intel_vgpu_create_ppgtt_mm - create a ppgtt mm object for a vGPU
1893 * @vgpu: a vGPU
1894 * @root_entry_type: ppgtt root entry type
1895 * @pdps: guest pdps.
1896 *
1897 * This function is used to create a ppgtt mm object for a vGPU.
1898 *
1899 * Returns:
1900 * Pointer to the new mm object on success, ERR_PTR if failed.
1901 */
1902struct intel_vgpu_mm *intel_vgpu_create_ppgtt_mm(struct intel_vgpu *vgpu,
1903		enum intel_gvt_gtt_type root_entry_type, u64 pdps[])
1904{
1905	struct intel_gvt *gvt = vgpu->gvt;
1906	struct intel_vgpu_mm *mm;
1907	int ret;
1908
1909	mm = vgpu_alloc_mm(vgpu);
1910	if (!mm)
1911		return ERR_PTR(-ENOMEM);
1912
1913	mm->type = INTEL_GVT_MM_PPGTT;
1914
1915	GEM_BUG_ON(root_entry_type != GTT_TYPE_PPGTT_ROOT_L3_ENTRY &&
1916		   root_entry_type != GTT_TYPE_PPGTT_ROOT_L4_ENTRY);
1917	mm->ppgtt_mm.root_entry_type = root_entry_type;
1918
1919	INIT_LIST_HEAD(&mm->ppgtt_mm.list);
1920	INIT_LIST_HEAD(&mm->ppgtt_mm.lru_list);
1921	INIT_LIST_HEAD(&mm->ppgtt_mm.link);
1922
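	/*
	 * A 4-level PPGTT has a single PML4 root, while a 3-level PPGTT
	 * carries four PDP entries.
	 */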
1923	if (root_entry_type == GTT_TYPE_PPGTT_ROOT_L4_ENTRY)
1924		mm->ppgtt_mm.guest_pdps[0] = pdps[0];
1925	else
1926		memcpy(mm->ppgtt_mm.guest_pdps, pdps,
1927		       sizeof(mm->ppgtt_mm.guest_pdps));
1928
1929	ret = shadow_ppgtt_mm(mm);
1930	if (ret) {
1931		gvt_vgpu_err("failed to shadow ppgtt mm\n");
1932		vgpu_free_mm(mm);
1933		return ERR_PTR(ret);
1934	}
1935
1936	list_add_tail(&mm->ppgtt_mm.list, &vgpu->gtt.ppgtt_mm_list_head);
1937
1938	mutex_lock(&gvt->gtt.ppgtt_mm_lock);
1939	list_add_tail(&mm->ppgtt_mm.lru_list, &gvt->gtt.ppgtt_mm_lru_list_head);
1940	mutex_unlock(&gvt->gtt.ppgtt_mm_lock);
1941
1942	return mm;
1943}
1944
1945static struct intel_vgpu_mm *intel_vgpu_create_ggtt_mm(struct intel_vgpu *vgpu)
1946{
1947	struct intel_vgpu_mm *mm;
1948	unsigned long nr_entries;
1949
1950	mm = vgpu_alloc_mm(vgpu);
1951	if (!mm)
1952		return ERR_PTR(-ENOMEM);
1953
1954	mm->type = INTEL_GVT_MM_GGTT;
1955
1956	nr_entries = gvt_ggtt_gm_sz(vgpu->gvt) >> I915_GTT_PAGE_SHIFT;
1957	mm->ggtt_mm.virtual_ggtt =
1958		vzalloc(array_size(nr_entries,
1959				   vgpu->gvt->device_info.gtt_entry_size));
1960	if (!mm->ggtt_mm.virtual_ggtt) {
1961		vgpu_free_mm(mm);
1962		return ERR_PTR(-ENOMEM);
1963	}
1964
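	/*
	 * Mirror arrays of the host GGTT entries covering the vGPU's aperture
	 * and hidden ranges, used to restore the host GGTT on resume (see
	 * intel_gvt_restore_ggtt()).
	 */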
1965	mm->ggtt_mm.host_ggtt_aperture = vzalloc((vgpu_aperture_sz(vgpu) >> PAGE_SHIFT) * sizeof(u64));
1966	if (!mm->ggtt_mm.host_ggtt_aperture) {
1967		vfree(mm->ggtt_mm.virtual_ggtt);
1968		vgpu_free_mm(mm);
1969		return ERR_PTR(-ENOMEM);
1970	}
1971
1972	mm->ggtt_mm.host_ggtt_hidden = vzalloc((vgpu_hidden_sz(vgpu) >> PAGE_SHIFT) * sizeof(u64));
1973	if (!mm->ggtt_mm.host_ggtt_hidden) {
1974		vfree(mm->ggtt_mm.host_ggtt_aperture);
1975		vfree(mm->ggtt_mm.virtual_ggtt);
1976		vgpu_free_mm(mm);
1977		return ERR_PTR(-ENOMEM);
1978	}
1979
1980	return mm;
1981}
1982
1983/**
1984 * _intel_vgpu_mm_release - destroy a mm object
1985 * @mm_ref: a kref object
1986 *
1987 * This function is used to destroy an mm object of a vGPU.
1988 *
1989 */
1990void _intel_vgpu_mm_release(struct kref *mm_ref)
1991{
1992	struct intel_vgpu_mm *mm = container_of(mm_ref, typeof(*mm), ref);
1993
1994	if (GEM_WARN_ON(atomic_read(&mm->pincount)))
1995		gvt_err("vgpu mm pin count bug detected\n");
1996
1997	if (mm->type == INTEL_GVT_MM_PPGTT) {
1998		list_del(&mm->ppgtt_mm.list);
1999
2000		mutex_lock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock);
2001		list_del(&mm->ppgtt_mm.lru_list);
2002		mutex_unlock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock);
2003
2004		invalidate_ppgtt_mm(mm);
2005	} else {
2006		vfree(mm->ggtt_mm.virtual_ggtt);
2007		vfree(mm->ggtt_mm.host_ggtt_aperture);
2008		vfree(mm->ggtt_mm.host_ggtt_hidden);
2009	}
2010
2011	vgpu_free_mm(mm);
2012}
2013
2014/**
2015 * intel_vgpu_unpin_mm - decrease the pin count of a vGPU mm object
2016 * @mm: a vGPU mm object
2017 *
2018 * This function is called when a user no longer needs a vGPU mm object.
2019 */
2020void intel_vgpu_unpin_mm(struct intel_vgpu_mm *mm)
2021{
2022	atomic_dec_if_positive(&mm->pincount);
2023}
2024
2025/**
2026 * intel_vgpu_pin_mm - increase the pin count of a vGPU mm object
2027 * @mm: target vgpu mm
2028 *
2029 * This function is called when a user wants to use a vGPU mm object. If this
2030 * mm object hasn't been shadowed yet, the shadow will be populated at this
2031 * time.
2032 *
2033 * Returns:
2034 * Zero on success, negative error code if failed.
2035 */
2036int intel_vgpu_pin_mm(struct intel_vgpu_mm *mm)
2037{
2038	int ret;
2039
2040	atomic_inc(&mm->pincount);
2041
2042	if (mm->type == INTEL_GVT_MM_PPGTT) {
2043		ret = shadow_ppgtt_mm(mm);
2044		if (ret)
2045			return ret;
2046
2047		mutex_lock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock);
2048		list_move_tail(&mm->ppgtt_mm.lru_list,
2049			       &mm->vgpu->gvt->gtt.ppgtt_mm_lru_list_head);
2050		mutex_unlock(&mm->vgpu->gvt->gtt.ppgtt_mm_lock);
2051	}
2052
2053	return 0;
2054}
2055
2056static int reclaim_one_ppgtt_mm(struct intel_gvt *gvt)
2057{
2058	struct intel_vgpu_mm *mm;
2059	struct list_head *pos, *n;
2060
2061	mutex_lock(&gvt->gtt.ppgtt_mm_lock);
2062
2063	list_for_each_safe(pos, n, &gvt->gtt.ppgtt_mm_lru_list_head) {
2064		mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.lru_list);
2065
2066		if (atomic_read(&mm->pincount))
2067			continue;
2068
2069		list_del_init(&mm->ppgtt_mm.lru_list);
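		/*
		 * The mm has already been taken off the LRU list, so drop the
		 * lock before the potentially long-running invalidation.
		 */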
2070		mutex_unlock(&gvt->gtt.ppgtt_mm_lock);
2071		invalidate_ppgtt_mm(mm);
2072		return 1;
2073	}
2074	mutex_unlock(&gvt->gtt.ppgtt_mm_lock);
2075	return 0;
2076}
2077
2078/*
2079 * GMA translation APIs.
2080 */
2081static inline int ppgtt_get_next_level_entry(struct intel_vgpu_mm *mm,
2082		struct intel_gvt_gtt_entry *e, unsigned long index, bool guest)
2083{
2084	struct intel_vgpu *vgpu = mm->vgpu;
2085	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
2086	struct intel_vgpu_ppgtt_spt *s;
2087
2088	s = intel_vgpu_find_spt_by_mfn(vgpu, ops->get_pfn(e));
2089	if (!s)
2090		return -ENXIO;
2091
2092	if (!guest)
2093		ppgtt_get_shadow_entry(s, e, index);
2094	else
2095		ppgtt_get_guest_entry(s, e, index);
2096	return 0;
2097}
2098
2099/**
2100 * intel_vgpu_gma_to_gpa - translate a gma to GPA
2101 * @mm: mm object, which can be a PPGTT or GGTT mm object
2102 * @gma: graphics memory address in this mm object
2103 *
2104 * This function is used to translate a graphics memory address in a specific
2105 * graphics memory space to a guest physical address.
2106 *
2107 * Returns:
2108 * Guest physical address on success, INTEL_GVT_INVALID_ADDR if failed.
2109 */
2110unsigned long intel_vgpu_gma_to_gpa(struct intel_vgpu_mm *mm, unsigned long gma)
2111{
2112	struct intel_vgpu *vgpu = mm->vgpu;
2113	struct intel_gvt *gvt = vgpu->gvt;
2114	struct intel_gvt_gtt_pte_ops *pte_ops = gvt->gtt.pte_ops;
2115	struct intel_gvt_gtt_gma_ops *gma_ops = gvt->gtt.gma_ops;
2116	unsigned long gpa = INTEL_GVT_INVALID_ADDR;
2117	unsigned long gma_index[4];
2118	struct intel_gvt_gtt_entry e;
2119	int i, levels = 0;
2120	int ret;
2121
2122	GEM_BUG_ON(mm->type != INTEL_GVT_MM_GGTT &&
2123		   mm->type != INTEL_GVT_MM_PPGTT);
2124
2125	if (mm->type == INTEL_GVT_MM_GGTT) {
2126		if (!vgpu_gmadr_is_valid(vgpu, gma))
2127			goto err;
2128
2129		ggtt_get_guest_entry(mm, &e,
2130			gma_ops->gma_to_ggtt_pte_index(gma));
2131
2132		gpa = (pte_ops->get_pfn(&e) << I915_GTT_PAGE_SHIFT)
2133			+ (gma & ~I915_GTT_PAGE_MASK);
2134
2135		trace_gma_translate(vgpu->id, "ggtt", 0, 0, gma, gpa);
2136	} else {
2137		switch (mm->ppgtt_mm.root_entry_type) {
2138		case GTT_TYPE_PPGTT_ROOT_L4_ENTRY:
2139			ppgtt_get_shadow_root_entry(mm, &e, 0);
2140
2141			gma_index[0] = gma_ops->gma_to_pml4_index(gma);
2142			gma_index[1] = gma_ops->gma_to_l4_pdp_index(gma);
2143			gma_index[2] = gma_ops->gma_to_pde_index(gma);
2144			gma_index[3] = gma_ops->gma_to_pte_index(gma);
2145			levels = 4;
2146			break;
2147		case GTT_TYPE_PPGTT_ROOT_L3_ENTRY:
2148			ppgtt_get_shadow_root_entry(mm, &e,
2149					gma_ops->gma_to_l3_pdp_index(gma));
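			/*
			 * For a 3-level PPGTT the root is the PDP entry
			 * selected by the gma itself, so only the PDE and PTE
			 * levels remain to be walked.
			 */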
2150
2151			gma_index[0] = gma_ops->gma_to_pde_index(gma);
2152			gma_index[1] = gma_ops->gma_to_pte_index(gma);
2153			levels = 2;
2154			break;
2155		default:
2156			GEM_BUG_ON(1);
2157		}
2158
2159		/* walk the shadow page table and get gpa from guest entry */
2160		for (i = 0; i < levels; i++) {
2161			ret = ppgtt_get_next_level_entry(mm, &e, gma_index[i],
2162				(i == levels - 1));
2163			if (ret)
2164				goto err;
2165
2166			if (!pte_ops->test_present(&e)) {
2167				gvt_dbg_core("GMA 0x%lx is not present\n", gma);
2168				goto err;
2169			}
2170		}
2171
2172		gpa = (pte_ops->get_pfn(&e) << I915_GTT_PAGE_SHIFT) +
2173					(gma & ~I915_GTT_PAGE_MASK);
2174		trace_gma_translate(vgpu->id, "ppgtt", 0,
2175				    mm->ppgtt_mm.root_entry_type, gma, gpa);
2176	}
2177
2178	return gpa;
2179err:
2180	gvt_vgpu_err("invalid mm type: %d gma %lx\n", mm->type, gma);
2181	return INTEL_GVT_INVALID_ADDR;
2182}
2183
2184static int emulate_ggtt_mmio_read(struct intel_vgpu *vgpu,
2185	unsigned int off, void *p_data, unsigned int bytes)
2186{
2187	struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm;
2188	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
2189	unsigned long index = off >> info->gtt_entry_size_shift;
2190	unsigned long gma;
2191	struct intel_gvt_gtt_entry e;
2192
2193	if (bytes != 4 && bytes != 8)
2194		return -EINVAL;
2195
2196	gma = index << I915_GTT_PAGE_SHIFT;
2197	if (!intel_gvt_ggtt_validate_range(vgpu,
2198					   gma, 1 << I915_GTT_PAGE_SHIFT)) {
2199		gvt_dbg_mm("read invalid ggtt at 0x%lx\n", gma);
2200		memset(p_data, 0, bytes);
2201		return 0;
2202	}
2203
2204	ggtt_get_guest_entry(ggtt_mm, &e, index);
2205	memcpy(p_data, (void *)&e.val64 + (off & (info->gtt_entry_size - 1)),
2206			bytes);
2207	return 0;
2208}
2209
2210/**
2211 * intel_vgpu_emulate_ggtt_mmio_read - emulate GTT MMIO register read
2212 * @vgpu: a vGPU
2213 * @off: register offset
2214 * @p_data: data will be returned to guest
2215 * @bytes: data length
2216 *
2217 * This function is used to emulate the GTT MMIO register read
2218 *
2219 * Returns:
2220 * Zero on success, error code if failed.
2221 */
2222int intel_vgpu_emulate_ggtt_mmio_read(struct intel_vgpu *vgpu, unsigned int off,
2223	void *p_data, unsigned int bytes)
2224{
2225	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
2226	int ret;
2227
2228	if (bytes != 4 && bytes != 8)
2229		return -EINVAL;
2230
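	/* Convert the MMIO offset into an offset within the GGTT range. */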
2231	off -= info->gtt_start_offset;
2232	ret = emulate_ggtt_mmio_read(vgpu, off, p_data, bytes);
2233	return ret;
2234}
2235
2236static void ggtt_invalidate_pte(struct intel_vgpu *vgpu,
2237		struct intel_gvt_gtt_entry *entry)
2238{
2239	struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops;
2240	unsigned long pfn;
2241
2242	pfn = pte_ops->get_pfn(entry);
2243	if (pfn != vgpu->gvt->gtt.scratch_mfn)
2244		intel_gvt_hypervisor_dma_unmap_guest_page(vgpu,
2245						pfn << PAGE_SHIFT);
2246}
2247
2248static int emulate_ggtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
2249	void *p_data, unsigned int bytes)
2250{
2251	struct intel_gvt *gvt = vgpu->gvt;
2252	const struct intel_gvt_device_info *info = &gvt->device_info;
2253	struct intel_vgpu_mm *ggtt_mm = vgpu->gtt.ggtt_mm;
2254	struct intel_gvt_gtt_pte_ops *ops = gvt->gtt.pte_ops;
2255	unsigned long g_gtt_index = off >> info->gtt_entry_size_shift;
2256	unsigned long gma, gfn;
2257	struct intel_gvt_gtt_entry e = {.val64 = 0, .type = GTT_TYPE_GGTT_PTE};
2258	struct intel_gvt_gtt_entry m = {.val64 = 0, .type = GTT_TYPE_GGTT_PTE};
2259	dma_addr_t dma_addr;
2260	int ret;
2261	struct intel_gvt_partial_pte *partial_pte, *pos, *n;
2262	bool partial_update = false;
2263
2264	if (bytes != 4 && bytes != 8)
2265		return -EINVAL;
2266
2267	gma = g_gtt_index << I915_GTT_PAGE_SHIFT;
2268
2269	/* the VM may configure the whole GM space when ballooning is used */
2270	if (!vgpu_gmadr_is_valid(vgpu, gma))
2271		return 0;
2272
2273	e.type = GTT_TYPE_GGTT_PTE;
2274	memcpy((void *)&e.val64 + (off & (info->gtt_entry_size - 1)), p_data,
2275			bytes);
2276
2277	/* If the ggtt entry size is 8 bytes and the write is split into two
2278	 * 4-byte writes, save the first 4 bytes in a list and update the
2279	 * virtual PTE. Only update the shadow PTE when the second half arrives.
2280	 */
2281	if (bytes < info->gtt_entry_size) {
2282		bool found = false;
2283
2284		list_for_each_entry_safe(pos, n,
2285				&ggtt_mm->ggtt_mm.partial_pte_list, list) {
2286			if (g_gtt_index == pos->offset >>
2287					info->gtt_entry_size_shift) {
2288				if (off != pos->offset) {
2289					/* the second partial write */
2290					int last_off = pos->offset &
2291						(info->gtt_entry_size - 1);
2292
2293					memcpy((void *)&e.val64 + last_off,
2294						(void *)&pos->data + last_off,
2295						bytes);
2296
2297					list_del(&pos->list);
2298					kfree(pos);
2299					found = true;
2300					break;
2301				}
2302
2303				/* update of the first partial part */
2304				pos->data = e.val64;
2305				ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index);
2306				return 0;
2307			}
2308		}
2309
2310		if (!found) {
2311			/* the first partial part */
2312			partial_pte = kzalloc(sizeof(*partial_pte), GFP_KERNEL);
2313			if (!partial_pte)
2314				return -ENOMEM;
2315			partial_pte->offset = off;
2316			partial_pte->data = e.val64;
2317			list_add_tail(&partial_pte->list,
2318				&ggtt_mm->ggtt_mm.partial_pte_list);
2319			partial_update = true;
2320		}
2321	}
2322
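	/*
	 * Only a complete and present guest entry is mapped into the host
	 * GGTT; partial or non-present entries are shadowed with the scratch
	 * page instead.
	 */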
2323	if (!partial_update && (ops->test_present(&e))) {
2324		gfn = ops->get_pfn(&e);
2325		m.val64 = e.val64;
2326		m.type = e.type;
2327
2328		/* one PTE update may be issued in multiple writes and the
2329		 * first write may not construct a valid gfn
2330		 */
2331		if (!intel_gvt_hypervisor_is_valid_gfn(vgpu, gfn)) {
2332			ops->set_pfn(&m, gvt->gtt.scratch_mfn);
2333			goto out;
2334		}
2335
2336		ret = intel_gvt_hypervisor_dma_map_guest_page(vgpu, gfn,
2337							PAGE_SIZE, &dma_addr);
2338		if (ret) {
2339			gvt_vgpu_err("fail to populate guest ggtt entry\n");
2340			/* The guest driver may read/write the entry while it is
2341			 * only partially updated; in that case the p2m mapping
2342			 * fails, so set the shadow entry to point to a scratch page.
2343			 */
2344			ops->set_pfn(&m, gvt->gtt.scratch_mfn);
2345		} else
2346			ops->set_pfn(&m, dma_addr >> PAGE_SHIFT);
2347	} else {
2348		ops->set_pfn(&m, gvt->gtt.scratch_mfn);
2349		ops->clear_present(&m);
2350	}
2351
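	/*
	 * Commit the update: refresh the guest view, unmap whatever the old
	 * host entry pointed at, install the new host entry and flush the
	 * GGTT.
	 */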
2352out:
2353	ggtt_set_guest_entry(ggtt_mm, &e, g_gtt_index);
2354
2355	ggtt_get_host_entry(ggtt_mm, &e, g_gtt_index);
2356	ggtt_invalidate_pte(vgpu, &e);
2357
2358	ggtt_set_host_entry(ggtt_mm, &m, g_gtt_index);
2359	ggtt_invalidate(gvt->gt);
2360	return 0;
2361}
2362
2363/**
2364 * intel_vgpu_emulate_ggtt_mmio_write - emulate GTT MMIO register write
2365 * @vgpu: a vGPU
2366 * @off: register offset
2367 * @p_data: data from guest write
2368 * @bytes: data length
2369 *
2370 * This function is used to emulate the GTT MMIO register write
2371 *
2372 * Returns:
2373 * Zero on success, error code if failed.
2374 */
2375int intel_vgpu_emulate_ggtt_mmio_write(struct intel_vgpu *vgpu,
2376		unsigned int off, void *p_data, unsigned int bytes)
2377{
2378	const struct intel_gvt_device_info *info = &vgpu->gvt->device_info;
2379	int ret;
2380	struct intel_vgpu_submission *s = &vgpu->submission;
2381	struct intel_engine_cs *engine;
2382	int i;
2383
2384	if (bytes != 4 && bytes != 8)
2385		return -EINVAL;
2386
2387	off -= info->gtt_start_offset;
2388	ret = emulate_ggtt_mmio_write(vgpu, off, p_data, bytes);
2389
2390	/* If the GGTT entry of the last submitted context is written,
2391	 * that context has probably been unpinned.
2392	 * Mark the last shadowed context as invalid.
2393	 */
2394	for_each_engine(engine, vgpu->gvt->gt, i) {
2395		if (!s->last_ctx[i].valid)
2396			continue;
2397
2398		if (s->last_ctx[i].lrca == (off >> info->gtt_entry_size_shift))
2399			s->last_ctx[i].valid = false;
2400	}
2401	return ret;
2402}
2403
2404static int alloc_scratch_pages(struct intel_vgpu *vgpu,
2405		enum intel_gvt_gtt_type type)
2406{
2407	struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
2408	struct intel_vgpu_gtt *gtt = &vgpu->gtt;
2409	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
2410	int page_entry_num = I915_GTT_PAGE_SIZE >>
2411				vgpu->gvt->device_info.gtt_entry_size_shift;
2412	void *scratch_pt;
2413	int i;
2414	struct device *dev = &vgpu->gvt->gt->i915->drm.pdev->dev;
2415	dma_addr_t daddr;
2416
2417	if (drm_WARN_ON(&i915->drm,
2418			type < GTT_TYPE_PPGTT_PTE_PT || type >= GTT_TYPE_MAX))
2419		return -EINVAL;
2420
2421	scratch_pt = (void *)get_zeroed_page(GFP_KERNEL);
2422	if (!scratch_pt) {
2423		gvt_vgpu_err("fail to allocate scratch page\n");
2424		return -ENOMEM;
2425	}
2426
2427	daddr = dma_map_page(dev, virt_to_page(scratch_pt), 0,
2428			4096, PCI_DMA_BIDIRECTIONAL);
2429	if (dma_mapping_error(dev, daddr)) {
2430		gvt_vgpu_err("fail to dmamap scratch_pt\n");
2431		__free_page(virt_to_page(scratch_pt));
2432		return -ENOMEM;
2433	}
2434	gtt->scratch_pt[type].page_mfn =
2435		(unsigned long)(daddr >> I915_GTT_PAGE_SHIFT);
2436	gtt->scratch_pt[type].page = virt_to_page(scratch_pt);
2437	gvt_dbg_mm("vgpu%d create scratch_pt: type %d mfn=0x%lx\n",
2438			vgpu->id, type, gtt->scratch_pt[type].page_mfn);
2439
2440	/* Build the tree by filling the scratch pt with entries which point
2441	 * to the next level scratch pt or scratch page. scratch_pt[type]
2442	 * indicates the scratch pt/scratch page used by the 'type' pt.
2443	 * e.g. scratch_pt[GTT_TYPE_PPGTT_PDE_PT] is used by a
2444	 * GTT_TYPE_PPGTT_PDE_PT level pt, which means this scratch_pt itself
2445	 * is of type GTT_TYPE_PPGTT_PTE_PT and is filled with the scratch
2446	 * page mfn.
2447	 */
2448	if (type > GTT_TYPE_PPGTT_PTE_PT) {
2449		struct intel_gvt_gtt_entry se;
2450
2451		memset(&se, 0, sizeof(struct intel_gvt_gtt_entry));
2452		se.type = get_entry_type(type - 1);
2453		ops->set_pfn(&se, gtt->scratch_pt[type - 1].page_mfn);
2454
2455		/* The entry parameters such as present/writable/cache type
2456		 * are set to the same values as i915's scratch page tree.
2457		 */
2458		se.val64 |= _PAGE_PRESENT | _PAGE_RW;
2459		if (type == GTT_TYPE_PPGTT_PDE_PT)
2460			se.val64 |= PPAT_CACHED;
2461
2462		for (i = 0; i < page_entry_num; i++)
2463			ops->set_entry(scratch_pt, &se, i, false, 0, vgpu);
2464	}
2465
2466	return 0;
2467}
2468
2469static int release_scratch_page_tree(struct intel_vgpu *vgpu)
2470{
2471	int i;
2472	struct device *dev = &vgpu->gvt->gt->i915->drm.pdev->dev;
2473	dma_addr_t daddr;
2474
2475	for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) {
2476		if (vgpu->gtt.scratch_pt[i].page != NULL) {
2477			daddr = (dma_addr_t)(vgpu->gtt.scratch_pt[i].page_mfn <<
2478					I915_GTT_PAGE_SHIFT);
2479			dma_unmap_page(dev, daddr, 4096, PCI_DMA_BIDIRECTIONAL);
2480			__free_page(vgpu->gtt.scratch_pt[i].page);
2481			vgpu->gtt.scratch_pt[i].page = NULL;
2482			vgpu->gtt.scratch_pt[i].page_mfn = 0;
2483		}
2484	}
2485
2486	return 0;
2487}
2488
2489static int create_scratch_page_tree(struct intel_vgpu *vgpu)
2490{
2491	int i, ret;
2492
2493	for (i = GTT_TYPE_PPGTT_PTE_PT; i < GTT_TYPE_MAX; i++) {
2494		ret = alloc_scratch_pages(vgpu, i);
2495		if (ret)
2496			goto err;
2497	}
2498
2499	return 0;
2500
2501err:
2502	release_scratch_page_tree(vgpu);
2503	return ret;
2504}
2505
2506/**
2507 * intel_vgpu_init_gtt - initialize per-vGPU graphics memory virtualization
2508 * @vgpu: a vGPU
2509 *
2510 * This function is used to initialize per-vGPU graphics memory virtualization
2511 * components.
2512 *
2513 * Returns:
2514 * Zero on success, error code if failed.
2515 */
2516int intel_vgpu_init_gtt(struct intel_vgpu *vgpu)
2517{
2518	struct intel_vgpu_gtt *gtt = &vgpu->gtt;
2519
2520	INIT_RADIX_TREE(&gtt->spt_tree, GFP_KERNEL);
2521
2522	INIT_LIST_HEAD(&gtt->ppgtt_mm_list_head);
2523	INIT_LIST_HEAD(&gtt->oos_page_list_head);
2524	INIT_LIST_HEAD(&gtt->post_shadow_list_head);
2525
2526	gtt->ggtt_mm = intel_vgpu_create_ggtt_mm(vgpu);
2527	if (IS_ERR(gtt->ggtt_mm)) {
2528		gvt_vgpu_err("fail to create mm for ggtt.\n");
2529		return PTR_ERR(gtt->ggtt_mm);
2530	}
2531
2532	intel_vgpu_reset_ggtt(vgpu, false);
2533
2534	INIT_LIST_HEAD(&gtt->ggtt_mm->ggtt_mm.partial_pte_list);
2535
2536	return create_scratch_page_tree(vgpu);
2537}
2538
2539void intel_vgpu_destroy_all_ppgtt_mm(struct intel_vgpu *vgpu)
2540{
2541	struct list_head *pos, *n;
2542	struct intel_vgpu_mm *mm;
2543
2544	list_for_each_safe(pos, n, &vgpu->gtt.ppgtt_mm_list_head) {
2545		mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list);
2546		intel_vgpu_destroy_mm(mm);
2547	}
2548
2549	if (GEM_WARN_ON(!list_empty(&vgpu->gtt.ppgtt_mm_list_head)))
2550		gvt_err("vgpu ppgtt mm is not fully destroyed\n");
2551
2552	if (GEM_WARN_ON(!radix_tree_empty(&vgpu->gtt.spt_tree))) {
2553		gvt_err("Why do we still have unfreed spts?\n");
2554		ppgtt_free_all_spt(vgpu);
2555	}
2556}
2557
2558static void intel_vgpu_destroy_ggtt_mm(struct intel_vgpu *vgpu)
2559{
2560	struct intel_gvt_partial_pte *pos, *next;
2561
2562	list_for_each_entry_safe(pos, next,
2563				 &vgpu->gtt.ggtt_mm->ggtt_mm.partial_pte_list,
2564				 list) {
2565		gvt_dbg_mm("partial PTE update on hold 0x%lx : 0x%llx\n",
2566			pos->offset, pos->data);
2567		kfree(pos);
2568	}
2569	intel_vgpu_destroy_mm(vgpu->gtt.ggtt_mm);
2570	vgpu->gtt.ggtt_mm = NULL;
2571}
2572
2573/**
2574 * intel_vgpu_clean_gtt - clean up per-vGPU graphics memory virtualization
2575 * @vgpu: a vGPU
2576 *
2577 * This function is used to clean up per-vGPU graphics memory virtualization
2578 * components.
2579 *
2582 */
2583void intel_vgpu_clean_gtt(struct intel_vgpu *vgpu)
2584{
2585	intel_vgpu_destroy_all_ppgtt_mm(vgpu);
2586	intel_vgpu_destroy_ggtt_mm(vgpu);
2587	release_scratch_page_tree(vgpu);
2588}
2589
2590static void clean_spt_oos(struct intel_gvt *gvt)
2591{
2592	struct intel_gvt_gtt *gtt = &gvt->gtt;
2593	struct list_head *pos, *n;
2594	struct intel_vgpu_oos_page *oos_page;
2595
2596	WARN(!list_empty(&gtt->oos_page_use_list_head),
2597		"someone is still using oos page\n");
2598
2599	list_for_each_safe(pos, n, &gtt->oos_page_free_list_head) {
2600		oos_page = container_of(pos, struct intel_vgpu_oos_page, list);
2601		list_del(&oos_page->list);
2602		free_page((unsigned long)oos_page->mem);
2603		kfree(oos_page);
2604	}
2605}
2606
2607static int setup_spt_oos(struct intel_gvt *gvt)
2608{
2609	struct intel_gvt_gtt *gtt = &gvt->gtt;
2610	struct intel_vgpu_oos_page *oos_page;
2611	int i;
2612	int ret;
2613
2614	INIT_LIST_HEAD(&gtt->oos_page_free_list_head);
2615	INIT_LIST_HEAD(&gtt->oos_page_use_list_head);
2616
2617	for (i = 0; i < preallocated_oos_pages; i++) {
2618		oos_page = kzalloc(sizeof(*oos_page), GFP_KERNEL);
2619		if (!oos_page) {
2620			ret = -ENOMEM;
2621			goto fail;
2622		}
2623		oos_page->mem = (void *)__get_free_pages(GFP_KERNEL, 0);
2624		if (!oos_page->mem) {
2625			ret = -ENOMEM;
2626			kfree(oos_page);
2627			goto fail;
2628		}
2629
2630		INIT_LIST_HEAD(&oos_page->list);
2631		INIT_LIST_HEAD(&oos_page->vm_list);
2632		oos_page->id = i;
2633		list_add_tail(&oos_page->list, &gtt->oos_page_free_list_head);
2634	}
2635
2636	gvt_dbg_mm("%d oos pages preallocated\n", i);
2637
2638	return 0;
2639fail:
2640	clean_spt_oos(gvt);
2641	return ret;
2642}
2643
2644/**
2645 * intel_vgpu_find_ppgtt_mm - find a PPGTT mm object
2646 * @vgpu: a vGPU
2647 * @pdps: pdp root array
2648 *
2649 * This function is used to find a PPGTT mm object in the mm object pool.
2650 *
2651 * Returns:
2652 * pointer to mm object on success, NULL if failed.
2653 */
2654struct intel_vgpu_mm *intel_vgpu_find_ppgtt_mm(struct intel_vgpu *vgpu,
2655		u64 pdps[])
2656{
2657	struct intel_vgpu_mm *mm;
2658	struct list_head *pos;
2659
2660	list_for_each(pos, &vgpu->gtt.ppgtt_mm_list_head) {
2661		mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list);
2662
2663		switch (mm->ppgtt_mm.root_entry_type) {
2664		case GTT_TYPE_PPGTT_ROOT_L4_ENTRY:
2665			if (pdps[0] == mm->ppgtt_mm.guest_pdps[0])
2666				return mm;
2667			break;
2668		case GTT_TYPE_PPGTT_ROOT_L3_ENTRY:
2669			if (!memcmp(pdps, mm->ppgtt_mm.guest_pdps,
2670				    sizeof(mm->ppgtt_mm.guest_pdps)))
2671				return mm;
2672			break;
2673		default:
2674			GEM_BUG_ON(1);
2675		}
2676	}
2677	return NULL;
2678}
2679
2680/**
2681 * intel_vgpu_get_ppgtt_mm - get or create a PPGTT mm object.
2682 * @vgpu: a vGPU
2683 * @root_entry_type: ppgtt root entry type
2684 * @pdps: guest pdps
2685 *
2686 * This function is used to find or create a PPGTT mm object from a guest.
2687 *
2688 * Returns:
2689 * Pointer to the mm object on success, ERR_PTR if failed.
2690 */
2691struct intel_vgpu_mm *intel_vgpu_get_ppgtt_mm(struct intel_vgpu *vgpu,
2692		enum intel_gvt_gtt_type root_entry_type, u64 pdps[])
2693{
2694	struct intel_vgpu_mm *mm;
2695
2696	mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps);
2697	if (mm) {
2698		intel_vgpu_mm_get(mm);
2699	} else {
2700		mm = intel_vgpu_create_ppgtt_mm(vgpu, root_entry_type, pdps);
2701		if (IS_ERR(mm))
2702			gvt_vgpu_err("fail to create mm\n");
2703	}
2704	return mm;
2705}
2706
2707/**
2708 * intel_vgpu_put_ppgtt_mm - find and put a PPGTT mm object.
2709 * @vgpu: a vGPU
2710 * @pdps: guest pdps
2711 *
2712 * This function is used to find a guest's PPGTT mm object and drop a reference to it.
2713 *
2714 * Returns:
2715 * Zero on success, negative error code if failed.
2716 */
2717int intel_vgpu_put_ppgtt_mm(struct intel_vgpu *vgpu, u64 pdps[])
2718{
2719	struct intel_vgpu_mm *mm;
2720
2721	mm = intel_vgpu_find_ppgtt_mm(vgpu, pdps);
2722	if (!mm) {
2723		gvt_vgpu_err("fail to find ppgtt instance.\n");
2724		return -EINVAL;
2725	}
2726	intel_vgpu_mm_put(mm);
2727	return 0;
2728}
2729
2730/**
2731 * intel_gvt_init_gtt - initialize mm components of a GVT device
2732 * @gvt: GVT device
2733 *
2734 * This function is called at the initialization stage, to initialize
2735 * the mm components of a GVT device.
2736 *
2737 * Returns:
2738 * zero on success, negative error code if failed.
2739 */
2740int intel_gvt_init_gtt(struct intel_gvt *gvt)
2741{
2742	int ret;
2743	void *page;
2744	struct device *dev = &gvt->gt->i915->drm.pdev->dev;
2745	dma_addr_t daddr;
2746
2747	gvt_dbg_core("init gtt\n");
2748
2749	gvt->gtt.pte_ops = &gen8_gtt_pte_ops;
2750	gvt->gtt.gma_ops = &gen8_gtt_gma_ops;
2751
2752	page = (void *)get_zeroed_page(GFP_KERNEL);
2753	if (!page) {
2754		gvt_err("fail to allocate scratch ggtt page\n");
2755		return -ENOMEM;
2756	}
2757
2758	daddr = dma_map_page(dev, virt_to_page(page), 0,
2759			4096, PCI_DMA_BIDIRECTIONAL);
2760	if (dma_mapping_error(dev, daddr)) {
2761		gvt_err("fail to dmamap scratch ggtt page\n");
2762		__free_page(virt_to_page(page));
2763		return -ENOMEM;
2764	}
2765
2766	gvt->gtt.scratch_page = virt_to_page(page);
2767	gvt->gtt.scratch_mfn = (unsigned long)(daddr >> I915_GTT_PAGE_SHIFT);
2768
2769	if (enable_out_of_sync) {
2770		ret = setup_spt_oos(gvt);
2771		if (ret) {
2772			gvt_err("fail to initialize SPT oos\n");
2773			dma_unmap_page(dev, daddr, 4096, PCI_DMA_BIDIRECTIONAL);
2774			__free_page(gvt->gtt.scratch_page);
2775			return ret;
2776		}
2777	}
2778	INIT_LIST_HEAD(&gvt->gtt.ppgtt_mm_lru_list_head);
2779	mutex_init(&gvt->gtt.ppgtt_mm_lock);
2780	return 0;
2781}
2782
2783/**
2784 * intel_gvt_clean_gtt - clean up mm components of a GVT device
2785 * @gvt: GVT device
2786 *
2787 * This function is called at the driver unloading stage, to clean up
2788 * the mm components of a GVT device.
2789 *
2790 */
2791void intel_gvt_clean_gtt(struct intel_gvt *gvt)
2792{
2793	struct device *dev = &gvt->gt->i915->drm.pdev->dev;
2794	dma_addr_t daddr = (dma_addr_t)(gvt->gtt.scratch_mfn <<
2795					I915_GTT_PAGE_SHIFT);
2796
2797	dma_unmap_page(dev, daddr, 4096, PCI_DMA_BIDIRECTIONAL);
2798
2799	__free_page(gvt->gtt.scratch_page);
2800
2801	if (enable_out_of_sync)
2802		clean_spt_oos(gvt);
2803}
2804
2805/**
2806 * intel_vgpu_invalidate_ppgtt - invalidate PPGTT instances
2807 * @vgpu: a vGPU
2808 *
2809 * This function is called to invalidate all PPGTT instances of a vGPU.
2810 *
2811 */
2812void intel_vgpu_invalidate_ppgtt(struct intel_vgpu *vgpu)
2813{
2814	struct list_head *pos, *n;
2815	struct intel_vgpu_mm *mm;
2816
2817	list_for_each_safe(pos, n, &vgpu->gtt.ppgtt_mm_list_head) {
2818		mm = container_of(pos, struct intel_vgpu_mm, ppgtt_mm.list);
2819		if (mm->type == INTEL_GVT_MM_PPGTT) {
2820			mutex_lock(&vgpu->gvt->gtt.ppgtt_mm_lock);
2821			list_del_init(&mm->ppgtt_mm.lru_list);
2822			mutex_unlock(&vgpu->gvt->gtt.ppgtt_mm_lock);
2823			if (mm->ppgtt_mm.shadowed)
2824				invalidate_ppgtt_mm(mm);
2825		}
2826	}
2827}
2828
2829/**
2830 * intel_vgpu_reset_ggtt - reset the GGTT entry
2831 * @vgpu: a vGPU
2832 * @invalidate_old: invalidate old entries
2833 *
2834 * This function is called at the vGPU create stage
2835 * to reset all the GGTT entries.
2836 *
2837 */
2838void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu, bool invalidate_old)
2839{
2840	struct intel_gvt *gvt = vgpu->gvt;
2841	struct intel_gvt_gtt_pte_ops *pte_ops = vgpu->gvt->gtt.pte_ops;
2842	struct intel_gvt_gtt_entry entry = {.type = GTT_TYPE_GGTT_PTE};
2843	struct intel_gvt_gtt_entry old_entry;
2844	u32 index;
2845	u32 num_entries;
2846
2847	pte_ops->set_pfn(&entry, gvt->gtt.scratch_mfn);
2848	pte_ops->set_present(&entry);
2849
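	/*
	 * Point every GGTT entry of both the aperture and hidden ranges at
	 * the scratch page, optionally unmapping whatever the old entries
	 * referenced.
	 */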
2850	index = vgpu_aperture_gmadr_base(vgpu) >> PAGE_SHIFT;
2851	num_entries = vgpu_aperture_sz(vgpu) >> PAGE_SHIFT;
2852	while (num_entries--) {
2853		if (invalidate_old) {
2854			ggtt_get_host_entry(vgpu->gtt.ggtt_mm, &old_entry, index);
2855			ggtt_invalidate_pte(vgpu, &old_entry);
2856		}
2857		ggtt_set_host_entry(vgpu->gtt.ggtt_mm, &entry, index++);
2858	}
2859
2860	index = vgpu_hidden_gmadr_base(vgpu) >> PAGE_SHIFT;
2861	num_entries = vgpu_hidden_sz(vgpu) >> PAGE_SHIFT;
2862	while (num_entries--) {
2863		if (invalidate_old) {
2864			ggtt_get_host_entry(vgpu->gtt.ggtt_mm, &old_entry, index);
2865			ggtt_invalidate_pte(vgpu, &old_entry);
2866		}
2867		ggtt_set_host_entry(vgpu->gtt.ggtt_mm, &entry, index++);
2868	}
2869
2870	ggtt_invalidate(gvt->gt);
2871}
2872
2873/**
2874 * intel_gvt_restore_ggtt - restore all vGPU's ggtt entries
2875 * @gvt: intel gvt device
2876 *
2877 * This function is called at driver resume stage to restore
2878 * GGTT entries of every vGPU.
2879 *
2880 */
2881void intel_gvt_restore_ggtt(struct intel_gvt *gvt)
2882{
2883	struct intel_vgpu *vgpu;
2884	struct intel_vgpu_mm *mm;
2885	int id;
2886	gen8_pte_t pte;
2887	u32 idx, num_low, num_hi, offset;
2888
2889	/* Restore dirty host ggtt for all vGPUs */
2890	idr_for_each_entry(&(gvt)->vgpu_idr, vgpu, id) {
2891		mm = vgpu->gtt.ggtt_mm;
2892
2893		num_low = vgpu_aperture_sz(vgpu) >> PAGE_SHIFT;
2894		offset = vgpu_aperture_gmadr_base(vgpu) >> PAGE_SHIFT;
2895		for (idx = 0; idx < num_low; idx++) {
2896			pte = mm->ggtt_mm.host_ggtt_aperture[idx];
2897			if (pte & _PAGE_PRESENT)
2898				write_pte64(vgpu->gvt->gt->ggtt, offset + idx, pte);
2899		}
2900
2901		num_hi = vgpu_hidden_sz(vgpu) >> PAGE_SHIFT;
2902		offset = vgpu_hidden_gmadr_base(vgpu) >> PAGE_SHIFT;
2903		for (idx = 0; idx < num_hi; idx++) {
2904			pte = mm->ggtt_mm.host_ggtt_hidden[idx];
2905			if (pte & _PAGE_PRESENT)
2906				write_pte64(vgpu->gvt->gt->ggtt, offset + idx, pte);
2907		}
2908	}
2909}
2910