// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2015 Broadcom
 */

/**
 * DOC: VC4 plane module
 *
 * Each DRM plane is a layer of pixels being scanned out by the HVS.
 *
 * At atomic modeset check time, we compute the HVS display element
 * state that would be necessary for displaying the plane (giving us a
 * chance to figure out if a plane configuration is invalid), then at
 * atomic flush time the CRTC will ask us to write our element state
 * into the region of the HVS that it has allocated for us.
 */

#include <drm/drm_atomic.h>
#include <drm/drm_atomic_helper.h>
#include <drm/drm_atomic_uapi.h>
#include <drm/drm_fb_cma_helper.h>
#include <drm/drm_fourcc.h>
#include <drm/drm_gem_framebuffer_helper.h>
#include <drm/drm_plane_helper.h>

#include "uapi/drm/vc4_drm.h"

#include "vc4_drv.h"
#include "vc4_regs.h"

static const struct hvs_format {
	u32 drm; /* DRM_FORMAT_* */
	u32 hvs; /* HVS_FORMAT_* */
	u32 pixel_order;
	u32 pixel_order_hvs5;
} hvs_formats[] = {
	{
		.drm = DRM_FORMAT_XRGB8888,
		.hvs = HVS_PIXEL_FORMAT_RGBA8888,
		.pixel_order = HVS_PIXEL_ORDER_ABGR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
	},
	{
		.drm = DRM_FORMAT_ARGB8888,
		.hvs = HVS_PIXEL_FORMAT_RGBA8888,
		.pixel_order = HVS_PIXEL_ORDER_ABGR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
	},
	{
		.drm = DRM_FORMAT_ABGR8888,
		.hvs = HVS_PIXEL_FORMAT_RGBA8888,
		.pixel_order = HVS_PIXEL_ORDER_ARGB,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
	},
	{
		.drm = DRM_FORMAT_XBGR8888,
		.hvs = HVS_PIXEL_FORMAT_RGBA8888,
		.pixel_order = HVS_PIXEL_ORDER_ARGB,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR,
	},
	{
		.drm = DRM_FORMAT_RGB565,
		.hvs = HVS_PIXEL_FORMAT_RGB565,
		.pixel_order = HVS_PIXEL_ORDER_XRGB,
	},
	{
		.drm = DRM_FORMAT_BGR565,
		.hvs = HVS_PIXEL_FORMAT_RGB565,
		.pixel_order = HVS_PIXEL_ORDER_XBGR,
	},
	{
		.drm = DRM_FORMAT_ARGB1555,
		.hvs = HVS_PIXEL_FORMAT_RGBA5551,
		.pixel_order = HVS_PIXEL_ORDER_ABGR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
	},
	{
		.drm = DRM_FORMAT_XRGB1555,
		.hvs = HVS_PIXEL_FORMAT_RGBA5551,
		.pixel_order = HVS_PIXEL_ORDER_ABGR,
		.pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB,
	},
	{
		.drm = DRM_FORMAT_RGB888,
		.hvs = HVS_PIXEL_FORMAT_RGB888,
		.pixel_order = HVS_PIXEL_ORDER_XRGB,
	},
	{
		.drm = DRM_FORMAT_BGR888,
		.hvs = HVS_PIXEL_FORMAT_RGB888,
		.pixel_order = HVS_PIXEL_ORDER_XBGR,
	},
	{
		.drm = DRM_FORMAT_YUV422,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCBCR,
	},
	{
		.drm = DRM_FORMAT_YVU422,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCRCB,
	},
	{
		.drm = DRM_FORMAT_YUV420,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCBCR,
	},
	{
		.drm = DRM_FORMAT_YVU420,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCRCB,
	},
	{
		.drm = DRM_FORMAT_NV12,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCBCR,
	},
	{
		.drm = DRM_FORMAT_NV21,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCRCB,
	},
	{
		.drm = DRM_FORMAT_NV16,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCBCR,
	},
	{
		.drm = DRM_FORMAT_NV61,
		.hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE,
		.pixel_order = HVS_PIXEL_ORDER_XYCRCB,
	},
};

static const struct hvs_format *vc4_get_hvs_format(u32 drm_format)
{
	unsigned i;

	for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) {
		if (hvs_formats[i].drm == drm_format)
			return &hvs_formats[i];
	}

	return NULL;
}

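/* Picks the scaler mode for one axis: no scaling when source and
 * destination sizes match, the polyphase filter (PPF) for scaling ratios
 * down to 2/3, and the trapezoid (TPZ) filter for heavier downscaling.
 */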
static enum vc4_scaling_mode vc4_get_scaling_mode(u32 src, u32 dst)
{
	if (dst == src)
		return VC4_SCALING_NONE;
	if (3 * dst >= 2 * src)
		return VC4_SCALING_PPF;
	else
		return VC4_SCALING_TPZ;
}

static bool plane_enabled(struct drm_plane_state *state)
{
	return state->fb && !WARN_ON(!state->crtc);
}

static struct drm_plane_state *vc4_plane_duplicate_state(struct drm_plane *plane)
{
	struct vc4_plane_state *vc4_state;

	if (WARN_ON(!plane->state))
		return NULL;

	vc4_state = kmemdup(plane->state, sizeof(*vc4_state), GFP_KERNEL);
	if (!vc4_state)
		return NULL;

	memset(&vc4_state->lbm, 0, sizeof(vc4_state->lbm));
	vc4_state->dlist_initialized = 0;

	__drm_atomic_helper_plane_duplicate_state(plane, &vc4_state->base);

	if (vc4_state->dlist) {
		vc4_state->dlist = kmemdup(vc4_state->dlist,
					   vc4_state->dlist_count * 4,
					   GFP_KERNEL);
		if (!vc4_state->dlist) {
			kfree(vc4_state);
			return NULL;
		}
		vc4_state->dlist_size = vc4_state->dlist_count;
	}

	return &vc4_state->base;
}

static void vc4_plane_destroy_state(struct drm_plane *plane,
				    struct drm_plane_state *state)
{
	struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);

	if (drm_mm_node_allocated(&vc4_state->lbm)) {
		unsigned long irqflags;

		spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags);
		drm_mm_remove_node(&vc4_state->lbm);
		spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags);
	}

	kfree(vc4_state->dlist);
	__drm_atomic_helper_plane_destroy_state(&vc4_state->base);
	kfree(state);
}

/* Called during init to allocate the plane's atomic state. */
static void vc4_plane_reset(struct drm_plane *plane)
{
	struct vc4_plane_state *vc4_state;

	WARN_ON(plane->state);

	vc4_state = kzalloc(sizeof(*vc4_state), GFP_KERNEL);
	if (!vc4_state)
		return;

	__drm_atomic_helper_plane_reset(plane, &vc4_state->base);
}

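/* Accounts for one more dlist entry, growing the backing array (doubling
 * its size, with a minimum of 4 entries) whenever it is full.
 */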
static void vc4_dlist_counter_increment(struct vc4_plane_state *vc4_state)
{
	if (vc4_state->dlist_count == vc4_state->dlist_size) {
		u32 new_size = max(4u, vc4_state->dlist_count * 2);
		u32 *new_dlist = kmalloc_array(new_size, 4, GFP_KERNEL);

		if (!new_dlist)
			return;
		memcpy(new_dlist, vc4_state->dlist, vc4_state->dlist_count * 4);

		kfree(vc4_state->dlist);
		vc4_state->dlist = new_dlist;
		vc4_state->dlist_size = new_size;
	}

	vc4_state->dlist_count++;
}

static void vc4_dlist_write(struct vc4_plane_state *vc4_state, u32 val)
{
	unsigned int idx = vc4_state->dlist_count;

	vc4_dlist_counter_increment(vc4_state);
	vc4_state->dlist[idx] = val;
}

/* Returns the scl0/scl1 field based on whether the dimensions need to
 * be up/down/non-scaled.
 *
 * This is a replication of a table from the spec.
 */
static u32 vc4_get_scl_field(struct drm_plane_state *state, int plane)
{
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);

	switch (vc4_state->x_scaling[plane] << 2 | vc4_state->y_scaling[plane]) {
	case VC4_SCALING_PPF << 2 | VC4_SCALING_PPF:
		return SCALER_CTL0_SCL_H_PPF_V_PPF;
	case VC4_SCALING_TPZ << 2 | VC4_SCALING_PPF:
		return SCALER_CTL0_SCL_H_TPZ_V_PPF;
	case VC4_SCALING_PPF << 2 | VC4_SCALING_TPZ:
		return SCALER_CTL0_SCL_H_PPF_V_TPZ;
	case VC4_SCALING_TPZ << 2 | VC4_SCALING_TPZ:
		return SCALER_CTL0_SCL_H_TPZ_V_TPZ;
	case VC4_SCALING_PPF << 2 | VC4_SCALING_NONE:
		return SCALER_CTL0_SCL_H_PPF_V_NONE;
	case VC4_SCALING_NONE << 2 | VC4_SCALING_PPF:
		return SCALER_CTL0_SCL_H_NONE_V_PPF;
	case VC4_SCALING_NONE << 2 | VC4_SCALING_TPZ:
		return SCALER_CTL0_SCL_H_NONE_V_TPZ;
	case VC4_SCALING_TPZ << 2 | VC4_SCALING_NONE:
		return SCALER_CTL0_SCL_H_TPZ_V_NONE;
	default:
	case VC4_SCALING_NONE << 2 | VC4_SCALING_NONE:
		/* The unity case is independently handled by
		 * SCALER_CTL0_UNITY.
		 */
		return 0;
	}
}

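/* Rescales the plane's destination rectangle so that it fits inside the
 * active area left over once the CRTC's left/right/top/bottom margins
 * (overscan compensation) are applied.
 */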
static int vc4_plane_margins_adj(struct drm_plane_state *pstate)
{
	struct vc4_plane_state *vc4_pstate = to_vc4_plane_state(pstate);
	unsigned int left, right, top, bottom, adjhdisplay, adjvdisplay;
	struct drm_crtc_state *crtc_state;

	crtc_state = drm_atomic_get_new_crtc_state(pstate->state,
						   pstate->crtc);

	vc4_crtc_get_margins(crtc_state, &left, &right, &top, &bottom);
	if (!left && !right && !top && !bottom)
		return 0;

	if (left + right >= crtc_state->mode.hdisplay ||
	    top + bottom >= crtc_state->mode.vdisplay)
		return -EINVAL;

	adjhdisplay = crtc_state->mode.hdisplay - (left + right);
	vc4_pstate->crtc_x = DIV_ROUND_CLOSEST(vc4_pstate->crtc_x *
					       adjhdisplay,
					       crtc_state->mode.hdisplay);
	vc4_pstate->crtc_x += left;
	if (vc4_pstate->crtc_x > crtc_state->mode.hdisplay - right)
		vc4_pstate->crtc_x = crtc_state->mode.hdisplay - right;

	adjvdisplay = crtc_state->mode.vdisplay - (top + bottom);
	vc4_pstate->crtc_y = DIV_ROUND_CLOSEST(vc4_pstate->crtc_y *
					       adjvdisplay,
					       crtc_state->mode.vdisplay);
	vc4_pstate->crtc_y += top;
	if (vc4_pstate->crtc_y > crtc_state->mode.vdisplay - bottom)
		vc4_pstate->crtc_y = crtc_state->mode.vdisplay - bottom;

	vc4_pstate->crtc_w = DIV_ROUND_CLOSEST(vc4_pstate->crtc_w *
					       adjhdisplay,
					       crtc_state->mode.hdisplay);
	vc4_pstate->crtc_h = DIV_ROUND_CLOSEST(vc4_pstate->crtc_h *
					       adjvdisplay,
					       crtc_state->mode.vdisplay);

	if (!vc4_pstate->crtc_w || !vc4_pstate->crtc_h)
		return -EINVAL;

	return 0;
}

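/* Computes the clipped source/destination rectangles, per-plane buffer
 * offsets and scaling modes that the dlist generation relies on.
 */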
static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state)
{
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
	struct drm_framebuffer *fb = state->fb;
	struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0);
	int num_planes = fb->format->num_planes;
	struct drm_crtc_state *crtc_state;
	u32 h_subsample = fb->format->hsub;
	u32 v_subsample = fb->format->vsub;
	int i, ret;

	crtc_state = drm_atomic_get_existing_crtc_state(state->state,
							state->crtc);
	if (!crtc_state) {
		DRM_DEBUG_KMS("Invalid crtc state\n");
		return -EINVAL;
	}

	ret = drm_atomic_helper_check_plane_state(state, crtc_state, 1,
						  INT_MAX, true, true);
	if (ret)
		return ret;

	for (i = 0; i < num_planes; i++)
		vc4_state->offsets[i] = bo->paddr + fb->offsets[i];

	/*
	 * We don't support subpixel source positioning for scaling,
	 * but fractional coordinates can be generated by clipping,
	 * so just round for now.
	 */
	vc4_state->src_x = DIV_ROUND_CLOSEST(state->src.x1, 1 << 16);
	vc4_state->src_y = DIV_ROUND_CLOSEST(state->src.y1, 1 << 16);
	vc4_state->src_w[0] = DIV_ROUND_CLOSEST(state->src.x2, 1 << 16) - vc4_state->src_x;
	vc4_state->src_h[0] = DIV_ROUND_CLOSEST(state->src.y2, 1 << 16) - vc4_state->src_y;

	vc4_state->crtc_x = state->dst.x1;
	vc4_state->crtc_y = state->dst.y1;
	vc4_state->crtc_w = state->dst.x2 - state->dst.x1;
	vc4_state->crtc_h = state->dst.y2 - state->dst.y1;

	ret = vc4_plane_margins_adj(state);
	if (ret)
		return ret;

	vc4_state->x_scaling[0] = vc4_get_scaling_mode(vc4_state->src_w[0],
						       vc4_state->crtc_w);
	vc4_state->y_scaling[0] = vc4_get_scaling_mode(vc4_state->src_h[0],
						       vc4_state->crtc_h);

	vc4_state->is_unity = (vc4_state->x_scaling[0] == VC4_SCALING_NONE &&
			       vc4_state->y_scaling[0] == VC4_SCALING_NONE);

	if (num_planes > 1) {
		vc4_state->is_yuv = true;

		vc4_state->src_w[1] = vc4_state->src_w[0] / h_subsample;
		vc4_state->src_h[1] = vc4_state->src_h[0] / v_subsample;

		vc4_state->x_scaling[1] =
			vc4_get_scaling_mode(vc4_state->src_w[1],
					     vc4_state->crtc_w);
		vc4_state->y_scaling[1] =
			vc4_get_scaling_mode(vc4_state->src_h[1],
					     vc4_state->crtc_h);

		/* YUV conversion requires that horizontal scaling be enabled
		 * on the UV plane even if vc4_get_scaling_mode() returned
		 * VC4_SCALING_NONE (which can happen when the down-scaling
		 * ratio is 0.5). Let's force it to VC4_SCALING_PPF in this
		 * case.
		 */
		if (vc4_state->x_scaling[1] == VC4_SCALING_NONE)
			vc4_state->x_scaling[1] = VC4_SCALING_PPF;
	} else {
		vc4_state->is_yuv = false;
		vc4_state->x_scaling[1] = VC4_SCALING_NONE;
		vc4_state->y_scaling[1] = VC4_SCALING_NONE;
	}

	return 0;
}

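/* Emits the two dlist words configuring the trapezoid (TPZ) scaler for one
 * axis: the scaling factor and its reciprocal.
 */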
static void vc4_write_tpz(struct vc4_plane_state *vc4_state, u32 src, u32 dst)
{
	u32 scale, recip;

	scale = (1 << 16) * src / dst;

	/* The specs note that while the reciprocal would be defined
	 * as (1<<32)/scale, ~0 is close enough.
	 */
	recip = ~0 / scale;

	vc4_dlist_write(vc4_state,
			VC4_SET_FIELD(scale, SCALER_TPZ0_SCALE) |
			VC4_SET_FIELD(0, SCALER_TPZ0_IPHASE));
	vc4_dlist_write(vc4_state,
			VC4_SET_FIELD(recip, SCALER_TPZ1_RECIP));
}

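/* Emits the dlist word configuring the polyphase filter (PPF) scaler for
 * one axis.
 */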
static void vc4_write_ppf(struct vc4_plane_state *vc4_state, u32 src, u32 dst)
{
	u32 scale = (1 << 16) * src / dst;

	vc4_dlist_write(vc4_state,
			SCALER_PPF_AGC |
			VC4_SET_FIELD(scale, SCALER_PPF_SCALE) |
			VC4_SET_FIELD(0, SCALER_PPF_IPHASE));
}

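/* Returns the amount of LBM (line buffer memory) the HVS needs as
 * vertical-scaling context for this plane, or 0 when no vertical scaling
 * is performed.
 */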
static u32 vc4_lbm_size(struct drm_plane_state *state)
{
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
	struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev);
	u32 pix_per_line;
	u32 lbm;

	/* LBM is not needed when there's no vertical scaling. */
	if (vc4_state->y_scaling[0] == VC4_SCALING_NONE &&
	    vc4_state->y_scaling[1] == VC4_SCALING_NONE)
		return 0;

	/*
	 * This can be further optimized in the RGB/YUV444 case if the PPF
	 * decimation factor is between 0.5 and 1.0 by using crtc_w.
	 *
	 * It's not an issue though, since in that case src_w[0] is going
	 * to be greater than or equal to crtc_w.
	 */
	if (vc4_state->x_scaling[0] == VC4_SCALING_TPZ)
		pix_per_line = vc4_state->crtc_w;
	else
		pix_per_line = vc4_state->src_w[0];

	if (!vc4_state->is_yuv) {
		if (vc4_state->y_scaling[0] == VC4_SCALING_TPZ) {
			lbm = pix_per_line * 8;
		} else {
			/* In special cases, this multiplier might be 12. */
			lbm = pix_per_line * 16;
		}
	} else {
		/* There are cases for this going down to a multiplier
		 * of 2, but according to the firmware source, the
		 * table in the docs is somewhat wrong.
		 */
		lbm = pix_per_line * 16;
	}

	/* Align it to 64 or 128 (hvs5) bytes */
	lbm = roundup(lbm, vc4->hvs->hvs5 ? 128 : 64);

	/* Each "word" of the LBM memory contains 2 or 4 (hvs5) pixels */
	lbm /= vc4->hvs->hvs5 ? 4 : 2;

	return lbm;
}

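/* Emits the PPF and/or TPZ scaling parameter words for one scaler channel,
 * as selected by the x_scaling/y_scaling modes computed earlier.
 */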
static void vc4_write_scaling_parameters(struct drm_plane_state *state,
					 int channel)
{
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);

	/* Ch0 H-PPF Word 0: Scaling Parameters */
	if (vc4_state->x_scaling[channel] == VC4_SCALING_PPF) {
		vc4_write_ppf(vc4_state,
			      vc4_state->src_w[channel], vc4_state->crtc_w);
	}

	/* Ch0 V-PPF Words 0-1: Scaling Parameters, Context */
	if (vc4_state->y_scaling[channel] == VC4_SCALING_PPF) {
		vc4_write_ppf(vc4_state,
			      vc4_state->src_h[channel], vc4_state->crtc_h);
		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
	}

	/* Ch0 H-TPZ Words 0-1: Scaling Parameters, Recip */
	if (vc4_state->x_scaling[channel] == VC4_SCALING_TPZ) {
		vc4_write_tpz(vc4_state,
			      vc4_state->src_w[channel], vc4_state->crtc_w);
	}

	/* Ch0 V-TPZ Words 0-2: Scaling Parameters, Recip, Context */
	if (vc4_state->y_scaling[channel] == VC4_SCALING_TPZ) {
		vc4_write_tpz(vc4_state,
			      vc4_state->src_h[channel], vc4_state->crtc_h);
		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
	}
}

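/* Estimates the HVS load (in clk-cycles/sec) and the memory bus load (in
 * bytes/sec) generated by this plane, for use by the load tracker.
 */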
static void vc4_plane_calc_load(struct drm_plane_state *state)
{
	unsigned int hvs_load_shift, vrefresh, i;
	struct drm_framebuffer *fb = state->fb;
	struct vc4_plane_state *vc4_state;
	struct drm_crtc_state *crtc_state;
	unsigned int vscale_factor;
	struct vc4_dev *vc4;

	vc4 = to_vc4_dev(state->plane->dev);
	if (!vc4->load_tracker_available)
		return;

	vc4_state = to_vc4_plane_state(state);
	crtc_state = drm_atomic_get_existing_crtc_state(state->state,
							state->crtc);
	vrefresh = drm_mode_vrefresh(&crtc_state->adjusted_mode);

	/* The HVS is able to process 2 pixels/cycle when scaling the source,
	 * 4 pixels/cycle otherwise.
	 * The alpha blending step seems to be pipelined and always operates
	 * at 4 pixels/cycle, so the limiting factor here seems to be the
	 * scaler block.
	 * HVS load is expressed in clk-cycles/sec (AKA Hz).
	 */
	if (vc4_state->x_scaling[0] != VC4_SCALING_NONE ||
	    vc4_state->x_scaling[1] != VC4_SCALING_NONE ||
	    vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
	    vc4_state->y_scaling[1] != VC4_SCALING_NONE)
		hvs_load_shift = 1;
	else
		hvs_load_shift = 2;

	vc4_state->membus_load = 0;
	vc4_state->hvs_load = 0;
	for (i = 0; i < fb->format->num_planes; i++) {
		/* Even though the bandwidth/plane required for a single frame
		 * is
		 *
		 * vc4_state->src_w[i] * vc4_state->src_h[i] * cpp * vrefresh
		 *
		 * when downscaling, we have to read more pixels per line in
		 * the time frame reserved for a single line, so the bandwidth
		 * demand can be momentarily higher. To account for that, we
		 * calculate the down-scaling factor and multiply the plane
		 * load by this number. We're likely over-estimating the read
		 * demand, but that's better than under-estimating it.
		 */
		vscale_factor = DIV_ROUND_UP(vc4_state->src_h[i],
					     vc4_state->crtc_h);
		vc4_state->membus_load += vc4_state->src_w[i] *
					  vc4_state->src_h[i] * vscale_factor *
					  fb->format->cpp[i];
		vc4_state->hvs_load += vc4_state->crtc_h * vc4_state->crtc_w;
	}

	vc4_state->hvs_load *= vrefresh;
	vc4_state->hvs_load >>= hvs_load_shift;
	vc4_state->membus_load *= vrefresh;
}

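/* Allocates (or reuses) this plane's slice of the HVS line buffer memory
 * and patches its start address into the dlist slot reserved by
 * vc4_plane_mode_set().
 */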
static int vc4_plane_allocate_lbm(struct drm_plane_state *state)
{
	struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev);
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
	unsigned long irqflags;
	u32 lbm_size;

	lbm_size = vc4_lbm_size(state);
	if (!lbm_size)
		return 0;

	if (WARN_ON(!vc4_state->lbm_offset))
		return -EINVAL;

	/* Allocate the LBM memory that the HVS will use for temporary
	 * storage due to our scaling/format conversion.
	 */
	if (!drm_mm_node_allocated(&vc4_state->lbm)) {
		int ret;

		spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags);
		ret = drm_mm_insert_node_generic(&vc4->hvs->lbm_mm,
						 &vc4_state->lbm,
						 lbm_size,
						 vc4->hvs->hvs5 ? 64 : 32,
						 0, 0);
		spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags);

		if (ret)
			return ret;
	} else {
		WARN_ON_ONCE(lbm_size != vc4_state->lbm.size);
	}

	vc4_state->dlist[vc4_state->lbm_offset] = vc4_state->lbm.start;

	return 0;
}

/* Writes out a full display list for an active plane to the plane's
 * private dlist state.
 */
static int vc4_plane_mode_set(struct drm_plane *plane,
			      struct drm_plane_state *state)
{
	struct vc4_dev *vc4 = to_vc4_dev(plane->dev);
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
	struct drm_framebuffer *fb = state->fb;
	u32 ctl0_offset = vc4_state->dlist_count;
	const struct hvs_format *format = vc4_get_hvs_format(fb->format->format);
	u64 base_format_mod = fourcc_mod_broadcom_mod(fb->modifier);
	int num_planes = fb->format->num_planes;
	u32 h_subsample = fb->format->hsub;
	u32 v_subsample = fb->format->vsub;
	bool mix_plane_alpha;
	bool covers_screen;
	u32 scl0, scl1, pitch0;
	u32 tiling, src_y;
	u32 hvs_format = format->hvs;
	unsigned int rotation;
	int ret, i;

	if (vc4_state->dlist_initialized)
		return 0;

	ret = vc4_plane_setup_clipping_and_scaling(state);
	if (ret)
		return ret;

	/* SCL1 is used for Cb/Cr scaling of planar formats.  For RGB
	 * and 4:4:4, scl1 should be set to scl0 so both channels of
	 * the scaler do the same thing.  For YUV, the Y plane needs
	 * to be put in channel 1 and Cb/Cr in channel 0, so we swap
	 * the scl fields here.
	 */
	if (num_planes == 1) {
		scl0 = vc4_get_scl_field(state, 0);
		scl1 = scl0;
	} else {
		scl0 = vc4_get_scl_field(state, 1);
		scl1 = vc4_get_scl_field(state, 0);
	}

	rotation = drm_rotation_simplify(state->rotation,
					 DRM_MODE_ROTATE_0 |
					 DRM_MODE_REFLECT_X |
					 DRM_MODE_REFLECT_Y);

	/* We must point to the last line when Y reflection is enabled. */
	src_y = vc4_state->src_y;
	if (rotation & DRM_MODE_REFLECT_Y)
		src_y += vc4_state->src_h[0] - 1;

	switch (base_format_mod) {
	case DRM_FORMAT_MOD_LINEAR:
		tiling = SCALER_CTL0_TILING_LINEAR;
		pitch0 = VC4_SET_FIELD(fb->pitches[0], SCALER_SRC_PITCH);

		/* Adjust the base pointer to the first pixel to be scanned
		 * out.
		 */
		for (i = 0; i < num_planes; i++) {
			vc4_state->offsets[i] += src_y /
						 (i ? v_subsample : 1) *
						 fb->pitches[i];

			vc4_state->offsets[i] += vc4_state->src_x /
						 (i ? h_subsample : 1) *
						 fb->format->cpp[i];
		}

		break;

	case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED: {
		u32 tile_size_shift = 12; /* T tiles are 4kb */
		/* Whole-tile offsets, mostly for setting the pitch. */
		u32 tile_w_shift = fb->format->cpp[0] == 2 ? 6 : 5;
		u32 tile_h_shift = 5; /* 16 and 32bpp are 32 pixels high */
		u32 tile_w_mask = (1 << tile_w_shift) - 1;
		/* The height mask on 32-bit-per-pixel tiles is 63, i.e. twice
		 * the height (in pixels) of a 4k tile.
		 */
		u32 tile_h_mask = (2 << tile_h_shift) - 1;
		/* For T-tiled, the FB pitch is "how many bytes from one row to
		 * the next, such that
		 *
		 *	pitch * tile_h == tile_size * tiles_per_row
		 */
		u32 tiles_w = fb->pitches[0] >> (tile_size_shift - tile_h_shift);
		u32 tiles_l = vc4_state->src_x >> tile_w_shift;
		u32 tiles_r = tiles_w - tiles_l;
		u32 tiles_t = src_y >> tile_h_shift;
		/* Intra-tile offsets, which modify the base address (the
		 * SCALER_PITCH0_TILE_Y_OFFSET tells HVS how to walk from that
		 * base address).
		 */
		u32 tile_y = (src_y >> 4) & 1;
		u32 subtile_y = (src_y >> 2) & 3;
		u32 utile_y = src_y & 3;
		u32 x_off = vc4_state->src_x & tile_w_mask;
		u32 y_off = src_y & tile_h_mask;

		/* When Y reflection is requested we must set the
		 * SCALER_PITCH0_TILE_LINE_DIR flag to tell HVS that all lines
		 * after the initial one should be fetched in descending order,
		 * which makes sense since we start from the last line and go
		 * backward.
		 * Don't know why we need y_off = max_y_off - y_off, but it's
		 * definitely required (I guess it's also related to the "going
		 * backward" situation).
		 */
		if (rotation & DRM_MODE_REFLECT_Y) {
			y_off = tile_h_mask - y_off;
			pitch0 = SCALER_PITCH0_TILE_LINE_DIR;
		} else {
			pitch0 = 0;
		}

		tiling = SCALER_CTL0_TILING_256B_OR_T;
		pitch0 |= (VC4_SET_FIELD(x_off, SCALER_PITCH0_SINK_PIX) |
			   VC4_SET_FIELD(y_off, SCALER_PITCH0_TILE_Y_OFFSET) |
			   VC4_SET_FIELD(tiles_l, SCALER_PITCH0_TILE_WIDTH_L) |
			   VC4_SET_FIELD(tiles_r, SCALER_PITCH0_TILE_WIDTH_R));
		vc4_state->offsets[0] += tiles_t * (tiles_w << tile_size_shift);
		vc4_state->offsets[0] += subtile_y << 8;
		vc4_state->offsets[0] += utile_y << 4;

		/* Rows of tiles alternate left-to-right and right-to-left. */
		if (tiles_t & 1) {
			pitch0 |= SCALER_PITCH0_TILE_INITIAL_LINE_DIR;
			vc4_state->offsets[0] += (tiles_w - tiles_l) <<
						 tile_size_shift;
			vc4_state->offsets[0] -= (1 + !tile_y) << 10;
		} else {
			vc4_state->offsets[0] += tiles_l << tile_size_shift;
			vc4_state->offsets[0] += tile_y << 10;
		}

		break;
	}

	case DRM_FORMAT_MOD_BROADCOM_SAND64:
	case DRM_FORMAT_MOD_BROADCOM_SAND128:
	case DRM_FORMAT_MOD_BROADCOM_SAND256: {
		uint32_t param = fourcc_mod_broadcom_param(fb->modifier);
		u32 tile_w, tile, x_off, pix_per_tile;

		hvs_format = HVS_PIXEL_FORMAT_H264;

		switch (base_format_mod) {
		case DRM_FORMAT_MOD_BROADCOM_SAND64:
			tiling = SCALER_CTL0_TILING_64B;
			tile_w = 64;
			break;
		case DRM_FORMAT_MOD_BROADCOM_SAND128:
			tiling = SCALER_CTL0_TILING_128B;
			tile_w = 128;
			break;
		case DRM_FORMAT_MOD_BROADCOM_SAND256:
			tiling = SCALER_CTL0_TILING_256B_OR_T;
			tile_w = 256;
			break;
		default:
			break;
		}

		if (param > SCALER_TILE_HEIGHT_MASK) {
			DRM_DEBUG_KMS("SAND height too large (%d)\n", param);
			return -EINVAL;
		}

		pix_per_tile = tile_w / fb->format->cpp[0];
		tile = vc4_state->src_x / pix_per_tile;
		x_off = vc4_state->src_x % pix_per_tile;

		/* Adjust the base pointer to the first pixel to be scanned
		 * out.
		 */
		for (i = 0; i < num_planes; i++) {
			vc4_state->offsets[i] += param * tile_w * tile;
			vc4_state->offsets[i] += src_y /
						 (i ? v_subsample : 1) *
						 tile_w;
			vc4_state->offsets[i] += x_off /
						 (i ? h_subsample : 1) *
						 fb->format->cpp[i];
		}

		pitch0 = VC4_SET_FIELD(param, SCALER_TILE_HEIGHT);
		break;
	}

	default:
		DRM_DEBUG_KMS("Unsupported FB tiling flag 0x%16llx",
			      (long long)fb->modifier);
		return -EINVAL;
	}

	/* Don't waste cycles mixing with plane alpha if the set alpha
	 * is opaque or there is no per-pixel alpha information.
	 * In any case we use the alpha property value as the fixed alpha.
	 */
	mix_plane_alpha = state->alpha != DRM_BLEND_ALPHA_OPAQUE &&
			  fb->format->has_alpha;

	if (!vc4->hvs->hvs5) {
		/* Control word */
		vc4_dlist_write(vc4_state,
				SCALER_CTL0_VALID |
				(rotation & DRM_MODE_REFLECT_X ? SCALER_CTL0_HFLIP : 0) |
				(rotation & DRM_MODE_REFLECT_Y ? SCALER_CTL0_VFLIP : 0) |
				VC4_SET_FIELD(SCALER_CTL0_RGBA_EXPAND_ROUND, SCALER_CTL0_RGBA_EXPAND) |
				(format->pixel_order << SCALER_CTL0_ORDER_SHIFT) |
				(hvs_format << SCALER_CTL0_PIXEL_FORMAT_SHIFT) |
				VC4_SET_FIELD(tiling, SCALER_CTL0_TILING) |
				(vc4_state->is_unity ? SCALER_CTL0_UNITY : 0) |
				VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) |
				VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1));

		/* Position Word 0: Image Positions and Alpha Value */
		vc4_state->pos0_offset = vc4_state->dlist_count;
		vc4_dlist_write(vc4_state,
				VC4_SET_FIELD(state->alpha >> 8, SCALER_POS0_FIXED_ALPHA) |
				VC4_SET_FIELD(vc4_state->crtc_x, SCALER_POS0_START_X) |
				VC4_SET_FIELD(vc4_state->crtc_y, SCALER_POS0_START_Y));

		/* Position Word 1: Scaled Image Dimensions. */
		if (!vc4_state->is_unity) {
			vc4_dlist_write(vc4_state,
					VC4_SET_FIELD(vc4_state->crtc_w,
						      SCALER_POS1_SCL_WIDTH) |
					VC4_SET_FIELD(vc4_state->crtc_h,
						      SCALER_POS1_SCL_HEIGHT));
		}

		/* Position Word 2: Source Image Size, Alpha */
		vc4_state->pos2_offset = vc4_state->dlist_count;
		vc4_dlist_write(vc4_state,
				VC4_SET_FIELD(fb->format->has_alpha ?
					      SCALER_POS2_ALPHA_MODE_PIPELINE :
					      SCALER_POS2_ALPHA_MODE_FIXED,
					      SCALER_POS2_ALPHA_MODE) |
				(mix_plane_alpha ? SCALER_POS2_ALPHA_MIX : 0) |
				(fb->format->has_alpha ?
						SCALER_POS2_ALPHA_PREMULT : 0) |
				VC4_SET_FIELD(vc4_state->src_w[0],
					      SCALER_POS2_WIDTH) |
				VC4_SET_FIELD(vc4_state->src_h[0],
					      SCALER_POS2_HEIGHT));

		/* Position Word 3: Context.  Written by the HVS. */
		vc4_dlist_write(vc4_state, 0xc0c0c0c0);

	} else {
		u32 hvs_pixel_order = format->pixel_order;

		if (format->pixel_order_hvs5)
			hvs_pixel_order = format->pixel_order_hvs5;

		/* Control word */
		vc4_dlist_write(vc4_state,
				SCALER_CTL0_VALID |
				(hvs_pixel_order << SCALER_CTL0_ORDER_SHIFT) |
				(hvs_format << SCALER_CTL0_PIXEL_FORMAT_SHIFT) |
				VC4_SET_FIELD(tiling, SCALER_CTL0_TILING) |
				(vc4_state->is_unity ?
						SCALER5_CTL0_UNITY : 0) |
				VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) |
				VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1) |
				SCALER5_CTL0_ALPHA_EXPAND |
				SCALER5_CTL0_RGB_EXPAND);

		/* Position Word 0: Image Positions and Alpha Value */
		vc4_state->pos0_offset = vc4_state->dlist_count;
		vc4_dlist_write(vc4_state,
				(rotation & DRM_MODE_REFLECT_Y ?
						SCALER5_POS0_VFLIP : 0) |
				VC4_SET_FIELD(vc4_state->crtc_x,
					      SCALER_POS0_START_X) |
				(rotation & DRM_MODE_REFLECT_X ?
					      SCALER5_POS0_HFLIP : 0) |
				VC4_SET_FIELD(vc4_state->crtc_y,
					      SCALER5_POS0_START_Y)
			       );

		/* Control Word 2 */
		vc4_dlist_write(vc4_state,
				VC4_SET_FIELD(state->alpha >> 4,
					      SCALER5_CTL2_ALPHA) |
				(fb->format->has_alpha ?
					SCALER5_CTL2_ALPHA_PREMULT : 0) |
				(mix_plane_alpha ?
					SCALER5_CTL2_ALPHA_MIX : 0) |
				VC4_SET_FIELD(fb->format->has_alpha ?
				      SCALER5_CTL2_ALPHA_MODE_PIPELINE :
				      SCALER5_CTL2_ALPHA_MODE_FIXED,
				      SCALER5_CTL2_ALPHA_MODE)
			       );

		/* Position Word 1: Scaled Image Dimensions. */
		if (!vc4_state->is_unity) {
			vc4_dlist_write(vc4_state,
					VC4_SET_FIELD(vc4_state->crtc_w,
						      SCALER5_POS1_SCL_WIDTH) |
					VC4_SET_FIELD(vc4_state->crtc_h,
						      SCALER5_POS1_SCL_HEIGHT));
		}

		/* Position Word 2: Source Image Size */
		vc4_state->pos2_offset = vc4_state->dlist_count;
		vc4_dlist_write(vc4_state,
				VC4_SET_FIELD(vc4_state->src_w[0],
					      SCALER5_POS2_WIDTH) |
				VC4_SET_FIELD(vc4_state->src_h[0],
					      SCALER5_POS2_HEIGHT));

		/* Position Word 3: Context.  Written by the HVS. */
		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
	}

	/* Pointer Word 0/1/2: RGB / Y / Cb / Cr Pointers
	 *
	 * The pointers may be any byte address.
	 */
	vc4_state->ptr0_offset = vc4_state->dlist_count;
	for (i = 0; i < num_planes; i++)
		vc4_dlist_write(vc4_state, vc4_state->offsets[i]);

	/* Pointer Context Word 0/1/2: Written by the HVS */
	for (i = 0; i < num_planes; i++)
		vc4_dlist_write(vc4_state, 0xc0c0c0c0);

	/* Pitch word 0 */
	vc4_dlist_write(vc4_state, pitch0);

	/* Pitch word 1/2 */
	for (i = 1; i < num_planes; i++) {
		if (hvs_format != HVS_PIXEL_FORMAT_H264) {
			vc4_dlist_write(vc4_state,
					VC4_SET_FIELD(fb->pitches[i],
						      SCALER_SRC_PITCH));
		} else {
			vc4_dlist_write(vc4_state, pitch0);
		}
	}

	/* Colorspace conversion words */
	if (vc4_state->is_yuv) {
		vc4_dlist_write(vc4_state, SCALER_CSC0_ITR_R_601_5);
		vc4_dlist_write(vc4_state, SCALER_CSC1_ITR_R_601_5);
		vc4_dlist_write(vc4_state, SCALER_CSC2_ITR_R_601_5);
	}

	vc4_state->lbm_offset = 0;

	if (vc4_state->x_scaling[0] != VC4_SCALING_NONE ||
	    vc4_state->x_scaling[1] != VC4_SCALING_NONE ||
	    vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
	    vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
		/* Reserve a slot for the LBM Base Address. The real value will
		 * be set when calling vc4_plane_allocate_lbm().
		 */
		if (vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
		    vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
			vc4_state->lbm_offset = vc4_state->dlist_count;
			vc4_dlist_counter_increment(vc4_state);
		}

		if (num_planes > 1) {
			/* Emit Cb/Cr as channel 0 and Y as channel
			 * 1. This matches how we set up scl0/scl1
			 * above.
			 */
			vc4_write_scaling_parameters(state, 1);
		}
		vc4_write_scaling_parameters(state, 0);

		/* If any PPF setup was done, then all the kernel
		 * pointers get uploaded.
		 */
		if (vc4_state->x_scaling[0] == VC4_SCALING_PPF ||
		    vc4_state->y_scaling[0] == VC4_SCALING_PPF ||
		    vc4_state->x_scaling[1] == VC4_SCALING_PPF ||
		    vc4_state->y_scaling[1] == VC4_SCALING_PPF) {
			u32 kernel = VC4_SET_FIELD(vc4->hvs->mitchell_netravali_filter.start,
						   SCALER_PPF_KERNEL_OFFSET);

			/* HPPF plane 0 */
			vc4_dlist_write(vc4_state, kernel);
			/* VPPF plane 0 */
			vc4_dlist_write(vc4_state, kernel);
			/* HPPF plane 1 */
			vc4_dlist_write(vc4_state, kernel);
			/* VPPF plane 1 */
			vc4_dlist_write(vc4_state, kernel);
		}
	}

	vc4_state->dlist[ctl0_offset] |=
		VC4_SET_FIELD(vc4_state->dlist_count, SCALER_CTL0_SIZE);

	/* crtc_* are already clipped coordinates. */
	covers_screen = vc4_state->crtc_x == 0 && vc4_state->crtc_y == 0 &&
			vc4_state->crtc_w == state->crtc->mode.hdisplay &&
			vc4_state->crtc_h == state->crtc->mode.vdisplay;
	/* Background fill might be necessary when the plane has per-pixel
	 * alpha content or a non-opaque plane alpha (and could therefore
	 * blend from the background), or when it does not cover the entire
	 * screen.
	 */
	vc4_state->needs_bg_fill = fb->format->has_alpha || !covers_screen ||
				   state->alpha != DRM_BLEND_ALPHA_OPAQUE;

	/* Flag the dlist as initialized to avoid checking it twice in case
	 * the async update check already called vc4_plane_mode_set() and
	 * decided to fall back to a sync update because an async update was
	 * not possible.
	 */
	vc4_state->dlist_initialized = 1;

	vc4_plane_calc_load(state);

	return 0;
}

1047
1048/* If a modeset involves changing the setup of a plane, the atomic
1049 * infrastructure will call this to validate a proposed plane setup.
1050 * However, if a plane isn't getting updated, this (and the
1051 * corresponding vc4_plane_atomic_update) won't get called.  Thus, we
1052 * compute the dlist here and have all active plane dlists get updated
1053 * in the CRTC's flush.
1054 */
1055static int vc4_plane_atomic_check(struct drm_plane *plane,
1056				  struct drm_plane_state *state)
1057{
1058	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
1059	int ret;
1060
1061	vc4_state->dlist_count = 0;
1062
1063	if (!plane_enabled(state))
1064		return 0;
1065
1066	ret = vc4_plane_mode_set(plane, state);
1067	if (ret)
1068		return ret;
1069
1070	return vc4_plane_allocate_lbm(state);
1071}
1072
1073static void vc4_plane_atomic_update(struct drm_plane *plane,
1074				    struct drm_plane_state *old_state)
1075{
1076	/* No contents here.  Since we don't know where in the CRTC's
1077	 * dlist we should be stored, our dlist is uploaded to the
1078	 * hardware with vc4_plane_write_dlist() at CRTC atomic_flush
1079	 * time.
1080	 */
1081}
1082
1083u32 vc4_plane_write_dlist(struct drm_plane *plane, u32 __iomem *dlist)
1084{
1085	struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state);
1086	int i;
1087
1088	vc4_state->hw_dlist = dlist;
1089
1090	/* Can't memcpy_toio() because it needs to be 32-bit writes. */
1091	for (i = 0; i < vc4_state->dlist_count; i++)
1092		writel(vc4_state->dlist[i], &dlist[i]);
1093
1094	return vc4_state->dlist_count;
1095}
1096
1097u32 vc4_plane_dlist_size(const struct drm_plane_state *state)
1098{
1099	const struct vc4_plane_state *vc4_state =
1100		container_of(state, typeof(*vc4_state), base);
1101
1102	return vc4_state->dlist_count;
1103}
1104
1105/* Updates the plane to immediately (well, once the FIFO needs
1106 * refilling) scan out from at a new framebuffer.
1107 */
1108void vc4_plane_async_set_fb(struct drm_plane *plane, struct drm_framebuffer *fb)
1109{
1110	struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state);
1111	struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0);
1112	uint32_t addr;
1113
1114	/* We're skipping the address adjustment for negative origin,
1115	 * because this is only called on the primary plane.
1116	 */
1117	WARN_ON_ONCE(plane->state->crtc_x < 0 || plane->state->crtc_y < 0);
1118	addr = bo->paddr + fb->offsets[0];
1119
1120	/* Write the new address into the hardware immediately.  The
1121	 * scanout will start from this address as soon as the FIFO
1122	 * needs to refill with pixels.
1123	 */
1124	writel(addr, &vc4_state->hw_dlist[vc4_state->ptr0_offset]);
1125
1126	/* Also update the CPU-side dlist copy, so that any later
1127	 * atomic updates that don't do a new modeset on our plane
1128	 * also use our updated address.
1129	 */
1130	vc4_state->dlist[vc4_state->ptr0_offset] = addr;
1131}
1132
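/* Applies an async (cursor) update by copying the new state over the
 * current one and patching only the pos0, pos2 and ptr0 words directly in
 * the hardware dlist.
 */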
static void vc4_plane_atomic_async_update(struct drm_plane *plane,
					  struct drm_plane_state *state)
{
	struct vc4_plane_state *vc4_state, *new_vc4_state;

	swap(plane->state->fb, state->fb);
	plane->state->crtc_x = state->crtc_x;
	plane->state->crtc_y = state->crtc_y;
	plane->state->crtc_w = state->crtc_w;
	plane->state->crtc_h = state->crtc_h;
	plane->state->src_x = state->src_x;
	plane->state->src_y = state->src_y;
	plane->state->src_w = state->src_w;
	plane->state->src_h = state->src_h;
	plane->state->alpha = state->alpha;
	plane->state->pixel_blend_mode = state->pixel_blend_mode;
	plane->state->rotation = state->rotation;
	plane->state->zpos = state->zpos;
	plane->state->normalized_zpos = state->normalized_zpos;
	plane->state->color_encoding = state->color_encoding;
	plane->state->color_range = state->color_range;
	plane->state->src = state->src;
	plane->state->dst = state->dst;
	plane->state->visible = state->visible;

	new_vc4_state = to_vc4_plane_state(state);
	vc4_state = to_vc4_plane_state(plane->state);

	vc4_state->crtc_x = new_vc4_state->crtc_x;
	vc4_state->crtc_y = new_vc4_state->crtc_y;
	vc4_state->crtc_h = new_vc4_state->crtc_h;
	vc4_state->crtc_w = new_vc4_state->crtc_w;
	vc4_state->src_x = new_vc4_state->src_x;
	vc4_state->src_y = new_vc4_state->src_y;
	memcpy(vc4_state->src_w, new_vc4_state->src_w,
	       sizeof(vc4_state->src_w));
	memcpy(vc4_state->src_h, new_vc4_state->src_h,
	       sizeof(vc4_state->src_h));
	memcpy(vc4_state->x_scaling, new_vc4_state->x_scaling,
	       sizeof(vc4_state->x_scaling));
	memcpy(vc4_state->y_scaling, new_vc4_state->y_scaling,
	       sizeof(vc4_state->y_scaling));
	vc4_state->is_unity = new_vc4_state->is_unity;
	vc4_state->is_yuv = new_vc4_state->is_yuv;
	memcpy(vc4_state->offsets, new_vc4_state->offsets,
	       sizeof(vc4_state->offsets));
	vc4_state->needs_bg_fill = new_vc4_state->needs_bg_fill;

	/* Update the current vc4_state pos0, pos2 and ptr0 dlist entries. */
	vc4_state->dlist[vc4_state->pos0_offset] =
		new_vc4_state->dlist[vc4_state->pos0_offset];
	vc4_state->dlist[vc4_state->pos2_offset] =
		new_vc4_state->dlist[vc4_state->pos2_offset];
	vc4_state->dlist[vc4_state->ptr0_offset] =
		new_vc4_state->dlist[vc4_state->ptr0_offset];

	/* Note that we can't just call vc4_plane_write_dlist()
	 * because that would smash the context data that the HVS is
	 * currently using.
	 */
	writel(vc4_state->dlist[vc4_state->pos0_offset],
	       &vc4_state->hw_dlist[vc4_state->pos0_offset]);
	writel(vc4_state->dlist[vc4_state->pos2_offset],
	       &vc4_state->hw_dlist[vc4_state->pos2_offset]);
	writel(vc4_state->dlist[vc4_state->ptr0_offset],
	       &vc4_state->hw_dlist[vc4_state->ptr0_offset]);
}

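/* An async update is only allowed when the dlist layout is unchanged and
 * nothing but the pos0, pos2, ptr0 (and LBM) words differ; anything else
 * must fall back to a regular synchronous commit.
 */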
static int vc4_plane_atomic_async_check(struct drm_plane *plane,
					struct drm_plane_state *state)
{
	struct vc4_plane_state *old_vc4_state, *new_vc4_state;
	int ret;
	u32 i;

	ret = vc4_plane_mode_set(plane, state);
	if (ret)
		return ret;

	old_vc4_state = to_vc4_plane_state(plane->state);
	new_vc4_state = to_vc4_plane_state(state);
	if (old_vc4_state->dlist_count != new_vc4_state->dlist_count ||
	    old_vc4_state->pos0_offset != new_vc4_state->pos0_offset ||
	    old_vc4_state->pos2_offset != new_vc4_state->pos2_offset ||
	    old_vc4_state->ptr0_offset != new_vc4_state->ptr0_offset ||
	    vc4_lbm_size(plane->state) != vc4_lbm_size(state))
		return -EINVAL;

	/* Only the pos0, pos2 and ptr0 DWORDS can be updated in an async
	 * update; if anything else has changed, fall back to a sync update.
	 */
	for (i = 0; i < new_vc4_state->dlist_count; i++) {
		if (i == new_vc4_state->pos0_offset ||
		    i == new_vc4_state->pos2_offset ||
		    i == new_vc4_state->ptr0_offset ||
		    (new_vc4_state->lbm_offset &&
		     i == new_vc4_state->lbm_offset))
			continue;

		if (new_vc4_state->dlist[i] != old_vc4_state->dlist[i])
			return -EINVAL;
	}

	return 0;
}

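/* Sets up implicit fencing via drm_gem_fb_prepare_fb() and takes a BO
 * usecnt reference on the new framebuffer; vc4_cleanup_fb() drops it
 * again.
 */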
static int vc4_prepare_fb(struct drm_plane *plane,
			  struct drm_plane_state *state)
{
	struct vc4_bo *bo;
	int ret;

	if (!state->fb)
		return 0;

	bo = to_vc4_bo(&drm_fb_cma_get_gem_obj(state->fb, 0)->base);

	drm_gem_fb_prepare_fb(plane, state);

	if (plane->state->fb == state->fb)
		return 0;

	ret = vc4_bo_inc_usecnt(bo);
	if (ret)
		return ret;

	return 0;
}

static void vc4_cleanup_fb(struct drm_plane *plane,
			   struct drm_plane_state *state)
{
	struct vc4_bo *bo;

	if (plane->state->fb == state->fb || !state->fb)
		return;

	bo = to_vc4_bo(&drm_fb_cma_get_gem_obj(state->fb, 0)->base);
	vc4_bo_dec_usecnt(bo);
}

static const struct drm_plane_helper_funcs vc4_plane_helper_funcs = {
	.atomic_check = vc4_plane_atomic_check,
	.atomic_update = vc4_plane_atomic_update,
	.prepare_fb = vc4_prepare_fb,
	.cleanup_fb = vc4_cleanup_fb,
	.atomic_async_check = vc4_plane_atomic_async_check,
	.atomic_async_update = vc4_plane_atomic_async_update,
};

static void vc4_plane_destroy(struct drm_plane *plane)
{
	drm_plane_cleanup(plane);
}

static bool vc4_format_mod_supported(struct drm_plane *plane,
				     uint32_t format,
				     uint64_t modifier)
{
	/* Support T_TILING for RGB formats only. */
	switch (format) {
	case DRM_FORMAT_XRGB8888:
	case DRM_FORMAT_ARGB8888:
	case DRM_FORMAT_ABGR8888:
	case DRM_FORMAT_XBGR8888:
	case DRM_FORMAT_RGB565:
	case DRM_FORMAT_BGR565:
	case DRM_FORMAT_ARGB1555:
	case DRM_FORMAT_XRGB1555:
		switch (fourcc_mod_broadcom_mod(modifier)) {
		case DRM_FORMAT_MOD_LINEAR:
		case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED:
			return true;
		default:
			return false;
		}
	case DRM_FORMAT_NV12:
	case DRM_FORMAT_NV21:
		switch (fourcc_mod_broadcom_mod(modifier)) {
		case DRM_FORMAT_MOD_LINEAR:
		case DRM_FORMAT_MOD_BROADCOM_SAND64:
		case DRM_FORMAT_MOD_BROADCOM_SAND128:
		case DRM_FORMAT_MOD_BROADCOM_SAND256:
			return true;
		default:
			return false;
		}
	case DRM_FORMAT_RGBX1010102:
	case DRM_FORMAT_BGRX1010102:
	case DRM_FORMAT_RGBA1010102:
	case DRM_FORMAT_BGRA1010102:
	case DRM_FORMAT_YUV422:
	case DRM_FORMAT_YVU422:
	case DRM_FORMAT_YUV420:
	case DRM_FORMAT_YVU420:
	case DRM_FORMAT_NV16:
	case DRM_FORMAT_NV61:
	default:
		return (modifier == DRM_FORMAT_MOD_LINEAR);
	}
}

static const struct drm_plane_funcs vc4_plane_funcs = {
	.update_plane = drm_atomic_helper_update_plane,
	.disable_plane = drm_atomic_helper_disable_plane,
	.destroy = vc4_plane_destroy,
	.set_property = NULL,
	.reset = vc4_plane_reset,
	.atomic_duplicate_state = vc4_plane_duplicate_state,
	.atomic_destroy_state = vc4_plane_destroy_state,
	.format_mod_supported = vc4_format_mod_supported,
};

struct drm_plane *vc4_plane_init(struct drm_device *dev,
				 enum drm_plane_type type)
{
	struct drm_plane *plane = NULL;
	struct vc4_plane *vc4_plane;
	u32 formats[ARRAY_SIZE(hvs_formats)];
	int ret = 0;
	unsigned i;
	static const uint64_t modifiers[] = {
		DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED,
		DRM_FORMAT_MOD_BROADCOM_SAND128,
		DRM_FORMAT_MOD_BROADCOM_SAND64,
		DRM_FORMAT_MOD_BROADCOM_SAND256,
		DRM_FORMAT_MOD_LINEAR,
		DRM_FORMAT_MOD_INVALID
	};

	vc4_plane = devm_kzalloc(dev->dev, sizeof(*vc4_plane),
				 GFP_KERNEL);
	if (!vc4_plane)
		return ERR_PTR(-ENOMEM);

	for (i = 0; i < ARRAY_SIZE(hvs_formats); i++)
		formats[i] = hvs_formats[i].drm;

	plane = &vc4_plane->base;
	ret = drm_universal_plane_init(dev, plane, 0,
				       &vc4_plane_funcs,
				       formats, ARRAY_SIZE(formats),
				       modifiers, type, NULL);
	if (ret)
		return ERR_PTR(ret);

	drm_plane_helper_add(plane, &vc4_plane_helper_funcs);

	drm_plane_create_alpha_property(plane);
	drm_plane_create_rotation_property(plane, DRM_MODE_ROTATE_0,
					   DRM_MODE_ROTATE_0 |
					   DRM_MODE_ROTATE_180 |
					   DRM_MODE_REFLECT_X |
					   DRM_MODE_REFLECT_Y);

	return plane;
}

int vc4_plane_create_additional_planes(struct drm_device *drm)
{
	struct drm_plane *cursor_plane;
	struct drm_crtc *crtc;
	unsigned int i;

	/* Set up some arbitrary number of planes.  We're not limited
	 * by a set number of physical registers, just the space in
	 * the HVS (16k) and how small a plane can be (28 bytes).
	 * However, each plane we set up takes up some memory, and
	 * increases the cost of looping over planes, which atomic
	 * modesetting does quite a bit.  As a result, we pick a
	 * modest number of planes to expose, that should hopefully
	 * still cover any sane usecase.
	 */
	for (i = 0; i < 16; i++) {
		struct drm_plane *plane =
			vc4_plane_init(drm, DRM_PLANE_TYPE_OVERLAY);

		if (IS_ERR(plane))
			continue;

		plane->possible_crtcs =
			GENMASK(drm->mode_config.num_crtc - 1, 0);
	}

	drm_for_each_crtc(crtc, drm) {
		/* Set up the legacy cursor after overlay initialization,
		 * since we overlay planes on the CRTC in the order they were
		 * initialized.
		 */
		cursor_plane = vc4_plane_init(drm, DRM_PLANE_TYPE_CURSOR);
		if (!IS_ERR(cursor_plane)) {
			cursor_plane->possible_crtcs = drm_crtc_mask(crtc);
			crtc->cursor = cursor_plane;
		}
	}

	return 0;
}