1// SPDX-License-Identifier: GPL-2.0-only 2/* 3 * Copyright (C) 2015 Broadcom 4 */ 5 6/** 7 * DOC: VC4 plane module 8 * 9 * Each DRM plane is a layer of pixels being scanned out by the HVS. 10 * 11 * At atomic modeset check time, we compute the HVS display element 12 * state that would be necessary for displaying the plane (giving us a 13 * chance to figure out if a plane configuration is invalid), then at 14 * atomic flush time the CRTC will ask us to write our element state 15 * into the region of the HVS that it has allocated for us. 16 */ 17 18#include <drm/drm_atomic.h> 19#include <drm/drm_atomic_helper.h> 20#include <drm/drm_atomic_uapi.h> 21#include <drm/drm_fb_cma_helper.h> 22#include <drm/drm_fourcc.h> 23#include <drm/drm_gem_framebuffer_helper.h> 24#include <drm/drm_plane_helper.h> 25 26#include "uapi/drm/vc4_drm.h" 27 28#include "vc4_drv.h" 29#include "vc4_regs.h" 30 31static const struct hvs_format { 32 u32 drm; /* DRM_FORMAT_* */ 33 u32 hvs; /* HVS_FORMAT_* */ 34 u32 pixel_order; 35 u32 pixel_order_hvs5; 36} hvs_formats[] = { 37 { 38 .drm = DRM_FORMAT_XRGB8888, 39 .hvs = HVS_PIXEL_FORMAT_RGBA8888, 40 .pixel_order = HVS_PIXEL_ORDER_ABGR, 41 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB, 42 }, 43 { 44 .drm = DRM_FORMAT_ARGB8888, 45 .hvs = HVS_PIXEL_FORMAT_RGBA8888, 46 .pixel_order = HVS_PIXEL_ORDER_ABGR, 47 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB, 48 }, 49 { 50 .drm = DRM_FORMAT_ABGR8888, 51 .hvs = HVS_PIXEL_FORMAT_RGBA8888, 52 .pixel_order = HVS_PIXEL_ORDER_ARGB, 53 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR, 54 }, 55 { 56 .drm = DRM_FORMAT_XBGR8888, 57 .hvs = HVS_PIXEL_FORMAT_RGBA8888, 58 .pixel_order = HVS_PIXEL_ORDER_ARGB, 59 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ABGR, 60 }, 61 { 62 .drm = DRM_FORMAT_RGB565, 63 .hvs = HVS_PIXEL_FORMAT_RGB565, 64 .pixel_order = HVS_PIXEL_ORDER_XRGB, 65 }, 66 { 67 .drm = DRM_FORMAT_BGR565, 68 .hvs = HVS_PIXEL_FORMAT_RGB565, 69 .pixel_order = HVS_PIXEL_ORDER_XBGR, 70 }, 71 { 72 .drm = DRM_FORMAT_ARGB1555, 73 .hvs = 
HVS_PIXEL_FORMAT_RGBA5551, 74 .pixel_order = HVS_PIXEL_ORDER_ABGR, 75 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB, 76 }, 77 { 78 .drm = DRM_FORMAT_XRGB1555, 79 .hvs = HVS_PIXEL_FORMAT_RGBA5551, 80 .pixel_order = HVS_PIXEL_ORDER_ABGR, 81 .pixel_order_hvs5 = HVS_PIXEL_ORDER_ARGB, 82 }, 83 { 84 .drm = DRM_FORMAT_RGB888, 85 .hvs = HVS_PIXEL_FORMAT_RGB888, 86 .pixel_order = HVS_PIXEL_ORDER_XRGB, 87 }, 88 { 89 .drm = DRM_FORMAT_BGR888, 90 .hvs = HVS_PIXEL_FORMAT_RGB888, 91 .pixel_order = HVS_PIXEL_ORDER_XBGR, 92 }, 93 { 94 .drm = DRM_FORMAT_YUV422, 95 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE, 96 .pixel_order = HVS_PIXEL_ORDER_XYCBCR, 97 }, 98 { 99 .drm = DRM_FORMAT_YVU422, 100 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_3PLANE, 101 .pixel_order = HVS_PIXEL_ORDER_XYCRCB, 102 }, 103 { 104 .drm = DRM_FORMAT_YUV420, 105 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE, 106 .pixel_order = HVS_PIXEL_ORDER_XYCBCR, 107 }, 108 { 109 .drm = DRM_FORMAT_YVU420, 110 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_3PLANE, 111 .pixel_order = HVS_PIXEL_ORDER_XYCRCB, 112 }, 113 { 114 .drm = DRM_FORMAT_NV12, 115 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE, 116 .pixel_order = HVS_PIXEL_ORDER_XYCBCR, 117 }, 118 { 119 .drm = DRM_FORMAT_NV21, 120 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV420_2PLANE, 121 .pixel_order = HVS_PIXEL_ORDER_XYCRCB, 122 }, 123 { 124 .drm = DRM_FORMAT_NV16, 125 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE, 126 .pixel_order = HVS_PIXEL_ORDER_XYCBCR, 127 }, 128 { 129 .drm = DRM_FORMAT_NV61, 130 .hvs = HVS_PIXEL_FORMAT_YCBCR_YUV422_2PLANE, 131 .pixel_order = HVS_PIXEL_ORDER_XYCRCB, 132 }, 133}; 134 135static const struct hvs_format *vc4_get_hvs_format(u32 drm_format) 136{ 137 unsigned i; 138 139 for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) { 140 if (hvs_formats[i].drm == drm_format) 141 return &hvs_formats[i]; 142 } 143 144 return NULL; 145} 146 147static enum vc4_scaling_mode vc4_get_scaling_mode(u32 src, u32 dst) 148{ 149 if (dst == src) 150 return VC4_SCALING_NONE; 151 if (3 * dst >= 2 * src) 152 
return VC4_SCALING_PPF; 153 else 154 return VC4_SCALING_TPZ; 155} 156 157static bool plane_enabled(struct drm_plane_state *state) 158{ 159 return state->fb && !WARN_ON(!state->crtc); 160} 161 162static struct drm_plane_state *vc4_plane_duplicate_state(struct drm_plane *plane) 163{ 164 struct vc4_plane_state *vc4_state; 165 166 if (WARN_ON(!plane->state)) 167 return NULL; 168 169 vc4_state = kmemdup(plane->state, sizeof(*vc4_state), GFP_KERNEL); 170 if (!vc4_state) 171 return NULL; 172 173 memset(&vc4_state->lbm, 0, sizeof(vc4_state->lbm)); 174 vc4_state->dlist_initialized = 0; 175 176 __drm_atomic_helper_plane_duplicate_state(plane, &vc4_state->base); 177 178 if (vc4_state->dlist) { 179 vc4_state->dlist = kmemdup(vc4_state->dlist, 180 vc4_state->dlist_count * 4, 181 GFP_KERNEL); 182 if (!vc4_state->dlist) { 183 kfree(vc4_state); 184 return NULL; 185 } 186 vc4_state->dlist_size = vc4_state->dlist_count; 187 } 188 189 return &vc4_state->base; 190} 191 192static void vc4_plane_destroy_state(struct drm_plane *plane, 193 struct drm_plane_state *state) 194{ 195 struct vc4_dev *vc4 = to_vc4_dev(plane->dev); 196 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 197 198 if (drm_mm_node_allocated(&vc4_state->lbm)) { 199 unsigned long irqflags; 200 201 spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags); 202 drm_mm_remove_node(&vc4_state->lbm); 203 spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags); 204 } 205 206 kfree(vc4_state->dlist); 207 __drm_atomic_helper_plane_destroy_state(&vc4_state->base); 208 kfree(state); 209} 210 211/* Called during init to allocate the plane's atomic state. 
*/ 212static void vc4_plane_reset(struct drm_plane *plane) 213{ 214 struct vc4_plane_state *vc4_state; 215 216 WARN_ON(plane->state); 217 218 vc4_state = kzalloc(sizeof(*vc4_state), GFP_KERNEL); 219 if (!vc4_state) 220 return; 221 222 __drm_atomic_helper_plane_reset(plane, &vc4_state->base); 223} 224 225static void vc4_dlist_counter_increment(struct vc4_plane_state *vc4_state) 226{ 227 if (vc4_state->dlist_count == vc4_state->dlist_size) { 228 u32 new_size = max(4u, vc4_state->dlist_count * 2); 229 u32 *new_dlist = kmalloc_array(new_size, 4, GFP_KERNEL); 230 231 if (!new_dlist) 232 return; 233 memcpy(new_dlist, vc4_state->dlist, vc4_state->dlist_count * 4); 234 235 kfree(vc4_state->dlist); 236 vc4_state->dlist = new_dlist; 237 vc4_state->dlist_size = new_size; 238 } 239 240 vc4_state->dlist_count++; 241} 242 243static void vc4_dlist_write(struct vc4_plane_state *vc4_state, u32 val) 244{ 245 unsigned int idx = vc4_state->dlist_count; 246 247 vc4_dlist_counter_increment(vc4_state); 248 vc4_state->dlist[idx] = val; 249} 250 251/* Returns the scl0/scl1 field based on whether the dimensions need to 252 * be up/down/non-scaled. 253 * 254 * This is a replication of a table from the spec. 
255 */ 256static u32 vc4_get_scl_field(struct drm_plane_state *state, int plane) 257{ 258 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 259 260 switch (vc4_state->x_scaling[plane] << 2 | vc4_state->y_scaling[plane]) { 261 case VC4_SCALING_PPF << 2 | VC4_SCALING_PPF: 262 return SCALER_CTL0_SCL_H_PPF_V_PPF; 263 case VC4_SCALING_TPZ << 2 | VC4_SCALING_PPF: 264 return SCALER_CTL0_SCL_H_TPZ_V_PPF; 265 case VC4_SCALING_PPF << 2 | VC4_SCALING_TPZ: 266 return SCALER_CTL0_SCL_H_PPF_V_TPZ; 267 case VC4_SCALING_TPZ << 2 | VC4_SCALING_TPZ: 268 return SCALER_CTL0_SCL_H_TPZ_V_TPZ; 269 case VC4_SCALING_PPF << 2 | VC4_SCALING_NONE: 270 return SCALER_CTL0_SCL_H_PPF_V_NONE; 271 case VC4_SCALING_NONE << 2 | VC4_SCALING_PPF: 272 return SCALER_CTL0_SCL_H_NONE_V_PPF; 273 case VC4_SCALING_NONE << 2 | VC4_SCALING_TPZ: 274 return SCALER_CTL0_SCL_H_NONE_V_TPZ; 275 case VC4_SCALING_TPZ << 2 | VC4_SCALING_NONE: 276 return SCALER_CTL0_SCL_H_TPZ_V_NONE; 277 default: 278 case VC4_SCALING_NONE << 2 | VC4_SCALING_NONE: 279 /* The unity case is independently handled by 280 * SCALER_CTL0_UNITY. 
281 */ 282 return 0; 283 } 284} 285 286static int vc4_plane_margins_adj(struct drm_plane_state *pstate) 287{ 288 struct vc4_plane_state *vc4_pstate = to_vc4_plane_state(pstate); 289 unsigned int left, right, top, bottom, adjhdisplay, adjvdisplay; 290 struct drm_crtc_state *crtc_state; 291 292 crtc_state = drm_atomic_get_new_crtc_state(pstate->state, 293 pstate->crtc); 294 295 vc4_crtc_get_margins(crtc_state, &left, &right, &top, &bottom); 296 if (!left && !right && !top && !bottom) 297 return 0; 298 299 if (left + right >= crtc_state->mode.hdisplay || 300 top + bottom >= crtc_state->mode.vdisplay) 301 return -EINVAL; 302 303 adjhdisplay = crtc_state->mode.hdisplay - (left + right); 304 vc4_pstate->crtc_x = DIV_ROUND_CLOSEST(vc4_pstate->crtc_x * 305 adjhdisplay, 306 crtc_state->mode.hdisplay); 307 vc4_pstate->crtc_x += left; 308 if (vc4_pstate->crtc_x > crtc_state->mode.hdisplay - right) 309 vc4_pstate->crtc_x = crtc_state->mode.hdisplay - right; 310 311 adjvdisplay = crtc_state->mode.vdisplay - (top + bottom); 312 vc4_pstate->crtc_y = DIV_ROUND_CLOSEST(vc4_pstate->crtc_y * 313 adjvdisplay, 314 crtc_state->mode.vdisplay); 315 vc4_pstate->crtc_y += top; 316 if (vc4_pstate->crtc_y > crtc_state->mode.vdisplay - bottom) 317 vc4_pstate->crtc_y = crtc_state->mode.vdisplay - bottom; 318 319 vc4_pstate->crtc_w = DIV_ROUND_CLOSEST(vc4_pstate->crtc_w * 320 adjhdisplay, 321 crtc_state->mode.hdisplay); 322 vc4_pstate->crtc_h = DIV_ROUND_CLOSEST(vc4_pstate->crtc_h * 323 adjvdisplay, 324 crtc_state->mode.vdisplay); 325 326 if (!vc4_pstate->crtc_w || !vc4_pstate->crtc_h) 327 return -EINVAL; 328 329 return 0; 330} 331 332static int vc4_plane_setup_clipping_and_scaling(struct drm_plane_state *state) 333{ 334 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 335 struct drm_framebuffer *fb = state->fb; 336 struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0); 337 int num_planes = fb->format->num_planes; 338 struct drm_crtc_state *crtc_state; 339 u32 h_subsample = 
fb->format->hsub; 340 u32 v_subsample = fb->format->vsub; 341 int i, ret; 342 343 crtc_state = drm_atomic_get_existing_crtc_state(state->state, 344 state->crtc); 345 if (!crtc_state) { 346 DRM_DEBUG_KMS("Invalid crtc state\n"); 347 return -EINVAL; 348 } 349 350 ret = drm_atomic_helper_check_plane_state(state, crtc_state, 1, 351 INT_MAX, true, true); 352 if (ret) 353 return ret; 354 355 for (i = 0; i < num_planes; i++) 356 vc4_state->offsets[i] = bo->paddr + fb->offsets[i]; 357 358 /* 359 * We don't support subpixel source positioning for scaling, 360 * but fractional coordinates can be generated by clipping 361 * so just round for now 362 */ 363 vc4_state->src_x = DIV_ROUND_CLOSEST(state->src.x1, 1 << 16); 364 vc4_state->src_y = DIV_ROUND_CLOSEST(state->src.y1, 1 << 16); 365 vc4_state->src_w[0] = DIV_ROUND_CLOSEST(state->src.x2, 1 << 16) - vc4_state->src_x; 366 vc4_state->src_h[0] = DIV_ROUND_CLOSEST(state->src.y2, 1 << 16) - vc4_state->src_y; 367 368 vc4_state->crtc_x = state->dst.x1; 369 vc4_state->crtc_y = state->dst.y1; 370 vc4_state->crtc_w = state->dst.x2 - state->dst.x1; 371 vc4_state->crtc_h = state->dst.y2 - state->dst.y1; 372 373 ret = vc4_plane_margins_adj(state); 374 if (ret) 375 return ret; 376 377 vc4_state->x_scaling[0] = vc4_get_scaling_mode(vc4_state->src_w[0], 378 vc4_state->crtc_w); 379 vc4_state->y_scaling[0] = vc4_get_scaling_mode(vc4_state->src_h[0], 380 vc4_state->crtc_h); 381 382 vc4_state->is_unity = (vc4_state->x_scaling[0] == VC4_SCALING_NONE && 383 vc4_state->y_scaling[0] == VC4_SCALING_NONE); 384 385 if (num_planes > 1) { 386 vc4_state->is_yuv = true; 387 388 vc4_state->src_w[1] = vc4_state->src_w[0] / h_subsample; 389 vc4_state->src_h[1] = vc4_state->src_h[0] / v_subsample; 390 391 vc4_state->x_scaling[1] = 392 vc4_get_scaling_mode(vc4_state->src_w[1], 393 vc4_state->crtc_w); 394 vc4_state->y_scaling[1] = 395 vc4_get_scaling_mode(vc4_state->src_h[1], 396 vc4_state->crtc_h); 397 398 /* YUV conversion requires that horizontal scaling be 
enabled 399 * on the UV plane even if vc4_get_scaling_mode() returned 400 * VC4_SCALING_NONE (which can happen when the down-scaling 401 * ratio is 0.5). Let's force it to VC4_SCALING_PPF in this 402 * case. 403 */ 404 if (vc4_state->x_scaling[1] == VC4_SCALING_NONE) 405 vc4_state->x_scaling[1] = VC4_SCALING_PPF; 406 } else { 407 vc4_state->is_yuv = false; 408 vc4_state->x_scaling[1] = VC4_SCALING_NONE; 409 vc4_state->y_scaling[1] = VC4_SCALING_NONE; 410 } 411 412 return 0; 413} 414 415static void vc4_write_tpz(struct vc4_plane_state *vc4_state, u32 src, u32 dst) 416{ 417 u32 scale, recip; 418 419 scale = (1 << 16) * src / dst; 420 421 /* The specs note that while the reciprocal would be defined 422 * as (1<<32)/scale, ~0 is close enough. 423 */ 424 recip = ~0 / scale; 425 426 vc4_dlist_write(vc4_state, 427 VC4_SET_FIELD(scale, SCALER_TPZ0_SCALE) | 428 VC4_SET_FIELD(0, SCALER_TPZ0_IPHASE)); 429 vc4_dlist_write(vc4_state, 430 VC4_SET_FIELD(recip, SCALER_TPZ1_RECIP)); 431} 432 433static void vc4_write_ppf(struct vc4_plane_state *vc4_state, u32 src, u32 dst) 434{ 435 u32 scale = (1 << 16) * src / dst; 436 437 vc4_dlist_write(vc4_state, 438 SCALER_PPF_AGC | 439 VC4_SET_FIELD(scale, SCALER_PPF_SCALE) | 440 VC4_SET_FIELD(0, SCALER_PPF_IPHASE)); 441} 442 443static u32 vc4_lbm_size(struct drm_plane_state *state) 444{ 445 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 446 struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev); 447 u32 pix_per_line; 448 u32 lbm; 449 450 /* LBM is not needed when there's no vertical scaling. */ 451 if (vc4_state->y_scaling[0] == VC4_SCALING_NONE && 452 vc4_state->y_scaling[1] == VC4_SCALING_NONE) 453 return 0; 454 455 /* 456 * This can be further optimized in the RGB/YUV444 case if the PPF 457 * decimation factor is between 0.5 and 1.0 by using crtc_w. 458 * 459 * It's not an issue though, since in that case since src_w[0] is going 460 * to be greater than or equal to crtc_w. 
461 */ 462 if (vc4_state->x_scaling[0] == VC4_SCALING_TPZ) 463 pix_per_line = vc4_state->crtc_w; 464 else 465 pix_per_line = vc4_state->src_w[0]; 466 467 if (!vc4_state->is_yuv) { 468 if (vc4_state->y_scaling[0] == VC4_SCALING_TPZ) 469 lbm = pix_per_line * 8; 470 else { 471 /* In special cases, this multiplier might be 12. */ 472 lbm = pix_per_line * 16; 473 } 474 } else { 475 /* There are cases for this going down to a multiplier 476 * of 2, but according to the firmware source, the 477 * table in the docs is somewhat wrong. 478 */ 479 lbm = pix_per_line * 16; 480 } 481 482 /* Align it to 64 or 128 (hvs5) bytes */ 483 lbm = roundup(lbm, vc4->hvs->hvs5 ? 128 : 64); 484 485 /* Each "word" of the LBM memory contains 2 or 4 (hvs5) pixels */ 486 lbm /= vc4->hvs->hvs5 ? 4 : 2; 487 488 return lbm; 489} 490 491static void vc4_write_scaling_parameters(struct drm_plane_state *state, 492 int channel) 493{ 494 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 495 496 /* Ch0 H-PPF Word 0: Scaling Parameters */ 497 if (vc4_state->x_scaling[channel] == VC4_SCALING_PPF) { 498 vc4_write_ppf(vc4_state, 499 vc4_state->src_w[channel], vc4_state->crtc_w); 500 } 501 502 /* Ch0 V-PPF Words 0-1: Scaling Parameters, Context */ 503 if (vc4_state->y_scaling[channel] == VC4_SCALING_PPF) { 504 vc4_write_ppf(vc4_state, 505 vc4_state->src_h[channel], vc4_state->crtc_h); 506 vc4_dlist_write(vc4_state, 0xc0c0c0c0); 507 } 508 509 /* Ch0 H-TPZ Words 0-1: Scaling Parameters, Recip */ 510 if (vc4_state->x_scaling[channel] == VC4_SCALING_TPZ) { 511 vc4_write_tpz(vc4_state, 512 vc4_state->src_w[channel], vc4_state->crtc_w); 513 } 514 515 /* Ch0 V-TPZ Words 0-2: Scaling Parameters, Recip, Context */ 516 if (vc4_state->y_scaling[channel] == VC4_SCALING_TPZ) { 517 vc4_write_tpz(vc4_state, 518 vc4_state->src_h[channel], vc4_state->crtc_h); 519 vc4_dlist_write(vc4_state, 0xc0c0c0c0); 520 } 521} 522 523static void vc4_plane_calc_load(struct drm_plane_state *state) 524{ 525 unsigned int 
hvs_load_shift, vrefresh, i; 526 struct drm_framebuffer *fb = state->fb; 527 struct vc4_plane_state *vc4_state; 528 struct drm_crtc_state *crtc_state; 529 unsigned int vscale_factor; 530 struct vc4_dev *vc4; 531 532 vc4 = to_vc4_dev(state->plane->dev); 533 if (!vc4->load_tracker_available) 534 return; 535 536 vc4_state = to_vc4_plane_state(state); 537 crtc_state = drm_atomic_get_existing_crtc_state(state->state, 538 state->crtc); 539 vrefresh = drm_mode_vrefresh(&crtc_state->adjusted_mode); 540 541 /* The HVS is able to process 2 pixels/cycle when scaling the source, 542 * 4 pixels/cycle otherwise. 543 * Alpha blending step seems to be pipelined and it's always operating 544 * at 4 pixels/cycle, so the limiting aspect here seems to be the 545 * scaler block. 546 * HVS load is expressed in clk-cycles/sec (AKA Hz). 547 */ 548 if (vc4_state->x_scaling[0] != VC4_SCALING_NONE || 549 vc4_state->x_scaling[1] != VC4_SCALING_NONE || 550 vc4_state->y_scaling[0] != VC4_SCALING_NONE || 551 vc4_state->y_scaling[1] != VC4_SCALING_NONE) 552 hvs_load_shift = 1; 553 else 554 hvs_load_shift = 2; 555 556 vc4_state->membus_load = 0; 557 vc4_state->hvs_load = 0; 558 for (i = 0; i < fb->format->num_planes; i++) { 559 /* Even if the bandwidth/plane required for a single frame is 560 * 561 * vc4_state->src_w[i] * vc4_state->src_h[i] * cpp * vrefresh 562 * 563 * when downscaling, we have to read more pixels per line in 564 * the time frame reserved for a single line, so the bandwidth 565 * demand can be punctually higher. To account for that, we 566 * calculate the down-scaling factor and multiply the plane 567 * load by this number. We're likely over-estimating the read 568 * demand, but that's better than under-estimating it. 
569 */ 570 vscale_factor = DIV_ROUND_UP(vc4_state->src_h[i], 571 vc4_state->crtc_h); 572 vc4_state->membus_load += vc4_state->src_w[i] * 573 vc4_state->src_h[i] * vscale_factor * 574 fb->format->cpp[i]; 575 vc4_state->hvs_load += vc4_state->crtc_h * vc4_state->crtc_w; 576 } 577 578 vc4_state->hvs_load *= vrefresh; 579 vc4_state->hvs_load >>= hvs_load_shift; 580 vc4_state->membus_load *= vrefresh; 581} 582 583static int vc4_plane_allocate_lbm(struct drm_plane_state *state) 584{ 585 struct vc4_dev *vc4 = to_vc4_dev(state->plane->dev); 586 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 587 unsigned long irqflags; 588 u32 lbm_size; 589 590 lbm_size = vc4_lbm_size(state); 591 if (!lbm_size) 592 return 0; 593 594 if (WARN_ON(!vc4_state->lbm_offset)) 595 return -EINVAL; 596 597 /* Allocate the LBM memory that the HVS will use for temporary 598 * storage due to our scaling/format conversion. 599 */ 600 if (!drm_mm_node_allocated(&vc4_state->lbm)) { 601 int ret; 602 603 spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags); 604 ret = drm_mm_insert_node_generic(&vc4->hvs->lbm_mm, 605 &vc4_state->lbm, 606 lbm_size, 607 vc4->hvs->hvs5 ? 64 : 32, 608 0, 0); 609 spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags); 610 611 if (ret) 612 return ret; 613 } else { 614 WARN_ON_ONCE(lbm_size != vc4_state->lbm.size); 615 } 616 617 vc4_state->dlist[vc4_state->lbm_offset] = vc4_state->lbm.start; 618 619 return 0; 620} 621 622/* Writes out a full display list for an active plane to the plane's 623 * private dlist state. 
624 */ 625static int vc4_plane_mode_set(struct drm_plane *plane, 626 struct drm_plane_state *state) 627{ 628 struct vc4_dev *vc4 = to_vc4_dev(plane->dev); 629 struct vc4_plane_state *vc4_state = to_vc4_plane_state(state); 630 struct drm_framebuffer *fb = state->fb; 631 u32 ctl0_offset = vc4_state->dlist_count; 632 const struct hvs_format *format = vc4_get_hvs_format(fb->format->format); 633 u64 base_format_mod = fourcc_mod_broadcom_mod(fb->modifier); 634 int num_planes = fb->format->num_planes; 635 u32 h_subsample = fb->format->hsub; 636 u32 v_subsample = fb->format->vsub; 637 bool mix_plane_alpha; 638 bool covers_screen; 639 u32 scl0, scl1, pitch0; 640 u32 tiling, src_y; 641 u32 hvs_format = format->hvs; 642 unsigned int rotation; 643 int ret, i; 644 645 if (vc4_state->dlist_initialized) 646 return 0; 647 648 ret = vc4_plane_setup_clipping_and_scaling(state); 649 if (ret) 650 return ret; 651 652 /* SCL1 is used for Cb/Cr scaling of planar formats. For RGB 653 * and 4:4:4, scl1 should be set to scl0 so both channels of 654 * the scaler do the same thing. For YUV, the Y plane needs 655 * to be put in channel 1 and Cb/Cr in channel 0, so we swap 656 * the scl fields here. 657 */ 658 if (num_planes == 1) { 659 scl0 = vc4_get_scl_field(state, 0); 660 scl1 = scl0; 661 } else { 662 scl0 = vc4_get_scl_field(state, 1); 663 scl1 = vc4_get_scl_field(state, 0); 664 } 665 666 rotation = drm_rotation_simplify(state->rotation, 667 DRM_MODE_ROTATE_0 | 668 DRM_MODE_REFLECT_X | 669 DRM_MODE_REFLECT_Y); 670 671 /* We must point to the last line when Y reflection is enabled. */ 672 src_y = vc4_state->src_y; 673 if (rotation & DRM_MODE_REFLECT_Y) 674 src_y += vc4_state->src_h[0] - 1; 675 676 switch (base_format_mod) { 677 case DRM_FORMAT_MOD_LINEAR: 678 tiling = SCALER_CTL0_TILING_LINEAR; 679 pitch0 = VC4_SET_FIELD(fb->pitches[0], SCALER_SRC_PITCH); 680 681 /* Adjust the base pointer to the first pixel to be scanned 682 * out. 
683 */ 684 for (i = 0; i < num_planes; i++) { 685 vc4_state->offsets[i] += src_y / 686 (i ? v_subsample : 1) * 687 fb->pitches[i]; 688 689 vc4_state->offsets[i] += vc4_state->src_x / 690 (i ? h_subsample : 1) * 691 fb->format->cpp[i]; 692 } 693 694 break; 695 696 case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED: { 697 u32 tile_size_shift = 12; /* T tiles are 4kb */ 698 /* Whole-tile offsets, mostly for setting the pitch. */ 699 u32 tile_w_shift = fb->format->cpp[0] == 2 ? 6 : 5; 700 u32 tile_h_shift = 5; /* 16 and 32bpp are 32 pixels high */ 701 u32 tile_w_mask = (1 << tile_w_shift) - 1; 702 /* The height mask on 32-bit-per-pixel tiles is 63, i.e. twice 703 * the height (in pixels) of a 4k tile. 704 */ 705 u32 tile_h_mask = (2 << tile_h_shift) - 1; 706 /* For T-tiled, the FB pitch is "how many bytes from one row to 707 * the next, such that 708 * 709 * pitch * tile_h == tile_size * tiles_per_row 710 */ 711 u32 tiles_w = fb->pitches[0] >> (tile_size_shift - tile_h_shift); 712 u32 tiles_l = vc4_state->src_x >> tile_w_shift; 713 u32 tiles_r = tiles_w - tiles_l; 714 u32 tiles_t = src_y >> tile_h_shift; 715 /* Intra-tile offsets, which modify the base address (the 716 * SCALER_PITCH0_TILE_Y_OFFSET tells HVS how to walk from that 717 * base address). 718 */ 719 u32 tile_y = (src_y >> 4) & 1; 720 u32 subtile_y = (src_y >> 2) & 3; 721 u32 utile_y = src_y & 3; 722 u32 x_off = vc4_state->src_x & tile_w_mask; 723 u32 y_off = src_y & tile_h_mask; 724 725 /* When Y reflection is requested we must set the 726 * SCALER_PITCH0_TILE_LINE_DIR flag to tell HVS that all lines 727 * after the initial one should be fetched in descending order, 728 * which makes sense since we start from the last line and go 729 * backward. 730 * Don't know why we need y_off = max_y_off - y_off, but it's 731 * definitely required (I guess it's also related to the "going 732 * backward" situation). 
733 */ 734 if (rotation & DRM_MODE_REFLECT_Y) { 735 y_off = tile_h_mask - y_off; 736 pitch0 = SCALER_PITCH0_TILE_LINE_DIR; 737 } else { 738 pitch0 = 0; 739 } 740 741 tiling = SCALER_CTL0_TILING_256B_OR_T; 742 pitch0 |= (VC4_SET_FIELD(x_off, SCALER_PITCH0_SINK_PIX) | 743 VC4_SET_FIELD(y_off, SCALER_PITCH0_TILE_Y_OFFSET) | 744 VC4_SET_FIELD(tiles_l, SCALER_PITCH0_TILE_WIDTH_L) | 745 VC4_SET_FIELD(tiles_r, SCALER_PITCH0_TILE_WIDTH_R)); 746 vc4_state->offsets[0] += tiles_t * (tiles_w << tile_size_shift); 747 vc4_state->offsets[0] += subtile_y << 8; 748 vc4_state->offsets[0] += utile_y << 4; 749 750 /* Rows of tiles alternate left-to-right and right-to-left. */ 751 if (tiles_t & 1) { 752 pitch0 |= SCALER_PITCH0_TILE_INITIAL_LINE_DIR; 753 vc4_state->offsets[0] += (tiles_w - tiles_l) << 754 tile_size_shift; 755 vc4_state->offsets[0] -= (1 + !tile_y) << 10; 756 } else { 757 vc4_state->offsets[0] += tiles_l << tile_size_shift; 758 vc4_state->offsets[0] += tile_y << 10; 759 } 760 761 break; 762 } 763 764 case DRM_FORMAT_MOD_BROADCOM_SAND64: 765 case DRM_FORMAT_MOD_BROADCOM_SAND128: 766 case DRM_FORMAT_MOD_BROADCOM_SAND256: { 767 uint32_t param = fourcc_mod_broadcom_param(fb->modifier); 768 u32 tile_w, tile, x_off, pix_per_tile; 769 770 hvs_format = HVS_PIXEL_FORMAT_H264; 771 772 switch (base_format_mod) { 773 case DRM_FORMAT_MOD_BROADCOM_SAND64: 774 tiling = SCALER_CTL0_TILING_64B; 775 tile_w = 64; 776 break; 777 case DRM_FORMAT_MOD_BROADCOM_SAND128: 778 tiling = SCALER_CTL0_TILING_128B; 779 tile_w = 128; 780 break; 781 case DRM_FORMAT_MOD_BROADCOM_SAND256: 782 tiling = SCALER_CTL0_TILING_256B_OR_T; 783 tile_w = 256; 784 break; 785 default: 786 break; 787 } 788 789 if (param > SCALER_TILE_HEIGHT_MASK) { 790 DRM_DEBUG_KMS("SAND height too large (%d)\n", param); 791 return -EINVAL; 792 } 793 794 pix_per_tile = tile_w / fb->format->cpp[0]; 795 tile = vc4_state->src_x / pix_per_tile; 796 x_off = vc4_state->src_x % pix_per_tile; 797 798 /* Adjust the base pointer to the first 
pixel to be scanned 799 * out. 800 */ 801 for (i = 0; i < num_planes; i++) { 802 vc4_state->offsets[i] += param * tile_w * tile; 803 vc4_state->offsets[i] += src_y / 804 (i ? v_subsample : 1) * 805 tile_w; 806 vc4_state->offsets[i] += x_off / 807 (i ? h_subsample : 1) * 808 fb->format->cpp[i]; 809 } 810 811 pitch0 = VC4_SET_FIELD(param, SCALER_TILE_HEIGHT); 812 break; 813 } 814 815 default: 816 DRM_DEBUG_KMS("Unsupported FB tiling flag 0x%16llx", 817 (long long)fb->modifier); 818 return -EINVAL; 819 } 820 821 /* Don't waste cycles mixing with plane alpha if the set alpha 822 * is opaque or there is no per-pixel alpha information. 823 * In any case we use the alpha property value as the fixed alpha. 824 */ 825 mix_plane_alpha = state->alpha != DRM_BLEND_ALPHA_OPAQUE && 826 fb->format->has_alpha; 827 828 if (!vc4->hvs->hvs5) { 829 /* Control word */ 830 vc4_dlist_write(vc4_state, 831 SCALER_CTL0_VALID | 832 (rotation & DRM_MODE_REFLECT_X ? SCALER_CTL0_HFLIP : 0) | 833 (rotation & DRM_MODE_REFLECT_Y ? SCALER_CTL0_VFLIP : 0) | 834 VC4_SET_FIELD(SCALER_CTL0_RGBA_EXPAND_ROUND, SCALER_CTL0_RGBA_EXPAND) | 835 (format->pixel_order << SCALER_CTL0_ORDER_SHIFT) | 836 (hvs_format << SCALER_CTL0_PIXEL_FORMAT_SHIFT) | 837 VC4_SET_FIELD(tiling, SCALER_CTL0_TILING) | 838 (vc4_state->is_unity ? SCALER_CTL0_UNITY : 0) | 839 VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) | 840 VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1)); 841 842 /* Position Word 0: Image Positions and Alpha Value */ 843 vc4_state->pos0_offset = vc4_state->dlist_count; 844 vc4_dlist_write(vc4_state, 845 VC4_SET_FIELD(state->alpha >> 8, SCALER_POS0_FIXED_ALPHA) | 846 VC4_SET_FIELD(vc4_state->crtc_x, SCALER_POS0_START_X) | 847 VC4_SET_FIELD(vc4_state->crtc_y, SCALER_POS0_START_Y)); 848 849 /* Position Word 1: Scaled Image Dimensions. 
*/ 850 if (!vc4_state->is_unity) { 851 vc4_dlist_write(vc4_state, 852 VC4_SET_FIELD(vc4_state->crtc_w, 853 SCALER_POS1_SCL_WIDTH) | 854 VC4_SET_FIELD(vc4_state->crtc_h, 855 SCALER_POS1_SCL_HEIGHT)); 856 } 857 858 /* Position Word 2: Source Image Size, Alpha */ 859 vc4_state->pos2_offset = vc4_state->dlist_count; 860 vc4_dlist_write(vc4_state, 861 VC4_SET_FIELD(fb->format->has_alpha ? 862 SCALER_POS2_ALPHA_MODE_PIPELINE : 863 SCALER_POS2_ALPHA_MODE_FIXED, 864 SCALER_POS2_ALPHA_MODE) | 865 (mix_plane_alpha ? SCALER_POS2_ALPHA_MIX : 0) | 866 (fb->format->has_alpha ? 867 SCALER_POS2_ALPHA_PREMULT : 0) | 868 VC4_SET_FIELD(vc4_state->src_w[0], 869 SCALER_POS2_WIDTH) | 870 VC4_SET_FIELD(vc4_state->src_h[0], 871 SCALER_POS2_HEIGHT)); 872 873 /* Position Word 3: Context. Written by the HVS. */ 874 vc4_dlist_write(vc4_state, 0xc0c0c0c0); 875 876 } else { 877 u32 hvs_pixel_order = format->pixel_order; 878 879 if (format->pixel_order_hvs5) 880 hvs_pixel_order = format->pixel_order_hvs5; 881 882 /* Control word */ 883 vc4_dlist_write(vc4_state, 884 SCALER_CTL0_VALID | 885 (hvs_pixel_order << SCALER_CTL0_ORDER_SHIFT) | 886 (hvs_format << SCALER_CTL0_PIXEL_FORMAT_SHIFT) | 887 VC4_SET_FIELD(tiling, SCALER_CTL0_TILING) | 888 (vc4_state->is_unity ? 889 SCALER5_CTL0_UNITY : 0) | 890 VC4_SET_FIELD(scl0, SCALER_CTL0_SCL0) | 891 VC4_SET_FIELD(scl1, SCALER_CTL0_SCL1) | 892 SCALER5_CTL0_ALPHA_EXPAND | 893 SCALER5_CTL0_RGB_EXPAND); 894 895 /* Position Word 0: Image Positions and Alpha Value */ 896 vc4_state->pos0_offset = vc4_state->dlist_count; 897 vc4_dlist_write(vc4_state, 898 (rotation & DRM_MODE_REFLECT_Y ? 899 SCALER5_POS0_VFLIP : 0) | 900 VC4_SET_FIELD(vc4_state->crtc_x, 901 SCALER_POS0_START_X) | 902 (rotation & DRM_MODE_REFLECT_X ? 903 SCALER5_POS0_HFLIP : 0) | 904 VC4_SET_FIELD(vc4_state->crtc_y, 905 SCALER5_POS0_START_Y) 906 ); 907 908 /* Control Word 2 */ 909 vc4_dlist_write(vc4_state, 910 VC4_SET_FIELD(state->alpha >> 4, 911 SCALER5_CTL2_ALPHA) | 912 (fb->format->has_alpha ? 
					SCALER5_CTL2_ALPHA_PREMULT : 0) |
				(mix_plane_alpha ?
				 SCALER5_CTL2_ALPHA_MIX : 0) |
				VC4_SET_FIELD(fb->format->has_alpha ?
					      SCALER5_CTL2_ALPHA_MODE_PIPELINE :
					      SCALER5_CTL2_ALPHA_MODE_FIXED,
					      SCALER5_CTL2_ALPHA_MODE)
			       );

		/* Position Word 1: Scaled Image Dimensions. */
		if (!vc4_state->is_unity) {
			vc4_dlist_write(vc4_state,
					VC4_SET_FIELD(vc4_state->crtc_w,
						      SCALER5_POS1_SCL_WIDTH) |
					VC4_SET_FIELD(vc4_state->crtc_h,
						      SCALER5_POS1_SCL_HEIGHT));
		}

		/* Position Word 2: Source Image Size */
		vc4_state->pos2_offset = vc4_state->dlist_count;
		vc4_dlist_write(vc4_state,
				VC4_SET_FIELD(vc4_state->src_w[0],
					      SCALER5_POS2_WIDTH) |
				VC4_SET_FIELD(vc4_state->src_h[0],
					      SCALER5_POS2_HEIGHT));

		/* Position Word 3: Context.  Written by the HVS. */
		vc4_dlist_write(vc4_state, 0xc0c0c0c0);
	}


	/* Pointer Word 0/1/2: RGB / Y / Cb / Cr Pointers
	 *
	 * The pointers may be any byte address.
	 */
	vc4_state->ptr0_offset = vc4_state->dlist_count;
	for (i = 0; i < num_planes; i++)
		vc4_dlist_write(vc4_state, vc4_state->offsets[i]);

	/* Pointer Context Word 0/1/2: Written by the HVS */
	for (i = 0; i < num_planes; i++)
		vc4_dlist_write(vc4_state, 0xc0c0c0c0);

	/* Pitch word 0 */
	vc4_dlist_write(vc4_state, pitch0);

	/* Pitch word 1/2 */
	for (i = 1; i < num_planes; i++) {
		if (hvs_format != HVS_PIXEL_FORMAT_H264) {
			vc4_dlist_write(vc4_state,
					VC4_SET_FIELD(fb->pitches[i],
						      SCALER_SRC_PITCH));
		} else {
			/* H264 uses pitch0 for every plane. */
			vc4_dlist_write(vc4_state, pitch0);
		}
	}

	/* Colorspace conversion words (only emitted for YUV formats) */
	if (vc4_state->is_yuv) {
		vc4_dlist_write(vc4_state, SCALER_CSC0_ITR_R_601_5);
		vc4_dlist_write(vc4_state, SCALER_CSC1_ITR_R_601_5);
		vc4_dlist_write(vc4_state, SCALER_CSC2_ITR_R_601_5);
	}

	/* 0 means "no LBM needed"; async_check relies on that sentinel. */
	vc4_state->lbm_offset = 0;

	if (vc4_state->x_scaling[0] != VC4_SCALING_NONE ||
	    vc4_state->x_scaling[1] != VC4_SCALING_NONE ||
	    vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
	    vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
		/* Reserve a slot for the LBM Base Address. The real value will
		 * be set when calling vc4_plane_allocate_lbm().
		 */
		if (vc4_state->y_scaling[0] != VC4_SCALING_NONE ||
		    vc4_state->y_scaling[1] != VC4_SCALING_NONE) {
			vc4_state->lbm_offset = vc4_state->dlist_count;
			vc4_dlist_counter_increment(vc4_state);
		}

		if (num_planes > 1) {
			/* Emit Cb/Cr as channel 0 and Y as channel
			 * 1. This matches how we set up scl0/scl1
			 * above.
			 */
			vc4_write_scaling_parameters(state, 1);
		}
		vc4_write_scaling_parameters(state, 0);

		/* If any PPF setup was done, then all the kernel
		 * pointers get uploaded.
		 */
		if (vc4_state->x_scaling[0] == VC4_SCALING_PPF ||
		    vc4_state->y_scaling[0] == VC4_SCALING_PPF ||
		    vc4_state->x_scaling[1] == VC4_SCALING_PPF ||
		    vc4_state->y_scaling[1] == VC4_SCALING_PPF) {
			u32 kernel = VC4_SET_FIELD(vc4->hvs->mitchell_netravali_filter.start,
						   SCALER_PPF_KERNEL_OFFSET);

			/* HPPF plane 0 */
			vc4_dlist_write(vc4_state, kernel);
			/* VPPF plane 0 */
			vc4_dlist_write(vc4_state, kernel);
			/* HPPF plane 1 */
			vc4_dlist_write(vc4_state, kernel);
			/* VPPF plane 1 */
			vc4_dlist_write(vc4_state, kernel);
		}
	}

	/* Patch the element's total size into Control Word 0 now that
	 * the full dlist length is known.
	 */
	vc4_state->dlist[ctl0_offset] |=
		VC4_SET_FIELD(vc4_state->dlist_count, SCALER_CTL0_SIZE);

	/* crtc_* are already clipped coordinates. */
	covers_screen = vc4_state->crtc_x == 0 && vc4_state->crtc_y == 0 &&
			vc4_state->crtc_w == state->crtc->mode.hdisplay &&
			vc4_state->crtc_h == state->crtc->mode.vdisplay;
	/* Background fill might be necessary when the plane has per-pixel
	 * alpha content or a non-opaque plane alpha and could blend from the
	 * background or does not cover the entire screen.
	 */
	vc4_state->needs_bg_fill = fb->format->has_alpha || !covers_screen ||
				   state->alpha != DRM_BLEND_ALPHA_OPAQUE;

	/* Flag the dlist as initialized to avoid checking it twice in case
	 * the async update check already called vc4_plane_mode_set() and
	 * decided to fallback to sync update because async update was not
	 * possible.
	 */
	vc4_state->dlist_initialized = 1;

	vc4_plane_calc_load(state);

	return 0;
}

/* If a modeset involves changing the setup of a plane, the atomic
 * infrastructure will call this to validate a proposed plane setup.
 * However, if a plane isn't getting updated, this (and the
 * corresponding vc4_plane_atomic_update) won't get called.  Thus, we
 * compute the dlist here and have all active plane dlists get updated
 * in the CRTC's flush.
 */
static int vc4_plane_atomic_check(struct drm_plane *plane,
				  struct drm_plane_state *state)
{
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(state);
	int ret;

	vc4_state->dlist_count = 0;

	if (!plane_enabled(state))
		return 0;

	ret = vc4_plane_mode_set(plane, state);
	if (ret)
		return ret;

	/* The dlist is valid; now reserve the line buffer memory the
	 * scaler needs for this configuration.
	 */
	return vc4_plane_allocate_lbm(state);
}

static void vc4_plane_atomic_update(struct drm_plane *plane,
				    struct drm_plane_state *old_state)
{
	/* No contents here.  Since we don't know where in the CRTC's
	 * dlist we should be stored, our dlist is uploaded to the
	 * hardware with vc4_plane_write_dlist() at CRTC atomic_flush
	 * time.
	 */
}

/* Copies this plane's precomputed dlist into the region of the HVS
 * display list the CRTC allocated for it; returns the number of words
 * written.
 */
u32 vc4_plane_write_dlist(struct drm_plane *plane, u32 __iomem *dlist)
{
	struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state);
	int i;

	vc4_state->hw_dlist = dlist;

	/* Can't memcpy_toio() because it needs to be 32-bit writes.
*/ 1091 for (i = 0; i < vc4_state->dlist_count; i++) 1092 writel(vc4_state->dlist[i], &dlist[i]); 1093 1094 return vc4_state->dlist_count; 1095} 1096 1097u32 vc4_plane_dlist_size(const struct drm_plane_state *state) 1098{ 1099 const struct vc4_plane_state *vc4_state = 1100 container_of(state, typeof(*vc4_state), base); 1101 1102 return vc4_state->dlist_count; 1103} 1104 1105/* Updates the plane to immediately (well, once the FIFO needs 1106 * refilling) scan out from at a new framebuffer. 1107 */ 1108void vc4_plane_async_set_fb(struct drm_plane *plane, struct drm_framebuffer *fb) 1109{ 1110 struct vc4_plane_state *vc4_state = to_vc4_plane_state(plane->state); 1111 struct drm_gem_cma_object *bo = drm_fb_cma_get_gem_obj(fb, 0); 1112 uint32_t addr; 1113 1114 /* We're skipping the address adjustment for negative origin, 1115 * because this is only called on the primary plane. 1116 */ 1117 WARN_ON_ONCE(plane->state->crtc_x < 0 || plane->state->crtc_y < 0); 1118 addr = bo->paddr + fb->offsets[0]; 1119 1120 /* Write the new address into the hardware immediately. The 1121 * scanout will start from this address as soon as the FIFO 1122 * needs to refill with pixels. 1123 */ 1124 writel(addr, &vc4_state->hw_dlist[vc4_state->ptr0_offset]); 1125 1126 /* Also update the CPU-side dlist copy, so that any later 1127 * atomic updates that don't do a new modeset on our plane 1128 * also use our updated address. 
1129 */ 1130 vc4_state->dlist[vc4_state->ptr0_offset] = addr; 1131} 1132 1133static void vc4_plane_atomic_async_update(struct drm_plane *plane, 1134 struct drm_plane_state *state) 1135{ 1136 struct vc4_plane_state *vc4_state, *new_vc4_state; 1137 1138 swap(plane->state->fb, state->fb); 1139 plane->state->crtc_x = state->crtc_x; 1140 plane->state->crtc_y = state->crtc_y; 1141 plane->state->crtc_w = state->crtc_w; 1142 plane->state->crtc_h = state->crtc_h; 1143 plane->state->src_x = state->src_x; 1144 plane->state->src_y = state->src_y; 1145 plane->state->src_w = state->src_w; 1146 plane->state->src_h = state->src_h; 1147 plane->state->src_h = state->src_h; 1148 plane->state->alpha = state->alpha; 1149 plane->state->pixel_blend_mode = state->pixel_blend_mode; 1150 plane->state->rotation = state->rotation; 1151 plane->state->zpos = state->zpos; 1152 plane->state->normalized_zpos = state->normalized_zpos; 1153 plane->state->color_encoding = state->color_encoding; 1154 plane->state->color_range = state->color_range; 1155 plane->state->src = state->src; 1156 plane->state->dst = state->dst; 1157 plane->state->visible = state->visible; 1158 1159 new_vc4_state = to_vc4_plane_state(state); 1160 vc4_state = to_vc4_plane_state(plane->state); 1161 1162 vc4_state->crtc_x = new_vc4_state->crtc_x; 1163 vc4_state->crtc_y = new_vc4_state->crtc_y; 1164 vc4_state->crtc_h = new_vc4_state->crtc_h; 1165 vc4_state->crtc_w = new_vc4_state->crtc_w; 1166 vc4_state->src_x = new_vc4_state->src_x; 1167 vc4_state->src_y = new_vc4_state->src_y; 1168 memcpy(vc4_state->src_w, new_vc4_state->src_w, 1169 sizeof(vc4_state->src_w)); 1170 memcpy(vc4_state->src_h, new_vc4_state->src_h, 1171 sizeof(vc4_state->src_h)); 1172 memcpy(vc4_state->x_scaling, new_vc4_state->x_scaling, 1173 sizeof(vc4_state->x_scaling)); 1174 memcpy(vc4_state->y_scaling, new_vc4_state->y_scaling, 1175 sizeof(vc4_state->y_scaling)); 1176 vc4_state->is_unity = new_vc4_state->is_unity; 1177 vc4_state->is_yuv = new_vc4_state->is_yuv; 
1178 memcpy(vc4_state->offsets, new_vc4_state->offsets, 1179 sizeof(vc4_state->offsets)); 1180 vc4_state->needs_bg_fill = new_vc4_state->needs_bg_fill; 1181 1182 /* Update the current vc4_state pos0, pos2 and ptr0 dlist entries. */ 1183 vc4_state->dlist[vc4_state->pos0_offset] = 1184 new_vc4_state->dlist[vc4_state->pos0_offset]; 1185 vc4_state->dlist[vc4_state->pos2_offset] = 1186 new_vc4_state->dlist[vc4_state->pos2_offset]; 1187 vc4_state->dlist[vc4_state->ptr0_offset] = 1188 new_vc4_state->dlist[vc4_state->ptr0_offset]; 1189 1190 /* Note that we can't just call vc4_plane_write_dlist() 1191 * because that would smash the context data that the HVS is 1192 * currently using. 1193 */ 1194 writel(vc4_state->dlist[vc4_state->pos0_offset], 1195 &vc4_state->hw_dlist[vc4_state->pos0_offset]); 1196 writel(vc4_state->dlist[vc4_state->pos2_offset], 1197 &vc4_state->hw_dlist[vc4_state->pos2_offset]); 1198 writel(vc4_state->dlist[vc4_state->ptr0_offset], 1199 &vc4_state->hw_dlist[vc4_state->ptr0_offset]); 1200} 1201 1202static int vc4_plane_atomic_async_check(struct drm_plane *plane, 1203 struct drm_plane_state *state) 1204{ 1205 struct vc4_plane_state *old_vc4_state, *new_vc4_state; 1206 int ret; 1207 u32 i; 1208 1209 ret = vc4_plane_mode_set(plane, state); 1210 if (ret) 1211 return ret; 1212 1213 old_vc4_state = to_vc4_plane_state(plane->state); 1214 new_vc4_state = to_vc4_plane_state(state); 1215 if (old_vc4_state->dlist_count != new_vc4_state->dlist_count || 1216 old_vc4_state->pos0_offset != new_vc4_state->pos0_offset || 1217 old_vc4_state->pos2_offset != new_vc4_state->pos2_offset || 1218 old_vc4_state->ptr0_offset != new_vc4_state->ptr0_offset || 1219 vc4_lbm_size(plane->state) != vc4_lbm_size(state)) 1220 return -EINVAL; 1221 1222 /* Only pos0, pos2 and ptr0 DWORDS can be updated in an async update 1223 * if anything else has changed, fallback to a sync update. 
1224 */ 1225 for (i = 0; i < new_vc4_state->dlist_count; i++) { 1226 if (i == new_vc4_state->pos0_offset || 1227 i == new_vc4_state->pos2_offset || 1228 i == new_vc4_state->ptr0_offset || 1229 (new_vc4_state->lbm_offset && 1230 i == new_vc4_state->lbm_offset)) 1231 continue; 1232 1233 if (new_vc4_state->dlist[i] != old_vc4_state->dlist[i]) 1234 return -EINVAL; 1235 } 1236 1237 return 0; 1238} 1239 1240static int vc4_prepare_fb(struct drm_plane *plane, 1241 struct drm_plane_state *state) 1242{ 1243 struct vc4_bo *bo; 1244 int ret; 1245 1246 if (!state->fb) 1247 return 0; 1248 1249 bo = to_vc4_bo(&drm_fb_cma_get_gem_obj(state->fb, 0)->base); 1250 1251 drm_gem_fb_prepare_fb(plane, state); 1252 1253 if (plane->state->fb == state->fb) 1254 return 0; 1255 1256 ret = vc4_bo_inc_usecnt(bo); 1257 if (ret) 1258 return ret; 1259 1260 return 0; 1261} 1262 1263static void vc4_cleanup_fb(struct drm_plane *plane, 1264 struct drm_plane_state *state) 1265{ 1266 struct vc4_bo *bo; 1267 1268 if (plane->state->fb == state->fb || !state->fb) 1269 return; 1270 1271 bo = to_vc4_bo(&drm_fb_cma_get_gem_obj(state->fb, 0)->base); 1272 vc4_bo_dec_usecnt(bo); 1273} 1274 1275static const struct drm_plane_helper_funcs vc4_plane_helper_funcs = { 1276 .atomic_check = vc4_plane_atomic_check, 1277 .atomic_update = vc4_plane_atomic_update, 1278 .prepare_fb = vc4_prepare_fb, 1279 .cleanup_fb = vc4_cleanup_fb, 1280 .atomic_async_check = vc4_plane_atomic_async_check, 1281 .atomic_async_update = vc4_plane_atomic_async_update, 1282}; 1283 1284static void vc4_plane_destroy(struct drm_plane *plane) 1285{ 1286 drm_plane_cleanup(plane); 1287} 1288 1289static bool vc4_format_mod_supported(struct drm_plane *plane, 1290 uint32_t format, 1291 uint64_t modifier) 1292{ 1293 /* Support T_TILING for RGB formats only. 
*/ 1294 switch (format) { 1295 case DRM_FORMAT_XRGB8888: 1296 case DRM_FORMAT_ARGB8888: 1297 case DRM_FORMAT_ABGR8888: 1298 case DRM_FORMAT_XBGR8888: 1299 case DRM_FORMAT_RGB565: 1300 case DRM_FORMAT_BGR565: 1301 case DRM_FORMAT_ARGB1555: 1302 case DRM_FORMAT_XRGB1555: 1303 switch (fourcc_mod_broadcom_mod(modifier)) { 1304 case DRM_FORMAT_MOD_LINEAR: 1305 case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED: 1306 return true; 1307 default: 1308 return false; 1309 } 1310 case DRM_FORMAT_NV12: 1311 case DRM_FORMAT_NV21: 1312 switch (fourcc_mod_broadcom_mod(modifier)) { 1313 case DRM_FORMAT_MOD_LINEAR: 1314 case DRM_FORMAT_MOD_BROADCOM_SAND64: 1315 case DRM_FORMAT_MOD_BROADCOM_SAND128: 1316 case DRM_FORMAT_MOD_BROADCOM_SAND256: 1317 return true; 1318 default: 1319 return false; 1320 } 1321 case DRM_FORMAT_RGBX1010102: 1322 case DRM_FORMAT_BGRX1010102: 1323 case DRM_FORMAT_RGBA1010102: 1324 case DRM_FORMAT_BGRA1010102: 1325 case DRM_FORMAT_YUV422: 1326 case DRM_FORMAT_YVU422: 1327 case DRM_FORMAT_YUV420: 1328 case DRM_FORMAT_YVU420: 1329 case DRM_FORMAT_NV16: 1330 case DRM_FORMAT_NV61: 1331 default: 1332 return (modifier == DRM_FORMAT_MOD_LINEAR); 1333 } 1334} 1335 1336static const struct drm_plane_funcs vc4_plane_funcs = { 1337 .update_plane = drm_atomic_helper_update_plane, 1338 .disable_plane = drm_atomic_helper_disable_plane, 1339 .destroy = vc4_plane_destroy, 1340 .set_property = NULL, 1341 .reset = vc4_plane_reset, 1342 .atomic_duplicate_state = vc4_plane_duplicate_state, 1343 .atomic_destroy_state = vc4_plane_destroy_state, 1344 .format_mod_supported = vc4_format_mod_supported, 1345}; 1346 1347struct drm_plane *vc4_plane_init(struct drm_device *dev, 1348 enum drm_plane_type type) 1349{ 1350 struct drm_plane *plane = NULL; 1351 struct vc4_plane *vc4_plane; 1352 u32 formats[ARRAY_SIZE(hvs_formats)]; 1353 int ret = 0; 1354 unsigned i; 1355 static const uint64_t modifiers[] = { 1356 DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED, 1357 DRM_FORMAT_MOD_BROADCOM_SAND128, 1358 
		DRM_FORMAT_MOD_BROADCOM_SAND64,
		DRM_FORMAT_MOD_BROADCOM_SAND256,
		DRM_FORMAT_MOD_LINEAR,
		DRM_FORMAT_MOD_INVALID
	};

	vc4_plane = devm_kzalloc(dev->dev, sizeof(*vc4_plane),
				 GFP_KERNEL);
	if (!vc4_plane)
		return ERR_PTR(-ENOMEM);

	/* Advertise every DRM format the HVS format table supports. */
	for (i = 0; i < ARRAY_SIZE(hvs_formats); i++)
		formats[i] = hvs_formats[i].drm;

	plane = &vc4_plane->base;
	ret = drm_universal_plane_init(dev, plane, 0,
				       &vc4_plane_funcs,
				       formats, ARRAY_SIZE(formats),
				       modifiers, type, NULL);
	if (ret)
		return ERR_PTR(ret);

	drm_plane_helper_add(plane, &vc4_plane_helper_funcs);

	drm_plane_create_alpha_property(plane);
	drm_plane_create_rotation_property(plane, DRM_MODE_ROTATE_0,
					   DRM_MODE_ROTATE_0 |
					   DRM_MODE_ROTATE_180 |
					   DRM_MODE_REFLECT_X |
					   DRM_MODE_REFLECT_Y);

	return plane;
}

/* Creates the overlay planes (usable on any CRTC) and one legacy
 * cursor plane per CRTC.  Plane-init failures are skipped rather than
 * fatal.
 */
int vc4_plane_create_additional_planes(struct drm_device *drm)
{
	struct drm_plane *cursor_plane;
	struct drm_crtc *crtc;
	unsigned int i;

	/* Set up some arbitrary number of planes.  We're not limited
	 * by a set number of physical registers, just the space in
	 * the HVS (16k) and how small an plane can be (28 bytes).
	 * However, each plane we set up takes up some memory, and
	 * increases the cost of looping over planes, which atomic
	 * modesetting does quite a bit.  As a result, we pick a
	 * modest number of planes to expose, that should hopefully
	 * still cover any sane usecase.
	 */
	for (i = 0; i < 16; i++) {
		struct drm_plane *plane =
			vc4_plane_init(drm, DRM_PLANE_TYPE_OVERLAY);

		if (IS_ERR(plane))
			continue;

		plane->possible_crtcs =
			GENMASK(drm->mode_config.num_crtc - 1, 0);
	}

	drm_for_each_crtc(crtc, drm) {
		/* Set up the legacy cursor after overlay initialization,
		 * since we overlay planes on the CRTC in the order they were
		 * initialized.
		 */
		cursor_plane = vc4_plane_init(drm, DRM_PLANE_TYPE_CURSOR);
		if (!IS_ERR(cursor_plane)) {
			cursor_plane->possible_crtcs = drm_crtc_mask(crtc);
			crtc->cursor = cursor_plane;
		}
	}

	return 0;
}