1/* 2 * Copyright © 2014-2017 Broadcom 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include "util/format/u_format.h" 25#include "util/half_float.h" 26#include "v3d_context.h" 27#include "broadcom/common/v3d_macros.h" 28#include "broadcom/cle/v3dx_pack.h" 29#include "broadcom/common/v3d_util.h" 30#include "broadcom/compiler/v3d_compiler.h" 31 32static uint8_t 33v3d_factor(enum pipe_blendfactor factor, bool dst_alpha_one) 34{ 35 /* We may get a bad blendfactor when blending is disabled. */ 36 if (factor == 0) 37 return V3D_BLEND_FACTOR_ZERO; 38 39 switch (factor) { 40 case PIPE_BLENDFACTOR_ZERO: 41 return V3D_BLEND_FACTOR_ZERO; 42 case PIPE_BLENDFACTOR_ONE: 43 return V3D_BLEND_FACTOR_ONE; 44 case PIPE_BLENDFACTOR_SRC_COLOR: 45 return V3D_BLEND_FACTOR_SRC_COLOR; 46 case PIPE_BLENDFACTOR_INV_SRC_COLOR: 47 return V3D_BLEND_FACTOR_INV_SRC_COLOR; 48 case PIPE_BLENDFACTOR_DST_COLOR: 49 return V3D_BLEND_FACTOR_DST_COLOR; 50 case PIPE_BLENDFACTOR_INV_DST_COLOR: 51 return V3D_BLEND_FACTOR_INV_DST_COLOR; 52 case PIPE_BLENDFACTOR_SRC_ALPHA: 53 return V3D_BLEND_FACTOR_SRC_ALPHA; 54 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 55 return V3D_BLEND_FACTOR_INV_SRC_ALPHA; 56 case PIPE_BLENDFACTOR_DST_ALPHA: 57 return (dst_alpha_one ? 58 V3D_BLEND_FACTOR_ONE : 59 V3D_BLEND_FACTOR_DST_ALPHA); 60 case PIPE_BLENDFACTOR_INV_DST_ALPHA: 61 return (dst_alpha_one ? 62 V3D_BLEND_FACTOR_ZERO : 63 V3D_BLEND_FACTOR_INV_DST_ALPHA); 64 case PIPE_BLENDFACTOR_CONST_COLOR: 65 return V3D_BLEND_FACTOR_CONST_COLOR; 66 case PIPE_BLENDFACTOR_INV_CONST_COLOR: 67 return V3D_BLEND_FACTOR_INV_CONST_COLOR; 68 case PIPE_BLENDFACTOR_CONST_ALPHA: 69 return V3D_BLEND_FACTOR_CONST_ALPHA; 70 case PIPE_BLENDFACTOR_INV_CONST_ALPHA: 71 return V3D_BLEND_FACTOR_INV_CONST_ALPHA; 72 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 73 return (dst_alpha_one ? 74 V3D_BLEND_FACTOR_ZERO : 75 V3D_BLEND_FACTOR_SRC_ALPHA_SATURATE); 76 default: 77 unreachable("Bad blend factor"); 78 } 79} 80 81#if V3D_VERSION < 40 82static inline uint16_t 83swizzled_border_color(const struct v3d_device_info *devinfo, 84 struct pipe_sampler_state *sampler, 85 struct v3d_sampler_view *sview, 86 int chan) 87{ 88 const struct util_format_description *desc = 89 util_format_description(sview->base.format); 90 uint8_t swiz = chan; 91 92 /* If we're doing swizzling in the sampler, then only rearrange the 93 * border color for the mismatch between the V3D texture format and 94 * the PIPE_FORMAT, since GL_ARB_texture_swizzle will be handled by 95 * the sampler's swizzle. 96 * 97 * For swizzling in the shader, we don't do any pre-swizzling of the 98 * border color. 99 */ 100 if (v3d_get_tex_return_size(devinfo, sview->base.format, 101 sampler->compare_mode) != 32) 102 swiz = desc->swizzle[swiz]; 103 104 switch (swiz) { 105 case PIPE_SWIZZLE_0: 106 return _mesa_float_to_half(0.0); 107 case PIPE_SWIZZLE_1: 108 return _mesa_float_to_half(1.0); 109 default: 110 return _mesa_float_to_half(sampler->border_color.f[swiz]); 111 } 112} 113 114static void 115emit_one_texture(struct v3d_context *v3d, struct v3d_texture_stateobj *stage_tex, 116 int i) 117{ 118 struct v3d_job *job = v3d->job; 119 struct pipe_sampler_state *psampler = stage_tex->samplers[i]; 120 struct v3d_sampler_state *sampler = v3d_sampler_state(psampler); 121 struct pipe_sampler_view *psview = stage_tex->textures[i]; 122 struct v3d_sampler_view *sview = v3d_sampler_view(psview); 123 struct pipe_resource *prsc = psview->texture; 124 struct v3d_resource *rsc = v3d_resource(prsc); 125 const struct v3d_device_info *devinfo = &v3d->screen->devinfo; 126 127 stage_tex->texture_state[i].offset = 128 v3d_cl_ensure_space(&job->indirect, 129 cl_packet_length(TEXTURE_SHADER_STATE), 130 32); 131 v3d_bo_set_reference(&stage_tex->texture_state[i].bo, 132 job->indirect.bo); 133 134 uint32_t return_size = v3d_get_tex_return_size(devinfo, psview->format, 135 psampler->compare_mode); 136 137 struct V3D33_TEXTURE_SHADER_STATE unpacked = { 138 /* XXX */ 139 .border_color_red = swizzled_border_color(devinfo, psampler, 140 sview, 0), 141 .border_color_green = swizzled_border_color(devinfo, psampler, 142 sview, 1), 143 .border_color_blue = swizzled_border_color(devinfo, psampler, 144 sview, 2), 145 .border_color_alpha = swizzled_border_color(devinfo, psampler, 146 sview, 3), 147 148 /* In the normal texturing path, the LOD gets clamped between 149 * min/max, and the base_level field (set in the sampler view 150 * from first_level) only decides where the min/mag switch 151 * happens, so we need to use the LOD clamps to keep us 152 * between min and max. 153 * 154 * For txf, the LOD clamp is still used, despite GL not 155 * wanting that. We will need to have a separate 156 * TEXTURE_SHADER_STATE that ignores psview->min/max_lod to 157 * support txf properly. 158 */ 159 .min_level_of_detail = MIN2(psview->u.tex.first_level + 160 MAX2(psampler->min_lod, 0), 161 psview->u.tex.last_level), 162 .max_level_of_detail = MIN2(psview->u.tex.first_level + 163 MAX2(psampler->max_lod, 164 psampler->min_lod), 165 psview->u.tex.last_level), 166 167 .texture_base_pointer = cl_address(rsc->bo, 168 rsc->slices[0].offset), 169 170 .output_32_bit = return_size == 32, 171 }; 172 173 /* Set up the sampler swizzle if we're doing 16-bit sampling. For 174 * 32-bit, we leave swizzling up to the shader compiler. 175 * 176 * Note: Contrary to the docs, the swizzle still applies even if the 177 * return size is 32. It's just that you probably want to swizzle in 178 * the shader, because you need the Y/Z/W channels to be defined. 179 */ 180 if (return_size == 32) { 181 unpacked.swizzle_r = v3d_translate_pipe_swizzle(PIPE_SWIZZLE_X); 182 unpacked.swizzle_g = v3d_translate_pipe_swizzle(PIPE_SWIZZLE_Y); 183 unpacked.swizzle_b = v3d_translate_pipe_swizzle(PIPE_SWIZZLE_Z); 184 unpacked.swizzle_a = v3d_translate_pipe_swizzle(PIPE_SWIZZLE_W); 185 } else { 186 unpacked.swizzle_r = v3d_translate_pipe_swizzle(sview->swizzle[0]); 187 unpacked.swizzle_g = v3d_translate_pipe_swizzle(sview->swizzle[1]); 188 unpacked.swizzle_b = v3d_translate_pipe_swizzle(sview->swizzle[2]); 189 unpacked.swizzle_a = v3d_translate_pipe_swizzle(sview->swizzle[3]); 190 } 191 192 int min_img_filter = psampler->min_img_filter; 193 int min_mip_filter = psampler->min_mip_filter; 194 int mag_img_filter = psampler->mag_img_filter; 195 196 if (return_size == 32) { 197 min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST; 198 min_img_filter = PIPE_TEX_FILTER_NEAREST; 199 mag_img_filter = PIPE_TEX_FILTER_NEAREST; 200 } 201 202 bool min_nearest = min_img_filter == PIPE_TEX_FILTER_NEAREST; 203 switch (min_mip_filter) { 204 case PIPE_TEX_MIPFILTER_NONE: 205 unpacked.filter += min_nearest ? 2 : 0; 206 break; 207 case PIPE_TEX_MIPFILTER_NEAREST: 208 unpacked.filter += min_nearest ? 4 : 8; 209 break; 210 case PIPE_TEX_MIPFILTER_LINEAR: 211 unpacked.filter += min_nearest ? 4 : 8; 212 unpacked.filter += 2; 213 break; 214 } 215 216 if (mag_img_filter == PIPE_TEX_FILTER_NEAREST) 217 unpacked.filter++; 218 219 if (psampler->max_anisotropy > 8) 220 unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_16_1; 221 else if (psampler->max_anisotropy > 4) 222 unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_8_1; 223 else if (psampler->max_anisotropy > 2) 224 unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_4_1; 225 else if (psampler->max_anisotropy) 226 unpacked.filter = V3D_TMU_FILTER_ANISOTROPIC_2_1; 227 228 uint8_t packed[cl_packet_length(TEXTURE_SHADER_STATE)]; 229 cl_packet_pack(TEXTURE_SHADER_STATE)(&job->indirect, packed, &unpacked); 230 231 for (int i = 0; i < ARRAY_SIZE(packed); i++) 232 packed[i] |= sview->texture_shader_state[i] | sampler->texture_shader_state[i]; 233 234 /* TMU indirect structs need to be 32b aligned. */ 235 v3d_cl_ensure_space(&job->indirect, ARRAY_SIZE(packed), 32); 236 cl_emit_prepacked(&job->indirect, &packed); 237} 238 239static void 240emit_textures(struct v3d_context *v3d, struct v3d_texture_stateobj *stage_tex) 241{ 242 for (int i = 0; i < stage_tex->num_textures; i++) { 243 if (stage_tex->textures[i]) 244 emit_one_texture(v3d, stage_tex, i); 245 } 246} 247#endif /* V3D_VERSION < 40 */ 248 249static uint32_t 250translate_colormask(struct v3d_context *v3d, uint32_t colormask, int rt) 251{ 252 if (v3d->swap_color_rb & (1 << rt)) { 253 colormask = ((colormask & (2 | 8)) | 254 ((colormask & 1) << 2) | 255 ((colormask & 4) >> 2)); 256 } 257 258 return (~colormask) & 0xf; 259} 260 261static void 262emit_rt_blend(struct v3d_context *v3d, struct v3d_job *job, 263 struct pipe_blend_state *blend, int rt, uint8_t rt_mask, 264 bool blend_dst_alpha_one) 265{ 266 struct pipe_rt_blend_state *rtblend = &blend->rt[rt]; 267 268#if V3D_VERSION >= 40 269 /* We don't need to emit blend state for disabled RTs. */ 270 if (!rtblend->blend_enable) 271 return; 272#endif 273 274 cl_emit(&job->bcl, BLEND_CFG, config) { 275#if V3D_VERSION >= 40 276 config.render_target_mask = rt_mask; 277#else 278 assert(rt == 0); 279#endif 280 281 config.color_blend_mode = rtblend->rgb_func; 282 config.color_blend_dst_factor = 283 v3d_factor(rtblend->rgb_dst_factor, 284 blend_dst_alpha_one); 285 config.color_blend_src_factor = 286 v3d_factor(rtblend->rgb_src_factor, 287 blend_dst_alpha_one); 288 289 config.alpha_blend_mode = rtblend->alpha_func; 290 config.alpha_blend_dst_factor = 291 v3d_factor(rtblend->alpha_dst_factor, 292 blend_dst_alpha_one); 293 config.alpha_blend_src_factor = 294 v3d_factor(rtblend->alpha_src_factor, 295 blend_dst_alpha_one); 296 } 297} 298 299static void 300emit_flat_shade_flags(struct v3d_job *job, 301 int varying_offset, 302 uint32_t varyings, 303 enum V3DX(Varying_Flags_Action) lower, 304 enum V3DX(Varying_Flags_Action) higher) 305{ 306 cl_emit(&job->bcl, FLAT_SHADE_FLAGS, flags) { 307 flags.varying_offset_v0 = varying_offset; 308 flags.flat_shade_flags_for_varyings_v024 = varyings; 309 flags.action_for_flat_shade_flags_of_lower_numbered_varyings = 310 lower; 311 flags.action_for_flat_shade_flags_of_higher_numbered_varyings = 312 higher; 313 } 314} 315 316#if V3D_VERSION >= 40 317static void 318emit_noperspective_flags(struct v3d_job *job, 319 int varying_offset, 320 uint32_t varyings, 321 enum V3DX(Varying_Flags_Action) lower, 322 enum V3DX(Varying_Flags_Action) higher) 323{ 324 cl_emit(&job->bcl, NON_PERSPECTIVE_FLAGS, flags) { 325 flags.varying_offset_v0 = varying_offset; 326 flags.non_perspective_flags_for_varyings_v024 = varyings; 327 flags.action_for_non_perspective_flags_of_lower_numbered_varyings = 328 lower; 329 flags.action_for_non_perspective_flags_of_higher_numbered_varyings = 330 higher; 331 } 332} 333 334static void 335emit_centroid_flags(struct v3d_job *job, 336 int varying_offset, 337 uint32_t varyings, 338 enum V3DX(Varying_Flags_Action) lower, 339 enum V3DX(Varying_Flags_Action) higher) 340{ 341 cl_emit(&job->bcl, CENTROID_FLAGS, flags) { 342 flags.varying_offset_v0 = varying_offset; 343 flags.centroid_flags_for_varyings_v024 = varyings; 344 flags.action_for_centroid_flags_of_lower_numbered_varyings = 345 lower; 346 flags.action_for_centroid_flags_of_higher_numbered_varyings = 347 higher; 348 } 349} 350#endif /* V3D_VERSION >= 40 */ 351 352static bool 353emit_varying_flags(struct v3d_job *job, uint32_t *flags, 354 void (*flag_emit_callback)(struct v3d_job *job, 355 int varying_offset, 356 uint32_t flags, 357 enum V3DX(Varying_Flags_Action) lower, 358 enum V3DX(Varying_Flags_Action) higher)) 359{ 360 struct v3d_context *v3d = job->v3d; 361 bool emitted_any = false; 362 363 for (int i = 0; i < ARRAY_SIZE(v3d->prog.fs->prog_data.fs->flat_shade_flags); i++) { 364 if (!flags[i]) 365 continue; 366 367 if (emitted_any) { 368 flag_emit_callback(job, i, flags[i], 369 V3D_VARYING_FLAGS_ACTION_UNCHANGED, 370 V3D_VARYING_FLAGS_ACTION_UNCHANGED); 371 } else if (i == 0) { 372 flag_emit_callback(job, i, flags[i], 373 V3D_VARYING_FLAGS_ACTION_UNCHANGED, 374 V3D_VARYING_FLAGS_ACTION_ZEROED); 375 } else { 376 flag_emit_callback(job, i, flags[i], 377 V3D_VARYING_FLAGS_ACTION_ZEROED, 378 V3D_VARYING_FLAGS_ACTION_ZEROED); 379 } 380 emitted_any = true; 381 } 382 383 return emitted_any; 384} 385 386static inline struct v3d_uncompiled_shader * 387get_tf_shader(struct v3d_context *v3d) 388{ 389 if (v3d->prog.bind_gs) 390 return v3d->prog.bind_gs; 391 else 392 return v3d->prog.bind_vs; 393} 394 395void 396v3dX(emit_state)(struct pipe_context *pctx) 397{ 398 struct v3d_context *v3d = v3d_context(pctx); 399 struct v3d_job *job = v3d->job; 400 bool rasterizer_discard = v3d->rasterizer->base.rasterizer_discard; 401 402 if (v3d->dirty & (V3D_DIRTY_SCISSOR | V3D_DIRTY_VIEWPORT | 403 V3D_DIRTY_RASTERIZER)) { 404 float *vpscale = v3d->viewport.scale; 405 float *vptranslate = v3d->viewport.translate; 406 float vp_minx = -fabsf(vpscale[0]) + vptranslate[0]; 407 float vp_maxx = fabsf(vpscale[0]) + vptranslate[0]; 408 float vp_miny = -fabsf(vpscale[1]) + vptranslate[1]; 409 float vp_maxy = fabsf(vpscale[1]) + vptranslate[1]; 410 411 /* Clip to the scissor if it's enabled, but still clip to the 412 * drawable regardless since that controls where the binner 413 * tries to put things. 414 * 415 * Additionally, always clip the rendering to the viewport, 416 * since the hardware does guardband clipping, meaning 417 * primitives would rasterize outside of the view volume. 418 */ 419 uint32_t minx, miny, maxx, maxy; 420 if (!v3d->rasterizer->base.scissor) { 421 minx = MAX2(vp_minx, 0); 422 miny = MAX2(vp_miny, 0); 423 maxx = MIN2(vp_maxx, job->draw_width); 424 maxy = MIN2(vp_maxy, job->draw_height); 425 } else { 426 minx = MAX2(vp_minx, v3d->scissor.minx); 427 miny = MAX2(vp_miny, v3d->scissor.miny); 428 maxx = MIN2(vp_maxx, v3d->scissor.maxx); 429 maxy = MIN2(vp_maxy, v3d->scissor.maxy); 430 } 431 432 cl_emit(&job->bcl, CLIP_WINDOW, clip) { 433 clip.clip_window_left_pixel_coordinate = minx; 434 clip.clip_window_bottom_pixel_coordinate = miny; 435 if (maxx > minx && maxy > miny) { 436 clip.clip_window_width_in_pixels = maxx - minx; 437 clip.clip_window_height_in_pixels = maxy - miny; 438 } else if (V3D_VERSION < 41) { 439 /* The HW won't entirely clip out when scissor 440 * w/h is 0. Just treat it the same as 441 * rasterizer discard. 442 */ 443 rasterizer_discard = true; 444 clip.clip_window_width_in_pixels = 1; 445 clip.clip_window_height_in_pixels = 1; 446 } 447 } 448 449 job->draw_min_x = MIN2(job->draw_min_x, minx); 450 job->draw_min_y = MIN2(job->draw_min_y, miny); 451 job->draw_max_x = MAX2(job->draw_max_x, maxx); 452 job->draw_max_y = MAX2(job->draw_max_y, maxy); 453 454 if (!v3d->rasterizer->base.scissor) { 455 job->scissor.disabled = true; 456 } else if (!job->scissor.disabled && 457 (v3d->dirty & V3D_DIRTY_SCISSOR)) { 458 if (job->scissor.count < MAX_JOB_SCISSORS) { 459 job->scissor.rects[job->scissor.count].min_x = 460 v3d->scissor.minx; 461 job->scissor.rects[job->scissor.count].min_y = 462 v3d->scissor.miny; 463 job->scissor.rects[job->scissor.count].max_x = 464 v3d->scissor.maxx - 1; 465 job->scissor.rects[job->scissor.count].max_y = 466 v3d->scissor.maxy - 1; 467 job->scissor.count++; 468 } else { 469 job->scissor.disabled = true; 470 perf_debug("Too many scissor rects."); 471 } 472 } 473 } 474 475 if (v3d->dirty & (V3D_DIRTY_RASTERIZER | 476 V3D_DIRTY_ZSA | 477 V3D_DIRTY_BLEND | 478 V3D_DIRTY_COMPILED_FS)) { 479 cl_emit(&job->bcl, CFG_BITS, config) { 480 config.enable_forward_facing_primitive = 481 !rasterizer_discard && 482 !(v3d->rasterizer->base.cull_face & 483 PIPE_FACE_FRONT); 484 config.enable_reverse_facing_primitive = 485 !rasterizer_discard && 486 !(v3d->rasterizer->base.cull_face & 487 PIPE_FACE_BACK); 488 /* This seems backwards, but it's what gets the 489 * clipflat test to pass. 490 */ 491 config.clockwise_primitives = 492 v3d->rasterizer->base.front_ccw; 493 494 config.enable_depth_offset = 495 v3d->rasterizer->base.offset_tri; 496 497 /* V3D follows GL behavior where the sample mask only 498 * applies when MSAA is enabled. Gallium has sample 499 * mask apply anyway, and the MSAA blit shaders will 500 * set sample mask without explicitly setting 501 * rasterizer oversample. Just force it on here, 502 * since the blit shaders are the only way to have 503 * !multisample && samplemask != 0xf. 504 */ 505 config.rasterizer_oversample_mode = 506 v3d->rasterizer->base.multisample || 507 v3d->sample_mask != 0xf; 508 509 config.direct3d_provoking_vertex = 510 v3d->rasterizer->base.flatshade_first; 511 512 config.blend_enable = v3d->blend->blend_enables; 513 514 /* Note: EZ state may update based on the compiled FS, 515 * along with ZSA 516 */ 517 config.early_z_updates_enable = 518 (job->ez_state != V3D_EZ_DISABLED); 519 if (v3d->zsa->base.depth_enabled) { 520 config.z_updates_enable = 521 v3d->zsa->base.depth_writemask; 522 config.early_z_enable = 523 config.early_z_updates_enable; 524 config.depth_test_function = 525 v3d->zsa->base.depth_func; 526 } else { 527 config.depth_test_function = PIPE_FUNC_ALWAYS; 528 } 529 530 config.stencil_enable = 531 v3d->zsa->base.stencil[0].enabled; 532 533 /* Use nicer line caps when line smoothing is 534 * enabled 535 */ 536 config.line_rasterization = 537 v3d_line_smoothing_enabled(v3d) ? 538 V3D_LINE_RASTERIZATION_PERP_END_CAPS : 539 V3D_LINE_RASTERIZATION_DIAMOND_EXIT; 540 } 541 542 } 543 544 if (v3d->dirty & V3D_DIRTY_RASTERIZER && 545 v3d->rasterizer->base.offset_tri) { 546 if (job->zsbuf && 547 job->zsbuf->format == PIPE_FORMAT_Z16_UNORM) { 548 cl_emit_prepacked_sized(&job->bcl, 549 v3d->rasterizer->depth_offset_z16, 550 cl_packet_length(DEPTH_OFFSET)); 551 } else { 552 cl_emit_prepacked_sized(&job->bcl, 553 v3d->rasterizer->depth_offset, 554 cl_packet_length(DEPTH_OFFSET)); 555 } 556 } 557 558 if (v3d->dirty & V3D_DIRTY_RASTERIZER) { 559 cl_emit(&job->bcl, POINT_SIZE, point_size) { 560 point_size.point_size = v3d->rasterizer->point_size; 561 } 562 563 cl_emit(&job->bcl, LINE_WIDTH, line_width) { 564 line_width.line_width = v3d_get_real_line_width(v3d); 565 } 566 } 567 568 if (v3d->dirty & V3D_DIRTY_VIEWPORT) { 569 cl_emit(&job->bcl, CLIPPER_XY_SCALING, clip) { 570 clip.viewport_half_width_in_1_256th_of_pixel = 571 v3d->viewport.scale[0] * 256.0f; 572 clip.viewport_half_height_in_1_256th_of_pixel = 573 v3d->viewport.scale[1] * 256.0f; 574 } 575 576 cl_emit(&job->bcl, CLIPPER_Z_SCALE_AND_OFFSET, clip) { 577 clip.viewport_z_offset_zc_to_zs = 578 v3d->viewport.translate[2]; 579 clip.viewport_z_scale_zc_to_zs = 580 v3d->viewport.scale[2]; 581 } 582 cl_emit(&job->bcl, CLIPPER_Z_MIN_MAX_CLIPPING_PLANES, clip) { 583 float z1 = (v3d->viewport.translate[2] - 584 v3d->viewport.scale[2]); 585 float z2 = (v3d->viewport.translate[2] + 586 v3d->viewport.scale[2]); 587 clip.minimum_zw = MIN2(z1, z2); 588 clip.maximum_zw = MAX2(z1, z2); 589 } 590 591 cl_emit(&job->bcl, VIEWPORT_OFFSET, vp) { 592 vp.viewport_centre_x_coordinate = 593 v3d->viewport.translate[0]; 594 vp.viewport_centre_y_coordinate = 595 v3d->viewport.translate[1]; 596 } 597 } 598 599 if (v3d->dirty & V3D_DIRTY_BLEND) { 600 struct v3d_blend_state *blend = v3d->blend; 601 602 if (blend->blend_enables) { 603#if V3D_VERSION >= 40 604 cl_emit(&job->bcl, BLEND_ENABLES, enables) { 605 enables.mask = blend->blend_enables; 606 } 607#endif 608 609 if (blend->base.independent_blend_enable) { 610 for (int i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) 611 emit_rt_blend(v3d, job, &blend->base, i, 612 (1 << i), 613 v3d->blend_dst_alpha_one & (1 << i)); 614 } else if (v3d->blend_dst_alpha_one && 615 util_bitcount(v3d->blend_dst_alpha_one) < job->nr_cbufs) { 616 /* Even if we don't have independent per-RT 617 * blending, we may have a combination of RT 618 * formats were some RTs have an alpha channel 619 * and others don't. Since this affects how 620 * blending is performed, we also need to emit 621 * independent blend configurations in this 622 * case: one for RTs with alpha and one for 623 * RTs without. 624 */ 625 emit_rt_blend(v3d, job, &blend->base, 0, 626 ((1 << V3D_MAX_DRAW_BUFFERS) - 1) & 627 v3d->blend_dst_alpha_one, 628 true); 629 emit_rt_blend(v3d, job, &blend->base, 0, 630 ((1 << V3D_MAX_DRAW_BUFFERS) - 1) & 631 ~v3d->blend_dst_alpha_one, 632 false); 633 } else { 634 emit_rt_blend(v3d, job, &blend->base, 0, 635 (1 << V3D_MAX_DRAW_BUFFERS) - 1, 636 v3d->blend_dst_alpha_one); 637 } 638 } 639 } 640 641 if (v3d->dirty & V3D_DIRTY_BLEND) { 642 struct pipe_blend_state *blend = &v3d->blend->base; 643 644 cl_emit(&job->bcl, COLOR_WRITE_MASKS, mask) { 645 for (int i = 0; i < 4; i++) { 646 int rt = blend->independent_blend_enable ? i : 0; 647 int rt_mask = blend->rt[rt].colormask; 648 649 mask.mask |= translate_colormask(v3d, rt_mask, 650 i) << (4 * i); 651 } 652 } 653 } 654 655 /* GFXH-1431: On V3D 3.x, writing BLEND_CONFIG resets the constant 656 * color. 657 */ 658 if (v3d->dirty & V3D_DIRTY_BLEND_COLOR || 659 (V3D_VERSION < 41 && (v3d->dirty & V3D_DIRTY_BLEND))) { 660 cl_emit(&job->bcl, BLEND_CONSTANT_COLOR, color) { 661 color.red_f16 = (v3d->swap_color_rb ? 662 v3d->blend_color.hf[2] : 663 v3d->blend_color.hf[0]); 664 color.green_f16 = v3d->blend_color.hf[1]; 665 color.blue_f16 = (v3d->swap_color_rb ? 666 v3d->blend_color.hf[0] : 667 v3d->blend_color.hf[2]); 668 color.alpha_f16 = v3d->blend_color.hf[3]; 669 } 670 } 671 672 if (v3d->dirty & (V3D_DIRTY_ZSA | V3D_DIRTY_STENCIL_REF)) { 673 struct pipe_stencil_state *front = &v3d->zsa->base.stencil[0]; 674 struct pipe_stencil_state *back = &v3d->zsa->base.stencil[1]; 675 676 if (front->enabled) { 677 cl_emit_with_prepacked(&job->bcl, STENCIL_CFG, 678 v3d->zsa->stencil_front, config) { 679 config.stencil_ref_value = 680 v3d->stencil_ref.ref_value[0]; 681 } 682 } 683 684 if (back->enabled) { 685 cl_emit_with_prepacked(&job->bcl, STENCIL_CFG, 686 v3d->zsa->stencil_back, config) { 687 config.stencil_ref_value = 688 v3d->stencil_ref.ref_value[1]; 689 } 690 } 691 } 692 693#if V3D_VERSION < 40 694 /* Pre-4.x, we have texture state that depends on both the sampler and 695 * the view, so we merge them together at draw time. 696 */ 697 if (v3d->dirty & V3D_DIRTY_FRAGTEX) 698 emit_textures(v3d, &v3d->tex[PIPE_SHADER_FRAGMENT]); 699 700 if (v3d->dirty & V3D_DIRTY_GEOMTEX) 701 emit_textures(v3d, &v3d->tex[PIPE_SHADER_GEOMETRY]); 702 703 if (v3d->dirty & V3D_DIRTY_VERTTEX) 704 emit_textures(v3d, &v3d->tex[PIPE_SHADER_VERTEX]); 705#endif 706 707 if (v3d->dirty & V3D_DIRTY_FLAT_SHADE_FLAGS) { 708 if (!emit_varying_flags(job, 709 v3d->prog.fs->prog_data.fs->flat_shade_flags, 710 emit_flat_shade_flags)) { 711 cl_emit(&job->bcl, ZERO_ALL_FLAT_SHADE_FLAGS, flags); 712 } 713 } 714 715#if V3D_VERSION >= 40 716 if (v3d->dirty & V3D_DIRTY_NOPERSPECTIVE_FLAGS) { 717 if (!emit_varying_flags(job, 718 v3d->prog.fs->prog_data.fs->noperspective_flags, 719 emit_noperspective_flags)) { 720 cl_emit(&job->bcl, ZERO_ALL_NON_PERSPECTIVE_FLAGS, flags); 721 } 722 } 723 724 if (v3d->dirty & V3D_DIRTY_CENTROID_FLAGS) { 725 if (!emit_varying_flags(job, 726 v3d->prog.fs->prog_data.fs->centroid_flags, 727 emit_centroid_flags)) { 728 cl_emit(&job->bcl, ZERO_ALL_CENTROID_FLAGS, flags); 729 } 730 } 731#endif 732 733 /* Set up the transform feedback data specs (which VPM entries to 734 * output to which buffers). 735 */ 736 if (v3d->dirty & (V3D_DIRTY_STREAMOUT | 737 V3D_DIRTY_RASTERIZER | 738 V3D_DIRTY_PRIM_MODE)) { 739 struct v3d_streamout_stateobj *so = &v3d->streamout; 740 if (so->num_targets) { 741 bool psiz_per_vertex = (v3d->prim_mode == PIPE_PRIM_POINTS && 742 v3d->rasterizer->base.point_size_per_vertex); 743 struct v3d_uncompiled_shader *tf_shader = 744 get_tf_shader(v3d); 745 uint16_t *tf_specs = (psiz_per_vertex ? 746 tf_shader->tf_specs_psiz : 747 tf_shader->tf_specs); 748 749#if V3D_VERSION >= 40 750 bool tf_enabled = v3d_transform_feedback_enabled(v3d); 751 job->tf_enabled |= tf_enabled; 752 753 cl_emit(&job->bcl, TRANSFORM_FEEDBACK_SPECS, tfe) { 754 tfe.number_of_16_bit_output_data_specs_following = 755 tf_shader->num_tf_specs; 756 tfe.enable = tf_enabled; 757 }; 758#else /* V3D_VERSION < 40 */ 759 cl_emit(&job->bcl, TRANSFORM_FEEDBACK_ENABLE, tfe) { 760 tfe.number_of_32_bit_output_buffer_address_following = 761 so->num_targets; 762 tfe.number_of_16_bit_output_data_specs_following = 763 tf_shader->num_tf_specs; 764 }; 765#endif /* V3D_VERSION < 40 */ 766 for (int i = 0; i < tf_shader->num_tf_specs; i++) { 767 cl_emit_prepacked(&job->bcl, &tf_specs[i]); 768 } 769 } else { 770#if V3D_VERSION >= 40 771 cl_emit(&job->bcl, TRANSFORM_FEEDBACK_SPECS, tfe) { 772 tfe.enable = false; 773 }; 774#endif /* V3D_VERSION >= 40 */ 775 } 776 } 777 778 /* Set up the transform feedback buffers. */ 779 if (v3d->dirty & V3D_DIRTY_STREAMOUT) { 780 struct v3d_uncompiled_shader *tf_shader = get_tf_shader(v3d); 781 struct v3d_streamout_stateobj *so = &v3d->streamout; 782 for (int i = 0; i < so->num_targets; i++) { 783 const struct pipe_stream_output_target *target = 784 so->targets[i]; 785 struct v3d_resource *rsc = target ? 786 v3d_resource(target->buffer) : NULL; 787 struct pipe_shader_state *ss = &tf_shader->base; 788 struct pipe_stream_output_info *info = &ss->stream_output; 789 uint32_t offset = (v3d->streamout.offsets[i] * 790 info->stride[i] * 4); 791 792#if V3D_VERSION >= 40 793 if (!target) 794 continue; 795 796 cl_emit(&job->bcl, TRANSFORM_FEEDBACK_BUFFER, output) { 797 output.buffer_address = 798 cl_address(rsc->bo, 799 target->buffer_offset + 800 offset); 801 output.buffer_size_in_32_bit_words = 802 (target->buffer_size - offset) >> 2; 803 output.buffer_number = i; 804 } 805#else /* V3D_VERSION < 40 */ 806 cl_emit(&job->bcl, TRANSFORM_FEEDBACK_OUTPUT_ADDRESS, output) { 807 if (target) { 808 output.address = 809 cl_address(rsc->bo, 810 target->buffer_offset + 811 offset); 812 } 813 }; 814#endif /* V3D_VERSION < 40 */ 815 if (target) { 816 v3d_job_add_tf_write_resource(v3d->job, 817 target->buffer); 818 } 819 /* XXX: buffer_size? */ 820 } 821 } 822 823 if (v3d->dirty & V3D_DIRTY_OQ) { 824 cl_emit(&job->bcl, OCCLUSION_QUERY_COUNTER, counter) { 825 if (v3d->active_queries && v3d->current_oq) { 826 counter.address = cl_address(v3d->current_oq, 0); 827 } 828 } 829 } 830 831#if V3D_VERSION >= 40 832 if (v3d->dirty & V3D_DIRTY_SAMPLE_STATE) { 833 cl_emit(&job->bcl, SAMPLE_STATE, state) { 834 /* Note: SampleCoverage was handled at the 835 * frontend level by converting to sample_mask. 836 */ 837 state.coverage = 1.0; 838 state.mask = job->msaa ? v3d->sample_mask : 0xf; 839 } 840 } 841#endif 842} 843