/*
 * Copyright (C) 2021 Collabora Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
22 */ 23 24#include "genxml/gen_macros.h" 25 26#include "util/macros.h" 27#include "compiler/shader_enums.h" 28 29#include "vk_util.h" 30 31#include "pan_cs.h" 32#include "pan_encoder.h" 33#include "pan_pool.h" 34#include "pan_shader.h" 35#include "pan_earlyzs.h" 36 37#include "panvk_cs.h" 38#include "panvk_private.h" 39#include "panvk_varyings.h" 40 41#include "vk_sampler.h" 42 43static enum mali_mipmap_mode 44panvk_translate_sampler_mipmap_mode(VkSamplerMipmapMode mode) 45{ 46 switch (mode) { 47 case VK_SAMPLER_MIPMAP_MODE_NEAREST: return MALI_MIPMAP_MODE_NEAREST; 48 case VK_SAMPLER_MIPMAP_MODE_LINEAR: return MALI_MIPMAP_MODE_TRILINEAR; 49 default: unreachable("Invalid mipmap mode"); 50 } 51} 52 53static unsigned 54panvk_translate_sampler_address_mode(VkSamplerAddressMode mode) 55{ 56 switch (mode) { 57 case VK_SAMPLER_ADDRESS_MODE_REPEAT: return MALI_WRAP_MODE_REPEAT; 58 case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT: return MALI_WRAP_MODE_MIRRORED_REPEAT; 59 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE: return MALI_WRAP_MODE_CLAMP_TO_EDGE; 60 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER: return MALI_WRAP_MODE_CLAMP_TO_BORDER; 61 case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE: return MALI_WRAP_MODE_MIRRORED_CLAMP_TO_EDGE; 62 default: unreachable("Invalid wrap"); 63 } 64} 65 66static mali_pixel_format 67panvk_varying_hw_format(const struct panvk_device *dev, 68 const struct panvk_varyings_info *varyings, 69 gl_shader_stage stage, unsigned idx) 70{ 71 const struct panfrost_device *pdev = &dev->physical_device->pdev; 72 gl_varying_slot loc = varyings->stage[stage].loc[idx]; 73 74 switch (loc) { 75 case VARYING_SLOT_PNTC: 76 case VARYING_SLOT_PSIZ: 77#if PAN_ARCH <= 6 78 return (MALI_R16F << 12) | panfrost_get_default_swizzle(1); 79#else 80 return (MALI_R16F << 12) | MALI_RGB_COMPONENT_ORDER_R000; 81#endif 82 case VARYING_SLOT_POS: 83#if PAN_ARCH <= 6 84 return (MALI_SNAP_4 << 12) | panfrost_get_default_swizzle(4); 85#else 86 return (MALI_SNAP_4 << 12) | 
MALI_RGB_COMPONENT_ORDER_RGBA; 87#endif 88 default: 89 if (varyings->varying[loc].format != PIPE_FORMAT_NONE) 90 return pdev->formats[varyings->varying[loc].format].hw; 91#if PAN_ARCH >= 7 92 return (MALI_CONSTANT << 12) | MALI_RGB_COMPONENT_ORDER_0000; 93#else 94 return (MALI_CONSTANT << 12) | PAN_V6_SWIZZLE(0, 0, 0, 0); 95#endif 96 } 97} 98 99static void 100panvk_emit_varying(const struct panvk_device *dev, 101 const struct panvk_varyings_info *varyings, 102 gl_shader_stage stage, unsigned idx, 103 void *attrib) 104{ 105 gl_varying_slot loc = varyings->stage[stage].loc[idx]; 106 107 pan_pack(attrib, ATTRIBUTE, cfg) { 108 cfg.buffer_index = varyings->varying[loc].buf; 109 cfg.offset = varyings->varying[loc].offset; 110 cfg.format = panvk_varying_hw_format(dev, varyings, stage, idx); 111 } 112} 113 114void 115panvk_per_arch(emit_varyings)(const struct panvk_device *dev, 116 const struct panvk_varyings_info *varyings, 117 gl_shader_stage stage, 118 void *descs) 119{ 120 struct mali_attribute_packed *attrib = descs; 121 122 for (unsigned i = 0; i < varyings->stage[stage].count; i++) 123 panvk_emit_varying(dev, varyings, stage, i, attrib++); 124} 125 126static void 127panvk_emit_varying_buf(const struct panvk_varyings_info *varyings, 128 enum panvk_varying_buf_id id, void *buf) 129{ 130 unsigned buf_idx = panvk_varying_buf_index(varyings, id); 131 132 pan_pack(buf, ATTRIBUTE_BUFFER, cfg) { 133 unsigned offset = varyings->buf[buf_idx].address & 63; 134 135 cfg.stride = varyings->buf[buf_idx].stride; 136 cfg.size = varyings->buf[buf_idx].size + offset; 137 cfg.pointer = varyings->buf[buf_idx].address & ~63ULL; 138 } 139} 140 141void 142panvk_per_arch(emit_varying_bufs)(const struct panvk_varyings_info *varyings, 143 void *descs) 144{ 145 struct mali_attribute_buffer_packed *buf = descs; 146 147 for (unsigned i = 0; i < PANVK_VARY_BUF_MAX; i++) { 148 if (varyings->buf_mask & (1 << i)) 149 panvk_emit_varying_buf(varyings, i, buf++); 150 } 151} 152 153static void 
154panvk_emit_attrib_buf(const struct panvk_attribs_info *info, 155 const struct panvk_draw_info *draw, 156 const struct panvk_attrib_buf *bufs, 157 unsigned buf_count, 158 unsigned idx, void *desc) 159{ 160 const struct panvk_attrib_buf_info *buf_info = &info->buf[idx]; 161 162 assert(idx < buf_count); 163 const struct panvk_attrib_buf *buf = &bufs[idx]; 164 mali_ptr addr = buf->address & ~63ULL; 165 unsigned size = buf->size + (buf->address & 63); 166 unsigned divisor = 167 draw->padded_vertex_count * buf_info->instance_divisor; 168 169 /* TODO: support instanced arrays */ 170 if (draw->instance_count <= 1) { 171 pan_pack(desc, ATTRIBUTE_BUFFER, cfg) { 172 cfg.type = MALI_ATTRIBUTE_TYPE_1D; 173 cfg.stride = buf_info->per_instance ? 0 : buf_info->stride; 174 cfg.pointer = addr; 175 cfg.size = size; 176 } 177 } else if (!buf_info->per_instance) { 178 pan_pack(desc, ATTRIBUTE_BUFFER, cfg) { 179 cfg.type = MALI_ATTRIBUTE_TYPE_1D_MODULUS; 180 cfg.divisor = draw->padded_vertex_count; 181 cfg.stride = buf_info->stride; 182 cfg.pointer = addr; 183 cfg.size = size; 184 } 185 } else if (!divisor) { 186 /* instance_divisor == 0 means all instances share the same value. 187 * Make it a 1D array with a zero stride. 
188 */ 189 pan_pack(desc, ATTRIBUTE_BUFFER, cfg) { 190 cfg.type = MALI_ATTRIBUTE_TYPE_1D; 191 cfg.stride = 0; 192 cfg.pointer = addr; 193 cfg.size = size; 194 } 195 } else if (util_is_power_of_two_or_zero(divisor)) { 196 pan_pack(desc, ATTRIBUTE_BUFFER, cfg) { 197 cfg.type = MALI_ATTRIBUTE_TYPE_1D_POT_DIVISOR; 198 cfg.stride = buf_info->stride; 199 cfg.pointer = addr; 200 cfg.size = size; 201 cfg.divisor_r = __builtin_ctz(divisor); 202 } 203 } else { 204 unsigned divisor_r = 0, divisor_e = 0; 205 unsigned divisor_num = 206 panfrost_compute_magic_divisor(divisor, &divisor_r, &divisor_e); 207 pan_pack(desc, ATTRIBUTE_BUFFER, cfg) { 208 cfg.type = MALI_ATTRIBUTE_TYPE_1D_NPOT_DIVISOR; 209 cfg.stride = buf_info->stride; 210 cfg.pointer = addr; 211 cfg.size = size; 212 cfg.divisor_r = divisor_r; 213 cfg.divisor_e = divisor_e; 214 } 215 216 desc += pan_size(ATTRIBUTE_BUFFER); 217 pan_pack(desc, ATTRIBUTE_BUFFER_CONTINUATION_NPOT, cfg) { 218 cfg.divisor_numerator = divisor_num; 219 cfg.divisor = buf_info->instance_divisor; 220 } 221 } 222} 223 224void 225panvk_per_arch(emit_attrib_bufs)(const struct panvk_attribs_info *info, 226 const struct panvk_attrib_buf *bufs, 227 unsigned buf_count, 228 const struct panvk_draw_info *draw, 229 void *descs) 230{ 231 struct mali_attribute_buffer_packed *buf = descs; 232 233 for (unsigned i = 0; i < info->buf_count; i++) { 234 panvk_emit_attrib_buf(info, draw, bufs, buf_count, i, buf); 235 buf += 2; 236 } 237} 238 239void 240panvk_per_arch(emit_sampler)(const VkSamplerCreateInfo *pCreateInfo, 241 void *desc) 242{ 243 VkClearColorValue border_color = 244 vk_sampler_border_color_value(pCreateInfo, NULL); 245 246 pan_pack(desc, SAMPLER, cfg) { 247 cfg.magnify_nearest = pCreateInfo->magFilter == VK_FILTER_NEAREST; 248 cfg.minify_nearest = pCreateInfo->minFilter == VK_FILTER_NEAREST; 249 cfg.mipmap_mode = panvk_translate_sampler_mipmap_mode(pCreateInfo->mipmapMode); 250 cfg.normalized_coordinates = !pCreateInfo->unnormalizedCoordinates; 251 
252 cfg.lod_bias = FIXED_16(pCreateInfo->mipLodBias, true); 253 cfg.minimum_lod = FIXED_16(pCreateInfo->minLod, false); 254 cfg.maximum_lod = FIXED_16(pCreateInfo->maxLod, false); 255 cfg.wrap_mode_s = panvk_translate_sampler_address_mode(pCreateInfo->addressModeU); 256 cfg.wrap_mode_t = panvk_translate_sampler_address_mode(pCreateInfo->addressModeV); 257 cfg.wrap_mode_r = panvk_translate_sampler_address_mode(pCreateInfo->addressModeW); 258 cfg.compare_function = panvk_per_arch(translate_sampler_compare_func)(pCreateInfo); 259 cfg.border_color_r = border_color.uint32[0]; 260 cfg.border_color_g = border_color.uint32[1]; 261 cfg.border_color_b = border_color.uint32[2]; 262 cfg.border_color_a = border_color.uint32[3]; 263 } 264} 265 266static void 267panvk_emit_attrib(const struct panvk_device *dev, 268 const struct panvk_draw_info *draw, 269 const struct panvk_attribs_info *attribs, 270 const struct panvk_attrib_buf *bufs, 271 unsigned buf_count, 272 unsigned idx, void *attrib) 273{ 274 const struct panfrost_device *pdev = &dev->physical_device->pdev; 275 unsigned buf_idx = attribs->attrib[idx].buf; 276 const struct panvk_attrib_buf_info *buf_info = &attribs->buf[buf_idx]; 277 278 pan_pack(attrib, ATTRIBUTE, cfg) { 279 cfg.buffer_index = buf_idx * 2; 280 cfg.offset = attribs->attrib[idx].offset + 281 (bufs[buf_idx].address & 63); 282 283 if (buf_info->per_instance) 284 cfg.offset += draw->first_instance * buf_info->stride; 285 286 cfg.format = pdev->formats[attribs->attrib[idx].format].hw; 287 } 288} 289 290void 291panvk_per_arch(emit_attribs)(const struct panvk_device *dev, 292 const struct panvk_draw_info *draw, 293 const struct panvk_attribs_info *attribs, 294 const struct panvk_attrib_buf *bufs, 295 unsigned buf_count, 296 void *descs) 297{ 298 struct mali_attribute_packed *attrib = descs; 299 300 for (unsigned i = 0; i < attribs->attrib_count; i++) 301 panvk_emit_attrib(dev, draw, attribs, bufs, buf_count, i, attrib++); 302} 303 304void 
305panvk_per_arch(emit_ubo)(mali_ptr address, size_t size, void *desc) 306{ 307 pan_pack(desc, UNIFORM_BUFFER, cfg) { 308 cfg.pointer = address; 309 cfg.entries = DIV_ROUND_UP(size, 16); 310 } 311} 312 313void 314panvk_per_arch(emit_ubos)(const struct panvk_pipeline *pipeline, 315 const struct panvk_descriptor_state *state, 316 void *descs) 317{ 318 struct mali_uniform_buffer_packed *ubos = descs; 319 320 panvk_per_arch(emit_ubo)(state->sysvals_ptr, 321 sizeof(state->sysvals), 322 &ubos[PANVK_SYSVAL_UBO_INDEX]); 323 324 if (pipeline->layout->push_constants.size) { 325 panvk_per_arch(emit_ubo)(state->push_constants, 326 ALIGN_POT(pipeline->layout->push_constants.size, 16), 327 &ubos[PANVK_PUSH_CONST_UBO_INDEX]); 328 } else { 329 memset(&ubos[PANVK_PUSH_CONST_UBO_INDEX], 0, sizeof(*ubos)); 330 } 331 332 for (unsigned s = 0; s < pipeline->layout->vk.set_count; s++) { 333 const struct panvk_descriptor_set_layout *set_layout = 334 vk_to_panvk_descriptor_set_layout(pipeline->layout->vk.set_layouts[s]); 335 const struct panvk_descriptor_set *set = state->sets[s]; 336 337 unsigned ubo_start = 338 panvk_pipeline_layout_ubo_start(pipeline->layout, s, false); 339 340 if (!set) { 341 unsigned all_ubos = set_layout->num_ubos + set_layout->num_dyn_ubos; 342 memset(&ubos[ubo_start], 0, all_ubos * sizeof(*ubos)); 343 } else { 344 memcpy(&ubos[ubo_start], set->ubos, 345 set_layout->num_ubos * sizeof(*ubos)); 346 347 unsigned dyn_ubo_start = 348 panvk_pipeline_layout_ubo_start(pipeline->layout, s, true); 349 350 for (unsigned i = 0; i < set_layout->num_dyn_ubos; i++) { 351 const struct panvk_buffer_desc *bdesc = 352 &state->dyn.ubos[pipeline->layout->sets[s].dyn_ubo_offset + i]; 353 354 mali_ptr address = panvk_buffer_gpu_ptr(bdesc->buffer, 355 bdesc->offset); 356 size_t size = panvk_buffer_range(bdesc->buffer, 357 bdesc->offset, bdesc->size); 358 if (size) { 359 panvk_per_arch(emit_ubo)(address, size, 360 &ubos[dyn_ubo_start + i]); 361 } else { 362 memset(&ubos[dyn_ubo_start + i], 
0, sizeof(*ubos)); 363 } 364 } 365 } 366 } 367} 368 369void 370panvk_per_arch(emit_vertex_job)(const struct panvk_pipeline *pipeline, 371 const struct panvk_draw_info *draw, 372 void *job) 373{ 374 void *section = pan_section_ptr(job, COMPUTE_JOB, INVOCATION); 375 376 memcpy(section, &draw->invocation, pan_size(INVOCATION)); 377 378 pan_section_pack(job, COMPUTE_JOB, PARAMETERS, cfg) { 379 cfg.job_task_split = 5; 380 } 381 382 pan_section_pack(job, COMPUTE_JOB, DRAW, cfg) { 383 cfg.state = pipeline->rsds[MESA_SHADER_VERTEX]; 384 cfg.attributes = draw->stages[MESA_SHADER_VERTEX].attributes; 385 cfg.attribute_buffers = draw->stages[MESA_SHADER_VERTEX].attribute_bufs; 386 cfg.varyings = draw->stages[MESA_SHADER_VERTEX].varyings; 387 cfg.varying_buffers = draw->varying_bufs; 388 cfg.thread_storage = draw->tls; 389 cfg.offset_start = draw->offset_start; 390 cfg.instance_size = draw->instance_count > 1 ? 391 draw->padded_vertex_count : 1; 392 cfg.uniform_buffers = draw->ubos; 393 cfg.push_uniforms = draw->stages[PIPE_SHADER_VERTEX].push_constants; 394 cfg.textures = draw->textures; 395 cfg.samplers = draw->samplers; 396 } 397} 398 399void 400panvk_per_arch(emit_compute_job)(const struct panvk_pipeline *pipeline, 401 const struct panvk_dispatch_info *dispatch, 402 void *job) 403{ 404 panfrost_pack_work_groups_compute(pan_section_ptr(job, COMPUTE_JOB, INVOCATION), 405 dispatch->wg_count.x, 406 dispatch->wg_count.y, 407 dispatch->wg_count.z, 408 pipeline->cs.local_size.x, 409 pipeline->cs.local_size.y, 410 pipeline->cs.local_size.z, 411 false, false); 412 413 pan_section_pack(job, COMPUTE_JOB, PARAMETERS, cfg) { 414 cfg.job_task_split = 415 util_logbase2_ceil(pipeline->cs.local_size.x + 1) + 416 util_logbase2_ceil(pipeline->cs.local_size.y + 1) + 417 util_logbase2_ceil(pipeline->cs.local_size.z + 1); 418 } 419 420 pan_section_pack(job, COMPUTE_JOB, DRAW, cfg) { 421 cfg.state = pipeline->rsds[MESA_SHADER_COMPUTE]; 422 cfg.attributes = dispatch->attributes; 423 
cfg.attribute_buffers = dispatch->attribute_bufs; 424 cfg.thread_storage = dispatch->tsd; 425 cfg.uniform_buffers = dispatch->ubos; 426 cfg.push_uniforms = dispatch->push_uniforms; 427 cfg.textures = dispatch->textures; 428 cfg.samplers = dispatch->samplers; 429 } 430} 431 432static void 433panvk_emit_tiler_primitive(const struct panvk_pipeline *pipeline, 434 const struct panvk_draw_info *draw, 435 void *prim) 436{ 437 pan_pack(prim, PRIMITIVE, cfg) { 438 cfg.draw_mode = pipeline->ia.topology; 439 if (pipeline->ia.writes_point_size) 440 cfg.point_size_array_format = MALI_POINT_SIZE_ARRAY_FORMAT_FP16; 441 442 cfg.first_provoking_vertex = true; 443 if (pipeline->ia.primitive_restart) 444 cfg.primitive_restart = MALI_PRIMITIVE_RESTART_IMPLICIT; 445 cfg.job_task_split = 6; 446 447 if (draw->index_size) { 448 cfg.index_count = draw->index_count; 449 cfg.indices = draw->indices; 450 cfg.base_vertex_offset = draw->vertex_offset - draw->offset_start; 451 452 switch (draw->index_size) { 453 case 32: cfg.index_type = MALI_INDEX_TYPE_UINT32; break; 454 case 16: cfg.index_type = MALI_INDEX_TYPE_UINT16; break; 455 case 8: cfg.index_type = MALI_INDEX_TYPE_UINT8; break; 456 default: unreachable("Invalid index size"); 457 } 458 } else { 459 cfg.index_count = draw->vertex_count; 460 cfg.index_type = MALI_INDEX_TYPE_NONE; 461 } 462 } 463} 464 465static void 466panvk_emit_tiler_primitive_size(const struct panvk_pipeline *pipeline, 467 const struct panvk_draw_info *draw, 468 void *primsz) 469{ 470 pan_pack(primsz, PRIMITIVE_SIZE, cfg) { 471 if (pipeline->ia.writes_point_size) { 472 cfg.size_array = draw->psiz; 473 } else { 474 cfg.constant = draw->line_width; 475 } 476 } 477} 478 479static void 480panvk_emit_tiler_dcd(const struct panvk_pipeline *pipeline, 481 const struct panvk_draw_info *draw, 482 void *dcd) 483{ 484 pan_pack(dcd, DRAW, cfg) { 485 cfg.front_face_ccw = pipeline->rast.front_ccw; 486 cfg.cull_front_face = pipeline->rast.cull_front_face; 487 cfg.cull_back_face = 
pipeline->rast.cull_back_face; 488 cfg.position = draw->position; 489 cfg.state = draw->fs_rsd; 490 cfg.attributes = draw->stages[MESA_SHADER_FRAGMENT].attributes; 491 cfg.attribute_buffers = draw->stages[MESA_SHADER_FRAGMENT].attribute_bufs; 492 cfg.viewport = draw->viewport; 493 cfg.varyings = draw->stages[MESA_SHADER_FRAGMENT].varyings; 494 cfg.varying_buffers = cfg.varyings ? draw->varying_bufs : 0; 495 cfg.thread_storage = draw->tls; 496 497 /* For all primitives but lines DRAW.flat_shading_vertex must 498 * be set to 0 and the provoking vertex is selected with the 499 * PRIMITIVE.first_provoking_vertex field. 500 */ 501 if (pipeline->ia.topology == MALI_DRAW_MODE_LINES || 502 pipeline->ia.topology == MALI_DRAW_MODE_LINE_STRIP || 503 pipeline->ia.topology == MALI_DRAW_MODE_LINE_LOOP) { 504 cfg.flat_shading_vertex = true; 505 } 506 507 cfg.offset_start = draw->offset_start; 508 cfg.instance_size = draw->instance_count > 1 ? 509 draw->padded_vertex_count : 1; 510 cfg.uniform_buffers = draw->ubos; 511 cfg.push_uniforms = draw->stages[PIPE_SHADER_FRAGMENT].push_constants; 512 cfg.textures = draw->textures; 513 cfg.samplers = draw->samplers; 514 515 /* TODO: occlusion queries */ 516 } 517} 518 519void 520panvk_per_arch(emit_tiler_job)(const struct panvk_pipeline *pipeline, 521 const struct panvk_draw_info *draw, 522 void *job) 523{ 524 void *section; 525 526 section = pan_section_ptr(job, TILER_JOB, INVOCATION); 527 memcpy(section, &draw->invocation, pan_size(INVOCATION)); 528 529 section = pan_section_ptr(job, TILER_JOB, PRIMITIVE); 530 panvk_emit_tiler_primitive(pipeline, draw, section); 531 532 section = pan_section_ptr(job, TILER_JOB, PRIMITIVE_SIZE); 533 panvk_emit_tiler_primitive_size(pipeline, draw, section); 534 535 section = pan_section_ptr(job, TILER_JOB, DRAW); 536 panvk_emit_tiler_dcd(pipeline, draw, section); 537 538 pan_section_pack(job, TILER_JOB, TILER, cfg) { 539 cfg.address = draw->tiler_ctx->bifrost; 540 } 541 pan_section_pack(job, TILER_JOB, 
PADDING, padding); 542} 543 544void 545panvk_per_arch(emit_viewport)(const VkViewport *viewport, 546 const VkRect2D *scissor, 547 void *vpd) 548{ 549 /* The spec says "width must be greater than 0.0" */ 550 assert(viewport->x >= 0); 551 int minx = (int)viewport->x; 552 int maxx = (int)(viewport->x + viewport->width); 553 554 /* Viewport height can be negative */ 555 int miny = MIN2((int)viewport->y, (int)(viewport->y + viewport->height)); 556 int maxy = MAX2((int)viewport->y, (int)(viewport->y + viewport->height)); 557 558 assert(scissor->offset.x >= 0 && scissor->offset.y >= 0); 559 miny = MAX2(scissor->offset.x, minx); 560 miny = MAX2(scissor->offset.y, miny); 561 maxx = MIN2(scissor->offset.x + scissor->extent.width, maxx); 562 maxy = MIN2(scissor->offset.y + scissor->extent.height, maxy); 563 564 /* Make sure we don't end up with a max < min when width/height is 0 */ 565 maxx = maxx > minx ? maxx - 1 : maxx; 566 maxy = maxy > miny ? maxy - 1 : maxy; 567 568 assert(viewport->minDepth >= 0.0f && viewport->minDepth <= 1.0f); 569 assert(viewport->maxDepth >= 0.0f && viewport->maxDepth <= 1.0f); 570 571 pan_pack(vpd, VIEWPORT, cfg) { 572 cfg.scissor_minimum_x = minx; 573 cfg.scissor_minimum_y = miny; 574 cfg.scissor_maximum_x = maxx; 575 cfg.scissor_maximum_y = maxy; 576 cfg.minimum_z = MIN2(viewport->minDepth, viewport->maxDepth); 577 cfg.maximum_z = MAX2(viewport->minDepth, viewport->maxDepth); 578 } 579} 580 581static enum mali_register_file_format 582bifrost_blend_type_from_nir(nir_alu_type nir_type) 583{ 584 switch(nir_type) { 585 case 0: /* Render target not in use */ 586 return 0; 587 case nir_type_float16: 588 return MALI_REGISTER_FILE_FORMAT_F16; 589 case nir_type_float32: 590 return MALI_REGISTER_FILE_FORMAT_F32; 591 case nir_type_int32: 592 return MALI_REGISTER_FILE_FORMAT_I32; 593 case nir_type_uint32: 594 return MALI_REGISTER_FILE_FORMAT_U32; 595 case nir_type_int16: 596 return MALI_REGISTER_FILE_FORMAT_I16; 597 case nir_type_uint16: 598 return 
MALI_REGISTER_FILE_FORMAT_U16; 599 default: 600 unreachable("Unsupported blend shader type for NIR alu type"); 601 } 602} 603 604void 605panvk_per_arch(emit_blend)(const struct panvk_device *dev, 606 const struct panvk_pipeline *pipeline, 607 unsigned rt, void *bd) 608{ 609 const struct pan_blend_state *blend = &pipeline->blend.state; 610 const struct pan_blend_rt_state *rts = &blend->rts[rt]; 611 bool dithered = false; 612 613 pan_pack(bd, BLEND, cfg) { 614 if (!blend->rt_count || !rts->equation.color_mask) { 615 cfg.enable = false; 616 cfg.internal.mode = MALI_BLEND_MODE_OFF; 617 continue; 618 } 619 620 cfg.srgb = util_format_is_srgb(rts->format); 621 cfg.load_destination = pan_blend_reads_dest(blend->rts[rt].equation); 622 cfg.round_to_fb_precision = !dithered; 623 624 const struct panfrost_device *pdev = &dev->physical_device->pdev; 625 const struct util_format_description *format_desc = 626 util_format_description(rts->format); 627 unsigned chan_size = 0; 628 for (unsigned i = 0; i < format_desc->nr_channels; i++) 629 chan_size = MAX2(format_desc->channel[i].size, chan_size); 630 631 pan_blend_to_fixed_function_equation(blend->rts[rt].equation, 632 &cfg.equation); 633 634 /* Fixed point constant */ 635 float fconst = 636 pan_blend_get_constant(pan_blend_constant_mask(blend->rts[rt].equation), 637 blend->constants); 638 u16 constant = fconst * ((1 << chan_size) - 1); 639 constant <<= 16 - chan_size; 640 cfg.constant = constant; 641 642 if (pan_blend_is_opaque(blend->rts[rt].equation)) { 643 cfg.internal.mode = MALI_BLEND_MODE_OPAQUE; 644 } else { 645 cfg.internal.mode = MALI_BLEND_MODE_FIXED_FUNCTION; 646 647 cfg.internal.fixed_function.alpha_zero_nop = 648 pan_blend_alpha_zero_nop(blend->rts[rt].equation); 649 cfg.internal.fixed_function.alpha_one_store = 650 pan_blend_alpha_one_store(blend->rts[rt].equation); 651 } 652 653 /* If we want the conversion to work properly, 654 * num_comps must be set to 4 655 */ 656 cfg.internal.fixed_function.num_comps = 4; 657 
cfg.internal.fixed_function.conversion.memory_format = 658 panfrost_format_to_bifrost_blend(pdev, rts->format, dithered); 659 cfg.internal.fixed_function.conversion.register_format = 660 bifrost_blend_type_from_nir(pipeline->fs.info.bifrost.blend[rt].type); 661 cfg.internal.fixed_function.rt = rt; 662 } 663} 664 665void 666panvk_per_arch(emit_blend_constant)(const struct panvk_device *dev, 667 const struct panvk_pipeline *pipeline, 668 unsigned rt, const float *constants, 669 void *bd) 670{ 671 float constant = constants[pipeline->blend.constant[rt].index]; 672 673 pan_pack(bd, BLEND, cfg) { 674 cfg.enable = false; 675 cfg.constant = constant * pipeline->blend.constant[rt].bifrost_factor; 676 } 677} 678 679void 680panvk_per_arch(emit_dyn_fs_rsd)(const struct panvk_pipeline *pipeline, 681 const struct panvk_cmd_state *state, 682 void *rsd) 683{ 684 pan_pack(rsd, RENDERER_STATE, cfg) { 685 if (pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS)) { 686 cfg.depth_units = state->rast.depth_bias.constant_factor * 2.0f; 687 cfg.depth_factor = state->rast.depth_bias.slope_factor; 688 cfg.depth_bias_clamp = state->rast.depth_bias.clamp; 689 } 690 691 if (pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) { 692 cfg.stencil_front.mask = state->zs.s_front.compare_mask; 693 cfg.stencil_back.mask = state->zs.s_back.compare_mask; 694 } 695 696 if (pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) { 697 cfg.stencil_mask_misc.stencil_mask_front = state->zs.s_front.write_mask; 698 cfg.stencil_mask_misc.stencil_mask_back = state->zs.s_back.write_mask; 699 } 700 701 if (pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)) { 702 cfg.stencil_front.reference_value = state->zs.s_front.ref; 703 cfg.stencil_back.reference_value = state->zs.s_back.ref; 704 } 705 } 706} 707 708void 709panvk_per_arch(emit_base_fs_rsd)(const struct panvk_device *dev, 710 const struct panvk_pipeline *pipeline, 711 void *rsd) 
{
   const struct pan_shader_info *info = &pipeline->fs.info;

   pan_pack(rsd, RENDERER_STATE, cfg) {
      if (pipeline->fs.required) {
         pan_shader_prepare_rsd(info, pipeline->fs.address, &cfg);

         /* Forward-pixel-kill is only safe when the shader can FPK, every
          * enabled render target is actually written, and neither
          * alpha-to-coverage nor blending reads the destination. */
         uint8_t rt_written = pipeline->fs.info.outputs_written >> FRAG_RESULT_DATA0;
         uint8_t rt_mask = pipeline->fs.rt_mask;
         cfg.properties.allow_forward_pixel_to_kill =
            pipeline->fs.info.fs.can_fpk &&
            !(rt_mask & ~rt_written) &&
            !pipeline->ms.alpha_to_coverage &&
            !pipeline->blend.reads_dest;

         /* Derive the early-ZS kill/update operations from what the
          * pipeline writes and tests. */
         bool writes_zs = pipeline->zs.z_write || pipeline->zs.s_test;
         bool zs_always_passes = !pipeline->zs.z_test && !pipeline->zs.s_test;
         bool oq = false; /* TODO: Occlusion queries */

         struct pan_earlyzs_state earlyzs =
            pan_earlyzs_get(pan_earlyzs_analyze(info), writes_zs || oq,
                            pipeline->ms.alpha_to_coverage, zs_always_passes);

         cfg.properties.pixel_kill_operation = earlyzs.kill;
         cfg.properties.zs_update_operation = earlyzs.update;
      } else {
         /* No fragment shader: depth comes from the fixed-function
          * interpolant and pixels can always be killed early. */
         cfg.properties.depth_source = MALI_DEPTH_SOURCE_FIXED_FUNCTION;
         cfg.properties.allow_forward_pixel_to_kill = true;
         cfg.properties.allow_forward_pixel_to_be_killed = true;
         cfg.properties.zs_update_operation = MALI_PIXEL_KILL_STRONG_EARLY;
      }

      bool msaa = pipeline->ms.rast_samples > 1;
      cfg.multisample_misc.multisample_enable = msaa;
      /* Sample mask only applies when multisampling; otherwise all on. */
      cfg.multisample_misc.sample_mask =
         msaa ? pipeline->ms.sample_mask : UINT16_MAX;

      cfg.multisample_misc.depth_function =
         pipeline->zs.z_test ? pipeline->zs.z_compare_func : MALI_FUNC_ALWAYS;

      cfg.multisample_misc.depth_write_mask = pipeline->zs.z_write;
      /* Depth clamping disables the near/far discard (clip) planes. */
      cfg.multisample_misc.fixed_function_near_discard = !pipeline->rast.clamp_depth;
      cfg.multisample_misc.fixed_function_far_discard = !pipeline->rast.clamp_depth;
      cfg.multisample_misc.shader_depth_range_fixed = true;

      cfg.stencil_mask_misc.stencil_enable = pipeline->zs.s_test;
      cfg.stencil_mask_misc.alpha_to_coverage = pipeline->ms.alpha_to_coverage;
      cfg.stencil_mask_misc.alpha_test_compare_function = MALI_FUNC_ALWAYS;
      cfg.stencil_mask_misc.front_facing_depth_bias = pipeline->rast.depth_bias.enable;
      cfg.stencil_mask_misc.back_facing_depth_bias = pipeline->rast.depth_bias.enable;
      cfg.stencil_mask_misc.single_sampled_lines = pipeline->ms.rast_samples <= 1;

      /* Static state is only baked in when the matching dynamic state is
       * NOT enabled; otherwise emit_dyn_fs_rsd() provides these fields. */
      if (!(pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS))) {
         cfg.depth_units = pipeline->rast.depth_bias.constant_factor * 2.0f;
         cfg.depth_factor = pipeline->rast.depth_bias.slope_factor;
         cfg.depth_bias_clamp = pipeline->rast.depth_bias.clamp;
      }

      if (!(pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK))) {
         cfg.stencil_front.mask = pipeline->zs.s_front.compare_mask;
         cfg.stencil_back.mask = pipeline->zs.s_back.compare_mask;
      }

      if (!(pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK))) {
         cfg.stencil_mask_misc.stencil_mask_front = pipeline->zs.s_front.write_mask;
         cfg.stencil_mask_misc.stencil_mask_back = pipeline->zs.s_back.write_mask;
      }

      if (!(pipeline->dynamic_state_mask & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE))) {
         cfg.stencil_front.reference_value = pipeline->zs.s_front.ref;
         cfg.stencil_back.reference_value = pipeline->zs.s_back.ref;
      }

      /* Stencil ops are never dynamic here; always baked in. */
      cfg.stencil_front.compare_function = pipeline->zs.s_front.compare_func;
      cfg.stencil_front.stencil_fail = pipeline->zs.s_front.fail_op;
      cfg.stencil_front.depth_fail = pipeline->zs.s_front.z_fail_op;
      cfg.stencil_front.depth_pass = pipeline->zs.s_front.pass_op;
      cfg.stencil_back.compare_function = pipeline->zs.s_back.compare_func;
      cfg.stencil_back.stencil_fail = pipeline->zs.s_back.fail_op;
      cfg.stencil_back.depth_fail = pipeline->zs.s_back.z_fail_op;
      cfg.stencil_back.depth_pass = pipeline->zs.s_back.pass_op;
   }
}

/* Emit the RENDERER_STATE descriptor for a non-fragment shader stage. */
void
panvk_per_arch(emit_non_fs_rsd)(const struct panvk_device *dev,
                                const struct pan_shader_info *shader_info,
                                mali_ptr shader_ptr,
                                void *rsd)
{
   assert(shader_info->stage != MESA_SHADER_FRAGMENT);

   pan_pack(rsd, RENDERER_STATE, cfg) {
      pan_shader_prepare_rsd(shader_info, shader_ptr, &cfg);
   }
}

/* Emit a TILER_CONTEXT descriptor followed by its TILER_HEAP descriptor
 * (placed right after the context in the same allocation). */
void
panvk_per_arch(emit_tiler_context)(const struct panvk_device *dev,
                                   unsigned width, unsigned height,
                                   const struct panfrost_ptr *descs)
{
   const struct panfrost_device *pdev = &dev->physical_device->pdev;

   pan_pack(descs->cpu + pan_size(TILER_CONTEXT), TILER_HEAP, cfg) {
      cfg.size = pdev->tiler_heap->size;
      cfg.base = pdev->tiler_heap->ptr.gpu;
      cfg.bottom = pdev->tiler_heap->ptr.gpu;
      cfg.top = pdev->tiler_heap->ptr.gpu + pdev->tiler_heap->size;
   }

   pan_pack(descs->cpu, TILER_CONTEXT, cfg) {
      /* NOTE(review): 0x28 selects the hierarchy levels used for binning —
       * presumably matching the GL panfrost driver's default; confirm. */
      cfg.hierarchy_mask = 0x28;
      cfg.fb_width = width;
      cfg.fb_height = height;
      cfg.heap = descs->gpu + pan_size(TILER_CONTEXT);
   }
}