1/* 2 * Copyright (c) 2017 Lima Project 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sub license, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the 12 * next paragraph) shall be included in all copies or substantial portions 13 * of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 * 23 */ 24 25#include "util/ralloc.h" 26#include "util/half_float.h" 27#include "util/bitscan.h" 28 29#include "ppir.h" 30#include "codegen.h" 31#include "lima_context.h" 32 33static unsigned encode_swizzle(uint8_t *swizzle, int shift, int dest_shift) 34{ 35 unsigned ret = 0; 36 for (int i = 0; i < 4; i++) 37 ret |= ((swizzle[i] + shift) & 0x3) << ((i + dest_shift) * 2); 38 return ret; 39} 40 41static int get_scl_reg_index(ppir_src *src, int component) 42{ 43 int ret = ppir_target_get_src_reg_index(src); 44 ret += src->swizzle[component]; 45 return ret; 46} 47 48static void ppir_codegen_encode_varying(ppir_node *node, void *code) 49{ 50 ppir_codegen_field_varying *f = code; 51 ppir_load_node *load = ppir_node_to_load(node); 52 ppir_dest *dest = &load->dest; 53 int index = ppir_target_get_dest_reg_index(dest); 54 int num_components = load->num_components; 55 56 if (node->op != ppir_op_load_coords_reg) { 57 assert(node->op == ppir_op_load_varying || 58 node->op == ppir_op_load_coords || 59 node->op == ppir_op_load_fragcoord || 60 node->op == ppir_op_load_pointcoord || 61 node->op == ppir_op_load_frontface); 62 63 f->imm.dest = index >> 2; 64 f->imm.mask = dest->write_mask << (index & 0x3); 65 66 int alignment = num_components == 3 ? 3 : num_components - 1; 67 f->imm.alignment = alignment; 68 69 if (load->num_src) { 70 index = ppir_target_get_src_reg_index(&load->src); 71 f->imm.offset_vector = index >> 2; 72 f->imm.offset_scalar = index & 0x3; 73 } else 74 f->imm.offset_vector = 0xf; 75 76 if (alignment == 3) 77 f->imm.index = load->index >> 2; 78 else 79 f->imm.index = load->index >> alignment; 80 81 switch (node->op) { 82 case ppir_op_load_fragcoord: 83 f->imm.source_type = 2; 84 f->imm.perspective = 3; 85 break; 86 case ppir_op_load_pointcoord: 87 f->imm.source_type = 3; 88 break; 89 case ppir_op_load_frontface: 90 f->imm.source_type = 3; 91 f->imm.perspective = 1; 92 break; 93 case ppir_op_load_coords: 94 if (load->sampler_dim == GLSL_SAMPLER_DIM_CUBE) 95 f->imm.source_type = 2; 96 97 switch (load->perspective) { 98 case ppir_perspective_none: 99 f->imm.perspective = 0; 100 break; 101 case ppir_perspective_z: 102 f->imm.perspective = 2; 103 break; 104 case ppir_perspective_w: 105 f->imm.perspective = 3; 106 break; 107 } 108 break; 109 default: 110 break; 111 } 112 } 113 else { /* node->op == ppir_op_load_coords_reg */ 114 f->reg.dest = index >> 2; 115 f->reg.mask = dest->write_mask << (index & 0x3); 116 117 if (load->num_src) { 118 if (load->sampler_dim == GLSL_SAMPLER_DIM_CUBE) { 119 f->reg.source_type = 2; 120 f->reg.perspective = 1; 121 } else { 122 f->reg.source_type = 1; 123 switch (load->perspective) { 124 case ppir_perspective_none: 125 f->reg.perspective = 0; 126 break; 127 case ppir_perspective_z: 128 f->reg.perspective = 2; 129 break; 130 case ppir_perspective_w: 131 f->reg.perspective = 3; 132 break; 133 } 134 } 135 ppir_src *src = &load->src; 136 index = ppir_target_get_src_reg_index(src); 137 f->reg.source = index >> 2; 138 f->reg.negate = src->negate; 139 f->reg.absolute = src->absolute; 140 f->reg.swizzle = encode_swizzle(src->swizzle, index & 0x3, 0); 141 } 142 } 143} 144 145static void ppir_codegen_encode_texld(ppir_node *node, void *code) 146{ 147 ppir_codegen_field_sampler *f = code; 148 ppir_load_texture_node *ldtex = ppir_node_to_load_texture(node); 149 150 f->index = ldtex->sampler; 151 152 f->lod_bias_en = ldtex->lod_bias_en; 153 f->explicit_lod = ldtex->explicit_lod; 154 if (ldtex->lod_bias_en) 155 f->lod_bias = ppir_target_get_src_reg_index(&ldtex->src[1]); 156 157 switch (ldtex->sampler_dim) { 158 case GLSL_SAMPLER_DIM_2D: 159 case GLSL_SAMPLER_DIM_3D: 160 case GLSL_SAMPLER_DIM_RECT: 161 case GLSL_SAMPLER_DIM_EXTERNAL: 162 f->type = ppir_codegen_sampler_type_generic; 163 break; 164 case GLSL_SAMPLER_DIM_CUBE: 165 f->type = ppir_codegen_sampler_type_cube; 166 break; 167 default: 168 break; 169 } 170 171 f->offset_en = 0; 172 f->unknown_2 = 0x39001; 173} 174 175static void ppir_codegen_encode_uniform(ppir_node *node, void *code) 176{ 177 ppir_codegen_field_uniform *f = code; 178 ppir_load_node *load = ppir_node_to_load(node); 179 180 switch (node->op) { 181 case ppir_op_load_uniform: 182 f->source = ppir_codegen_uniform_src_uniform; 183 break; 184 case ppir_op_load_temp: 185 f->source = ppir_codegen_uniform_src_temporary; 186 break; 187 default: 188 assert(0); 189 } 190 191 /* Uniforms are always aligned to vec4 boundary */ 192 f->alignment = 2; 193 f->index = load->index; 194 195 if (load->num_src) { 196 f->offset_en = 1; 197 f->offset_reg = ppir_target_get_src_reg_index(&load->src); 198 } 199} 200 201static ppir_codegen_outmod ppir_codegen_get_outmod(ppir_outmod outmod) 202{ 203 switch (outmod) { 204 case ppir_outmod_none: 205 return ppir_codegen_outmod_none; 206 case ppir_outmod_clamp_fraction: 207 return ppir_codegen_outmod_clamp_fraction; 208 case ppir_outmod_clamp_positive: 209 return ppir_codegen_outmod_clamp_positive; 210 case ppir_outmod_round: 211 return ppir_codegen_outmod_round; 212 default: 213 unreachable("invalid ppir_outmod"); 214 } 215} 216 217static unsigned shift_to_op(int shift) 218{ 219 assert(shift >= -3 && shift <= 3); 220 return shift < 0 ? shift + 8 : shift; 221} 222 223static void ppir_codegen_encode_vec_mul(ppir_node *node, void *code) 224{ 225 ppir_codegen_field_vec4_mul *f = code; 226 ppir_alu_node *alu = ppir_node_to_alu(node); 227 228 ppir_dest *dest = &alu->dest; 229 int dest_shift = 0; 230 if (dest->type != ppir_target_pipeline) { 231 int index = ppir_target_get_dest_reg_index(dest); 232 dest_shift = index & 0x3; 233 f->dest = index >> 2; 234 f->mask = dest->write_mask << dest_shift; 235 } 236 f->dest_modifier = ppir_codegen_get_outmod(dest->modifier); 237 238 switch (node->op) { 239 case ppir_op_mul: 240 f->op = shift_to_op(alu->shift); 241 break; 242 case ppir_op_mov: 243 f->op = ppir_codegen_vec4_mul_op_mov; 244 break; 245 case ppir_op_max: 246 f->op = ppir_codegen_vec4_mul_op_max; 247 break; 248 case ppir_op_min: 249 f->op = ppir_codegen_vec4_mul_op_min; 250 break; 251 case ppir_op_and: 252 f->op = ppir_codegen_vec4_mul_op_and; 253 break; 254 case ppir_op_or: 255 f->op = ppir_codegen_vec4_mul_op_or; 256 break; 257 case ppir_op_xor: 258 f->op = ppir_codegen_vec4_mul_op_xor; 259 break; 260 case ppir_op_gt: 261 f->op = ppir_codegen_vec4_mul_op_gt; 262 break; 263 case ppir_op_ge: 264 f->op = ppir_codegen_vec4_mul_op_ge; 265 break; 266 case ppir_op_eq: 267 f->op = ppir_codegen_vec4_mul_op_eq; 268 break; 269 case ppir_op_ne: 270 f->op = ppir_codegen_vec4_mul_op_ne; 271 break; 272 case ppir_op_not: 273 f->op = ppir_codegen_vec4_mul_op_not; 274 break; 275 default: 276 break; 277 } 278 279 ppir_src *src = alu->src; 280 int index = ppir_target_get_src_reg_index(src); 281 f->arg0_source = index >> 2; 282 f->arg0_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift); 283 f->arg0_absolute = src->absolute; 284 f->arg0_negate = src->negate; 285 286 if (alu->num_src == 2) { 287 src = alu->src + 1; 288 index = ppir_target_get_src_reg_index(src); 289 f->arg1_source = index >> 2; 290 f->arg1_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift); 291 f->arg1_absolute = src->absolute; 292 f->arg1_negate = src->negate; 293 } 294} 295 296static void ppir_codegen_encode_scl_mul(ppir_node *node, void *code) 297{ 298 ppir_codegen_field_float_mul *f = code; 299 ppir_alu_node *alu = ppir_node_to_alu(node); 300 301 ppir_dest *dest = &alu->dest; 302 int dest_component = ffs(dest->write_mask) - 1; 303 assert(dest_component >= 0); 304 305 if (dest->type != ppir_target_pipeline) { 306 f->dest = ppir_target_get_dest_reg_index(dest) + dest_component; 307 f->output_en = true; 308 } 309 f->dest_modifier = ppir_codegen_get_outmod(dest->modifier); 310 311 switch (node->op) { 312 case ppir_op_mul: 313 f->op = shift_to_op(alu->shift); 314 break; 315 case ppir_op_mov: 316 f->op = ppir_codegen_float_mul_op_mov; 317 break; 318 case ppir_op_max: 319 f->op = ppir_codegen_float_mul_op_max; 320 break; 321 case ppir_op_min: 322 f->op = ppir_codegen_float_mul_op_min; 323 break; 324 case ppir_op_and: 325 f->op = ppir_codegen_float_mul_op_and; 326 break; 327 case ppir_op_or: 328 f->op = ppir_codegen_float_mul_op_or; 329 break; 330 case ppir_op_xor: 331 f->op = ppir_codegen_float_mul_op_xor; 332 break; 333 case ppir_op_gt: 334 f->op = ppir_codegen_float_mul_op_gt; 335 break; 336 case ppir_op_ge: 337 f->op = ppir_codegen_float_mul_op_ge; 338 break; 339 case ppir_op_eq: 340 f->op = ppir_codegen_float_mul_op_eq; 341 break; 342 case ppir_op_ne: 343 f->op = ppir_codegen_float_mul_op_ne; 344 break; 345 case ppir_op_not: 346 f->op = ppir_codegen_float_mul_op_not; 347 break; 348 default: 349 break; 350 } 351 352 ppir_src *src = alu->src; 353 f->arg0_source = get_scl_reg_index(src, dest_component); 354 f->arg0_absolute = src->absolute; 355 f->arg0_negate = src->negate; 356 357 if (alu->num_src == 2) { 358 src = alu->src + 1; 359 f->arg1_source = get_scl_reg_index(src, dest_component); 360 f->arg1_absolute = src->absolute; 361 f->arg1_negate = src->negate; 362 } 363} 364 365static void ppir_codegen_encode_vec_add(ppir_node *node, void *code) 366{ 367 ppir_codegen_field_vec4_acc *f = code; 368 ppir_alu_node *alu = ppir_node_to_alu(node); 369 370 ppir_dest *dest = &alu->dest; 371 int index = ppir_target_get_dest_reg_index(dest); 372 int dest_shift = index & 0x3; 373 f->dest = index >> 2; 374 f->mask = dest->write_mask << dest_shift; 375 f->dest_modifier = ppir_codegen_get_outmod(dest->modifier); 376 377 switch (node->op) { 378 case ppir_op_add: 379 f->op = ppir_codegen_vec4_acc_op_add; 380 break; 381 case ppir_op_mov: 382 f->op = ppir_codegen_vec4_acc_op_mov; 383 break; 384 case ppir_op_sum3: 385 f->op = ppir_codegen_vec4_acc_op_sum3; 386 dest_shift = 0; 387 break; 388 case ppir_op_sum4: 389 f->op = ppir_codegen_vec4_acc_op_sum4; 390 dest_shift = 0; 391 break; 392 case ppir_op_floor: 393 f->op = ppir_codegen_vec4_acc_op_floor; 394 break; 395 case ppir_op_ceil: 396 f->op = ppir_codegen_vec4_acc_op_ceil; 397 break; 398 case ppir_op_fract: 399 f->op = ppir_codegen_vec4_acc_op_fract; 400 break; 401 case ppir_op_gt: 402 f->op = ppir_codegen_vec4_acc_op_gt; 403 break; 404 case ppir_op_ge: 405 f->op = ppir_codegen_vec4_acc_op_ge; 406 break; 407 case ppir_op_eq: 408 f->op = ppir_codegen_vec4_acc_op_eq; 409 break; 410 case ppir_op_ne: 411 f->op = ppir_codegen_vec4_acc_op_ne; 412 break; 413 case ppir_op_select: 414 f->op = ppir_codegen_vec4_acc_op_sel; 415 break; 416 case ppir_op_max: 417 f->op = ppir_codegen_vec4_acc_op_max; 418 break; 419 case ppir_op_min: 420 f->op = ppir_codegen_vec4_acc_op_min; 421 break; 422 case ppir_op_ddx: 423 f->op = ppir_codegen_vec4_acc_op_dFdx; 424 break; 425 case ppir_op_ddy: 426 f->op = ppir_codegen_vec4_acc_op_dFdy; 427 break; 428 default: 429 break; 430 } 431 432 ppir_src *src = node->op == ppir_op_select ? alu->src + 1 : alu->src; 433 index = ppir_target_get_src_reg_index(src); 434 435 if (src->type == ppir_target_pipeline && 436 src->pipeline == ppir_pipeline_reg_vmul) 437 f->mul_in = true; 438 else 439 f->arg0_source = index >> 2; 440 441 f->arg0_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift); 442 f->arg0_absolute = src->absolute; 443 f->arg0_negate = src->negate; 444 445 if (++src < alu->src + alu->num_src) { 446 index = ppir_target_get_src_reg_index(src); 447 f->arg1_source = index >> 2; 448 f->arg1_swizzle = encode_swizzle(src->swizzle, index & 0x3, dest_shift); 449 f->arg1_absolute = src->absolute; 450 f->arg1_negate = src->negate; 451 } 452} 453 454static void ppir_codegen_encode_scl_add(ppir_node *node, void *code) 455{ 456 ppir_codegen_field_float_acc *f = code; 457 ppir_alu_node *alu = ppir_node_to_alu(node); 458 459 ppir_dest *dest = &alu->dest; 460 int dest_component = ffs(dest->write_mask) - 1; 461 assert(dest_component >= 0); 462 463 f->dest = ppir_target_get_dest_reg_index(dest) + dest_component; 464 f->output_en = true; 465 f->dest_modifier = ppir_codegen_get_outmod(dest->modifier); 466 467 switch (node->op) { 468 case ppir_op_add: 469 f->op = shift_to_op(alu->shift); 470 break; 471 case ppir_op_mov: 472 f->op = ppir_codegen_float_acc_op_mov; 473 break; 474 case ppir_op_max: 475 f->op = ppir_codegen_float_acc_op_max; 476 break; 477 case ppir_op_min: 478 f->op = ppir_codegen_float_acc_op_min; 479 break; 480 case ppir_op_floor: 481 f->op = ppir_codegen_float_acc_op_floor; 482 break; 483 case ppir_op_ceil: 484 f->op = ppir_codegen_float_acc_op_ceil; 485 break; 486 case ppir_op_fract: 487 f->op = ppir_codegen_float_acc_op_fract; 488 break; 489 case ppir_op_gt: 490 f->op = ppir_codegen_float_acc_op_gt; 491 break; 492 case ppir_op_ge: 493 f->op = ppir_codegen_float_acc_op_ge; 494 break; 495 case ppir_op_eq: 496 f->op = ppir_codegen_float_acc_op_eq; 497 break; 498 case ppir_op_ne: 499 f->op = ppir_codegen_float_acc_op_ne; 500 break; 501 case ppir_op_select: 502 f->op = ppir_codegen_float_acc_op_sel; 503 break; 504 case ppir_op_ddx: 505 f->op = ppir_codegen_float_acc_op_dFdx; 506 break; 507 case ppir_op_ddy: 508 f->op = ppir_codegen_float_acc_op_dFdy; 509 break; 510 default: 511 break; 512 } 513 514 ppir_src *src = node->op == ppir_op_select ? alu->src + 1: alu->src; 515 if (src->type == ppir_target_pipeline && 516 src->pipeline == ppir_pipeline_reg_fmul) 517 f->mul_in = true; 518 else 519 f->arg0_source = get_scl_reg_index(src, dest_component); 520 f->arg0_absolute = src->absolute; 521 f->arg0_negate = src->negate; 522 523 if (++src < alu->src + alu->num_src) { 524 f->arg1_source = get_scl_reg_index(src, dest_component); 525 f->arg1_absolute = src->absolute; 526 f->arg1_negate = src->negate; 527 } 528} 529 530static void ppir_codegen_encode_combine(ppir_node *node, void *code) 531{ 532 ppir_codegen_field_combine *f = code; 533 ppir_alu_node *alu = ppir_node_to_alu(node); 534 535 switch (node->op) { 536 case ppir_op_rsqrt: 537 case ppir_op_log2: 538 case ppir_op_exp2: 539 case ppir_op_rcp: 540 case ppir_op_sqrt: 541 case ppir_op_sin: 542 case ppir_op_cos: 543 { 544 f->scalar.dest_vec = false; 545 f->scalar.arg1_en = false; 546 547 ppir_dest *dest = &alu->dest; 548 int dest_component = ffs(dest->write_mask) - 1; 549 assert(dest_component >= 0); 550 f->scalar.dest = ppir_target_get_dest_reg_index(dest) + dest_component; 551 f->scalar.dest_modifier = ppir_codegen_get_outmod(dest->modifier); 552 553 ppir_src *src = alu->src; 554 f->scalar.arg0_src = get_scl_reg_index(src, dest_component); 555 f->scalar.arg0_absolute = src->absolute; 556 f->scalar.arg0_negate = src->negate; 557 558 switch (node->op) { 559 case ppir_op_rsqrt: 560 f->scalar.op = ppir_codegen_combine_scalar_op_rsqrt; 561 break; 562 case ppir_op_log2: 563 f->scalar.op = ppir_codegen_combine_scalar_op_log2; 564 break; 565 case ppir_op_exp2: 566 f->scalar.op = ppir_codegen_combine_scalar_op_exp2; 567 break; 568 case ppir_op_rcp: 569 f->scalar.op = ppir_codegen_combine_scalar_op_rcp; 570 break; 571 case ppir_op_sqrt: 572 f->scalar.op = ppir_codegen_combine_scalar_op_sqrt; 573 break; 574 case ppir_op_sin: 575 f->scalar.op = ppir_codegen_combine_scalar_op_sin; 576 break; 577 case ppir_op_cos: 578 f->scalar.op = ppir_codegen_combine_scalar_op_cos; 579 break; 580 default: 581 break; 582 } 583 break; 584 } 585 default: 586 break; 587 } 588} 589 590static void ppir_codegen_encode_store_temp(ppir_node *node, void *code) 591{ 592 assert(node->op == ppir_op_store_temp); 593 594 ppir_codegen_field_temp_write *f = code; 595 ppir_store_node *snode = ppir_node_to_store(node); 596 int num_components = snode->num_components; 597 598 f->temp_write.dest = 0x03; // 11 - temporary 599 f->temp_write.source = snode->src.reg->index; 600 601 int alignment = num_components == 4 ? 2 : num_components - 1; 602 f->temp_write.alignment = alignment; 603 f->temp_write.index = snode->index << (2 - alignment); 604 605 f->temp_write.offset_reg = snode->index >> 2; 606} 607 608static void ppir_codegen_encode_const(ppir_const *constant, uint16_t *code) 609{ 610 for (int i = 0; i < constant->num; i++) 611 code[i] = _mesa_float_to_half(constant->value[i].f); 612} 613 614static void ppir_codegen_encode_discard(ppir_node *node, void *code) 615{ 616 ppir_codegen_field_branch *b = code; 617 assert(node->op == ppir_op_discard); 618 619 b->discard.word0 = PPIR_CODEGEN_DISCARD_WORD0; 620 b->discard.word1 = PPIR_CODEGEN_DISCARD_WORD1; 621 b->discard.word2 = PPIR_CODEGEN_DISCARD_WORD2; 622} 623 624static void ppir_codegen_encode_branch(ppir_node *node, void *code) 625{ 626 ppir_codegen_field_branch *b = code; 627 ppir_branch_node *branch; 628 ppir_instr *target_instr; 629 ppir_block *target; 630 if (node->op == ppir_op_discard) { 631 ppir_codegen_encode_discard(node, code); 632 return; 633 } 634 635 assert(node->op == ppir_op_branch); 636 branch = ppir_node_to_branch(node); 637 638 b->branch.unknown_0 = 0x0; 639 b->branch.unknown_1 = 0x0; 640 641 if (branch->num_src == 2) { 642 b->branch.arg0_source = get_scl_reg_index(&branch->src[0], 0); 643 b->branch.arg1_source = get_scl_reg_index(&branch->src[1], 0); 644 b->branch.cond_gt = branch->cond_gt; 645 b->branch.cond_eq = branch->cond_eq; 646 b->branch.cond_lt = branch->cond_lt; 647 } else if (branch->num_src == 0) { 648 /* Unconditional branch */ 649 b->branch.arg0_source = 0; 650 b->branch.arg1_source = 0; 651 b->branch.cond_gt = true; 652 b->branch.cond_eq = true; 653 b->branch.cond_lt = true; 654 } else { 655 assert(false); 656 } 657 658 target = branch->target; 659 while (list_is_empty(&target->instr_list)) { 660 if (!target->list.next) 661 break; 662 target = list_entry(target->list.next, ppir_block, list); 663 } 664 665 assert(!list_is_empty(&target->instr_list)); 666 667 target_instr = list_first_entry(&target->instr_list, ppir_instr, list); 668 b->branch.target = target_instr->offset - node->instr->offset; 669 b->branch.next_count = target_instr->encode_size; 670} 671 672typedef void (*ppir_codegen_instr_slot_encode_func)(ppir_node *, void *); 673 674static const ppir_codegen_instr_slot_encode_func 675ppir_codegen_encode_slot[PPIR_INSTR_SLOT_NUM] = { 676 [PPIR_INSTR_SLOT_VARYING] = ppir_codegen_encode_varying, 677 [PPIR_INSTR_SLOT_TEXLD] = ppir_codegen_encode_texld, 678 [PPIR_INSTR_SLOT_UNIFORM] = ppir_codegen_encode_uniform, 679 [PPIR_INSTR_SLOT_ALU_VEC_MUL] = ppir_codegen_encode_vec_mul, 680 [PPIR_INSTR_SLOT_ALU_SCL_MUL] = ppir_codegen_encode_scl_mul, 681 [PPIR_INSTR_SLOT_ALU_VEC_ADD] = ppir_codegen_encode_vec_add, 682 [PPIR_INSTR_SLOT_ALU_SCL_ADD] = ppir_codegen_encode_scl_add, 683 [PPIR_INSTR_SLOT_ALU_COMBINE] = ppir_codegen_encode_combine, 684 [PPIR_INSTR_SLOT_STORE_TEMP] = ppir_codegen_encode_store_temp, 685 [PPIR_INSTR_SLOT_BRANCH] = ppir_codegen_encode_branch, 686}; 687 688static const int ppir_codegen_field_size[] = { 689 34, 62, 41, 43, 30, 44, 31, 30, 41, 73 690}; 691 692static inline int align_to_word(int size) 693{ 694 return ((size + 0x1f) >> 5); 695} 696 697static int get_instr_encode_size(ppir_instr *instr) 698{ 699 int size = 0; 700 701 for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) { 702 if (instr->slots[i]) 703 size += ppir_codegen_field_size[i]; 704 } 705 706 for (int i = 0; i < 2; i++) { 707 if (instr->constant[i].num) 708 size += 64; 709 } 710 711 return align_to_word(size) + 1; 712} 713 714static void bitcopy(void *dst, int dst_offset, void *src, int src_size) 715{ 716 unsigned char *cpy_dst = dst, *cpy_src = src; 717 int off1 = dst_offset & 0x07; 718 719 cpy_dst += (dst_offset >> 3); 720 721 if (off1) { 722 int off2 = 0x08 - off1; 723 int cpy_size = 0; 724 while (1) { 725 *cpy_dst |= *cpy_src << off1; 726 cpy_dst++; 727 728 cpy_size += off2; 729 if (cpy_size >= src_size) 730 break; 731 732 *cpy_dst |= *cpy_src >> off2; 733 cpy_src++; 734 735 cpy_size += off1; 736 if (cpy_size >= src_size) 737 break; 738 } 739 } 740 else 741 memcpy(cpy_dst, cpy_src, align_to_word(src_size) * 4); 742} 743 744static int encode_instr(ppir_instr *instr, void *code, void *last_code) 745{ 746 int size = 0; 747 ppir_codegen_ctrl *ctrl = code; 748 749 for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) { 750 if (instr->slots[i]) { 751 /* max field size (73), align to dword */ 752 uint8_t output[12] = {0}; 753 754 ppir_codegen_encode_slot[i](instr->slots[i], output); 755 bitcopy(ctrl + 1, size, output, ppir_codegen_field_size[i]); 756 757 size += ppir_codegen_field_size[i]; 758 ctrl->fields |= 1 << i; 759 } 760 } 761 762 if (instr->slots[PPIR_INSTR_SLOT_TEXLD]) 763 ctrl->sync = true; 764 765 if (instr->slots[PPIR_INSTR_SLOT_ALU_VEC_ADD]) { 766 ppir_node *node = instr->slots[PPIR_INSTR_SLOT_ALU_VEC_ADD]; 767 if (node->op == ppir_op_ddx || node->op == ppir_op_ddy) 768 ctrl->sync = true; 769 } 770 771 if (instr->slots[PPIR_INSTR_SLOT_ALU_SCL_ADD]) { 772 ppir_node *node = instr->slots[PPIR_INSTR_SLOT_ALU_SCL_ADD]; 773 if (node->op == ppir_op_ddx || node->op == ppir_op_ddy) 774 ctrl->sync = true; 775 } 776 777 for (int i = 0; i < 2; i++) { 778 if (instr->constant[i].num) { 779 uint16_t output[4] = {0}; 780 781 ppir_codegen_encode_const(instr->constant + i, output); 782 bitcopy(ctrl + 1, size, output, instr->constant[i].num * 16); 783 784 size += 64; 785 ctrl->fields |= 1 << (ppir_codegen_field_shift_vec4_const_0 + i); 786 } 787 } 788 789 size = align_to_word(size) + 1; 790 791 ctrl->count = size; 792 if (instr->stop) 793 ctrl->stop = true; 794 795 if (last_code) { 796 ppir_codegen_ctrl *last_ctrl = last_code; 797 last_ctrl->next_count = size; 798 last_ctrl->prefetch = true; 799 } 800 801 return size; 802} 803 804static void ppir_codegen_print_prog(ppir_compiler *comp) 805{ 806 uint32_t *prog = comp->prog->shader; 807 unsigned offset = 0; 808 809 printf("========ppir codegen========\n"); 810 list_for_each_entry(ppir_block, block, &comp->block_list, list) { 811 list_for_each_entry(ppir_instr, instr, &block->instr_list, list) { 812 printf("%03d (@%6d): ", instr->index, instr->offset); 813 int n = prog[0] & 0x1f; 814 for (int i = 0; i < n; i++) { 815 if (i && i % 6 == 0) 816 printf("\n "); 817 printf("%08x ", prog[i]); 818 } 819 printf("\n"); 820 ppir_disassemble_instr(prog, offset, stdout); 821 prog += n; 822 offset += n; 823 } 824 } 825 printf("-----------------------\n"); 826} 827 828bool ppir_codegen_prog(ppir_compiler *comp) 829{ 830 int size = 0; 831 list_for_each_entry(ppir_block, block, &comp->block_list, list) { 832 list_for_each_entry(ppir_instr, instr, &block->instr_list, list) { 833 instr->offset = size; 834 instr->encode_size = get_instr_encode_size(instr); 835 size += instr->encode_size; 836 } 837 /* Set stop flag for the last instruction if block has stop flag */ 838 if (block->stop) { 839 ppir_instr *instr = list_last_entry(&block->instr_list, ppir_instr, list); 840 instr->stop = true; 841 } 842 } 843 844 uint32_t *prog = rzalloc_size(comp->prog, size * sizeof(uint32_t)); 845 if (!prog) 846 return false; 847 848 uint32_t *code = prog, *last_code = NULL; 849 list_for_each_entry(ppir_block, block, &comp->block_list, list) { 850 list_for_each_entry(ppir_instr, instr, &block->instr_list, list) { 851 int offset = encode_instr(instr, code, last_code); 852 last_code = code; 853 code += offset; 854 } 855 } 856 857 if (comp->prog->shader) 858 ralloc_free(comp->prog->shader); 859 860 comp->prog->shader = prog; 861 comp->prog->state.shader_size = size * sizeof(uint32_t); 862 863 if (lima_debug & LIMA_DEBUG_PP) 864 ppir_codegen_print_prog(comp); 865 866 return true; 867} 868