1/* 2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 * 23 * Authors: 24 * Vadim Girlin 25 */ 26 27#ifndef SB_BC_H_ 28#define SB_BC_H_ 29 30#include <stdint.h> 31#include "r600_isa.h" 32 33#include <cstdio> 34#include <string> 35#include <vector> 36#include <stack> 37 38struct r600_bytecode; 39struct r600_shader; 40 41namespace r600_sb { 42 43class hw_encoding_format; 44class node; 45class alu_node; 46class cf_node; 47class fetch_node; 48class alu_group_node; 49class region_node; 50class shader; 51class value; 52 53class sb_ostream { 54public: 55 sb_ostream() {} 56 57 virtual void write(const char *s) = 0; 58 59 sb_ostream& operator <<(const char *s) { 60 write(s); 61 return *this; 62 } 63 64 sb_ostream& operator <<(const std::string& s) { 65 return *this << s.c_str(); 66 } 67 68 sb_ostream& operator <<(void *p) { 69 char b[32]; 70 sprintf(b, "%p", p); 71 return *this << b; 72 } 73 74 sb_ostream& operator <<(char c) { 75 char b[2]; 76 sprintf(b, "%c", c); 77 return *this << b; 78 } 79 80 sb_ostream& operator <<(int n) { 81 char b[32]; 82 sprintf(b, "%d", n); 83 return *this << b; 84 } 85 86 sb_ostream& operator <<(unsigned n) { 87 char b[32]; 88 sprintf(b, "%u", n); 89 return *this << b; 90 } 91 92 sb_ostream& operator <<(double d) { 93 char b[32]; 94 snprintf(b, 32, "%g", d); 95 return *this << b; 96 } 97 98 // print as field of specified width, right aligned 99 void print_w(int n, int width) { 100 char b[256],f[8]; 101 sprintf(f, "%%%dd", width); 102 snprintf(b, 256, f, n); 103 write(b); 104 } 105 106 // print as field of specified width, left aligned 107 void print_wl(int n, int width) { 108 char b[256],f[8]; 109 sprintf(f, "%%-%dd", width); 110 snprintf(b, 256, f, n); 111 write(b); 112 } 113 114 // print as field of specified width, left aligned 115 void print_wl(const std::string &s, int width) { 116 write(s.c_str()); 117 int l = s.length(); 118 while (l++ < width) { 119 write(" "); 120 } 121 } 122 123 // print int as field of specified width, right aligned, zero-padded 124 void print_zw(int n, int width) { 125 char b[256],f[8]; 126 sprintf(f, "%%0%dd", width); 127 snprintf(b, 256, f, n); 128 write(b); 129 } 130 131 // print int as field of specified width, right aligned, zero-padded, hex 132 void print_zw_hex(int n, int width) { 133 char b[256],f[8]; 134 sprintf(f, "%%0%dx", width); 135 snprintf(b, 256, f, n); 136 write(b); 137 } 138}; 139 140class sb_ostringstream : public sb_ostream { 141 std::string data; 142public: 143 sb_ostringstream() : data() {} 144 145 virtual void write(const char *s) { 146 data += s; 147 } 148 149 void clear() { data.clear(); } 150 151 const char* c_str() { return data.c_str(); } 152 std::string& str() { return data; } 153}; 154 155class sb_log : public sb_ostream { 156 FILE *o; 157public: 158 sb_log() : o(stderr) {} 159 160 virtual void write(const char *s) { 161 fputs(s, o); 162 } 163}; 164 165extern sb_log sblog; 166 167enum shader_target 168{ 169 TARGET_UNKNOWN, 170 TARGET_VS, 171 TARGET_ES, 172 TARGET_PS, 173 TARGET_GS, 174 TARGET_GS_COPY, 175 TARGET_COMPUTE, 176 TARGET_FETCH, 177 TARGET_HS, 178 TARGET_LS, 179 180 TARGET_NUM 181}; 182 183enum sb_hw_class_bits 184{ 185 HB_R6 = (1<<0), 186 HB_R7 = (1<<1), 187 HB_EG = (1<<2), 188 HB_CM = (1<<3), 189 190 HB_R6R7 = (HB_R6 | HB_R7), 191 HB_EGCM = (HB_EG | HB_CM), 192 HB_R6R7EG = (HB_R6 | HB_R7 | HB_EG), 193 HB_R7EGCM = (HB_R7 | HB_EG | HB_CM), 194 195 HB_ALL = (HB_R6 | HB_R7 | HB_EG | HB_CM) 196}; 197 198enum sb_hw_chip 199{ 200 HW_CHIP_UNKNOWN, 201 HW_CHIP_R600, 202 HW_CHIP_RV610, 203 HW_CHIP_RV630, 204 HW_CHIP_RV670, 205 HW_CHIP_RV620, 206 HW_CHIP_RV635, 207 HW_CHIP_RS780, 208 HW_CHIP_RS880, 209 HW_CHIP_RV770, 210 HW_CHIP_RV730, 211 HW_CHIP_RV710, 212 HW_CHIP_RV740, 213 HW_CHIP_CEDAR, 214 HW_CHIP_REDWOOD, 215 HW_CHIP_JUNIPER, 216 HW_CHIP_CYPRESS, 217 HW_CHIP_HEMLOCK, 218 HW_CHIP_PALM, 219 HW_CHIP_SUMO, 220 HW_CHIP_SUMO2, 221 HW_CHIP_BARTS, 222 HW_CHIP_TURKS, 223 HW_CHIP_CAICOS, 224 HW_CHIP_CAYMAN, 225 HW_CHIP_ARUBA 226}; 227 228enum sb_hw_class 229{ 230 HW_CLASS_UNKNOWN, 231 HW_CLASS_R600, 232 HW_CLASS_R700, 233 HW_CLASS_EVERGREEN, 234 HW_CLASS_CAYMAN 235}; 236 237enum alu_slots { 238 SLOT_X = 0, 239 SLOT_Y = 1, 240 SLOT_Z = 2, 241 SLOT_W = 3, 242 SLOT_TRANS = 4 243}; 244 245enum misc_consts { 246 MAX_ALU_LITERALS = 4, 247 MAX_ALU_SLOTS = 128, 248 MAX_GPR = 128, 249 MAX_CHAN = 4 250 251}; 252 253enum alu_src_sel { 254 255 ALU_SRC_LDS_OQ_A = 219, 256 ALU_SRC_LDS_OQ_B = 220, 257 ALU_SRC_LDS_OQ_A_POP = 221, 258 ALU_SRC_LDS_OQ_B_POP = 222, 259 ALU_SRC_LDS_DIRECT_A = 223, 260 ALU_SRC_LDS_DIRECT_B = 224, 261 ALU_SRC_TIME_HI = 227, 262 ALU_SRC_TIME_LO = 228, 263 ALU_SRC_MASK_HI = 229, 264 ALU_SRC_MASK_LO = 230, 265 ALU_SRC_HW_WAVE_ID = 231, 266 ALU_SRC_SIMD_ID = 232, 267 ALU_SRC_SE_ID = 233, 268 ALU_SRC_HW_THREADGRP_ID = 234, 269 ALU_SRC_WAVE_ID_IN_GRP = 235, 270 ALU_SRC_NUM_THREADGRP_WAVES = 236, 271 ALU_SRC_HW_ALU_ODD = 237, 272 ALU_SRC_LOOP_IDX = 238, 273 ALU_SRC_PARAM_BASE_ADDR = 240, 274 ALU_SRC_NEW_PRIM_MASK = 241, 275 ALU_SRC_PRIM_MASK_HI = 242, 276 ALU_SRC_PRIM_MASK_LO = 243, 277 ALU_SRC_1_DBL_L = 244, 278 ALU_SRC_1_DBL_M = 245, 279 ALU_SRC_0_5_DBL_L = 246, 280 ALU_SRC_0_5_DBL_M = 247, 281 ALU_SRC_0 = 248, 282 ALU_SRC_1 = 249, 283 ALU_SRC_1_INT = 250, 284 ALU_SRC_M_1_INT = 251, 285 ALU_SRC_0_5 = 252, 286 ALU_SRC_LITERAL = 253, 287 ALU_SRC_PV = 254, 288 ALU_SRC_PS = 255, 289 290 ALU_SRC_PARAM_OFFSET = 448 291}; 292 293enum alu_predicate_select 294{ 295 PRED_SEL_OFF = 0, 296// RESERVED = 1, 297 PRED_SEL_0 = 2, 298 PRED_SEL_1 = 3 299}; 300 301 302enum alu_omod { 303 OMOD_OFF = 0, 304 OMOD_M2 = 1, 305 OMOD_M4 = 2, 306 OMOD_D2 = 3 307}; 308 309enum alu_index_mode { 310 INDEX_AR_X = 0, 311 INDEX_AR_Y_R600 = 1, 312 INDEX_AR_Z_R600 = 2, 313 INDEX_AR_W_R600 = 3, 314 315 INDEX_LOOP = 4, 316 INDEX_GLOBAL = 5, 317 INDEX_GLOBAL_AR_X = 6 318}; 319 320enum alu_cayman_mova_dst { 321 CM_MOVADST_AR_X, 322 CM_MOVADST_PC, 323 CM_MOVADST_IDX0, 324 CM_MOVADST_IDX1, 325 CM_MOVADST_CG0, // clause-global byte 0 326 CM_MOVADST_CG1, 327 CM_MOVADST_CG2, 328 CM_MOVADST_CG3 329}; 330 331enum alu_cayman_exec_mask_op { 332 CM_EMO_DEACTIVATE, 333 CM_EMO_BREAK, 334 CM_EMO_CONTINUE, 335 CM_EMO_KILL 336}; 337 338 339enum cf_exp_type { 340 EXP_PIXEL, 341 EXP_POS, 342 EXP_PARAM, 343 344 EXP_TYPE_COUNT 345}; 346 347enum cf_mem_type { 348 MEM_WRITE, 349 MEM_WRITE_IND, 350 MEM_WRITE_ACK, 351 MEM_WRITE_IND_ACK 352}; 353 354 355enum alu_kcache_mode { 356 KC_LOCK_NONE, 357 KC_LOCK_1, 358 KC_LOCK_2, 359 KC_LOCK_LOOP 360}; 361 362enum alu_kcache_index_mode { 363 KC_INDEX_NONE, 364 KC_INDEX_0, 365 KC_INDEX_1, 366 KC_INDEX_INVALID 367}; 368 369enum chan_select { 370 SEL_X = 0, 371 SEL_Y = 1, 372 SEL_Z = 2, 373 SEL_W = 3, 374 SEL_0 = 4, 375 SEL_1 = 5, 376// RESERVED = 6, 377 SEL_MASK = 7 378}; 379 380enum bank_swizzle { 381 VEC_012 = 0, 382 VEC_021 = 1, 383 VEC_120 = 2, 384 VEC_102 = 3, 385 VEC_201 = 4, 386 VEC_210 = 5, 387 388 VEC_NUM = 6, 389 390 SCL_210 = 0, 391 SCL_122 = 1, 392 SCL_212 = 2, 393 SCL_221 = 3, 394 395 SCL_NUM = 4 396 397}; 398 399enum sched_queue_id { 400 SQ_CF, 401 SQ_ALU, 402 SQ_TEX, 403 SQ_VTX, 404 SQ_GDS, 405 406 SQ_NUM 407}; 408 409struct literal { 410 union { 411 int32_t i; 412 uint32_t u; 413 float f; 414 }; 415 416 literal(int32_t i = 0) : i(i) {} 417 literal(uint32_t u) : u(u) {} 418 literal(float f) : f(f) {} 419 literal(double f) : f(f) {} 420 operator uint32_t() const { return u; } 421 bool operator ==(literal l) { return u == l.u; } 422 bool operator ==(int v_int) { return i == v_int; } 423 bool operator ==(unsigned v_uns) { return u == v_uns; } 424}; 425 426struct bc_kcache { 427 unsigned mode; 428 unsigned bank; 429 unsigned addr; 430 unsigned index_mode; 431} ; 432 433// TODO optimize bc structures 434 435struct bc_cf { 436 437 bc_kcache kc[4]; 438 439 unsigned id; 440 441 442 const cf_op_info * op_ptr; 443 unsigned op; 444 445 unsigned addr:32; 446 447 unsigned alt_const:1; 448 unsigned uses_waterfall:1; 449 450 unsigned barrier:1; 451 unsigned count:7; 452 unsigned pop_count:3; 453 unsigned call_count:6; 454 unsigned whole_quad_mode:1; 455 unsigned valid_pixel_mode:1; 456 457 unsigned jumptable_sel:3; 458 unsigned cf_const:5; 459 unsigned cond:2; 460 unsigned end_of_program:1; 461 462 unsigned array_base:13; 463 unsigned elem_size:2; 464 unsigned index_gpr:7; 465 unsigned rw_gpr:7; 466 unsigned rw_rel:1; 467 unsigned type:2; 468 469 unsigned burst_count:4; 470 unsigned mark:1; 471 unsigned sel[4]; 472 473 unsigned array_size:12; 474 unsigned comp_mask:4; 475 476 unsigned rat_id:4; 477 unsigned rat_inst:6; 478 unsigned rat_index_mode:2; 479 480 void set_op(unsigned op) { this->op = op; op_ptr = r600_isa_cf(op); } 481 482 bool is_alu_extended() { 483 assert(op_ptr->flags & CF_ALU); 484 return kc[2].mode != KC_LOCK_NONE || kc[3].mode != KC_LOCK_NONE || 485 kc[0].index_mode != KC_INDEX_NONE || kc[1].index_mode != KC_INDEX_NONE || 486 kc[2].index_mode != KC_INDEX_NONE || kc[3].index_mode != KC_INDEX_NONE; 487 } 488 489}; 490 491struct bc_alu_src { 492 unsigned sel:9; 493 unsigned chan:2; 494 unsigned neg:1; 495 unsigned abs:1; 496 unsigned rel:1; 497 literal value; 498 499 void clear() { 500 sel = 0; 501 chan = 0; 502 neg = 0; 503 abs = 0; 504 rel = 0; 505 value = 0; 506 } 507}; 508 509struct bc_alu { 510 const alu_op_info * op_ptr; 511 unsigned op; 512 513 bc_alu_src src[3]; 514 515 unsigned dst_gpr:7; 516 unsigned dst_chan:2; 517 unsigned dst_rel:1; 518 unsigned clamp:1; 519 unsigned omod:2; 520 unsigned bank_swizzle:3; 521 522 unsigned index_mode:3; 523 unsigned last:1; 524 unsigned pred_sel:2; 525 526 unsigned fog_merge:1; 527 unsigned write_mask:1; 528 unsigned update_exec_mask:1; 529 unsigned update_pred:1; 530 531 unsigned slot:3; 532 533 unsigned lds_idx_offset:6; 534 535 alu_op_flags slot_flags; 536 537 void set_op(unsigned op) { 538 this->op = op; 539 op_ptr = r600_isa_alu(op); 540 } 541 void clear() { 542 op_ptr = nullptr; 543 op = 0; 544 for (int i = 0; i < 3; ++i) 545 src[i].clear(); 546 dst_gpr = 0; 547 dst_chan = 0; 548 dst_rel = 0; 549 clamp = 0; 550 omod = 0; 551 bank_swizzle = 0; 552 index_mode = 0; 553 last = 0; 554 pred_sel = 0; 555 fog_merge = 0; 556 write_mask = 0; 557 update_exec_mask = 0; 558 update_pred = 0; 559 slot = 0; 560 lds_idx_offset = 0; 561 slot_flags = AF_NONE; 562 } 563 bc_alu() { 564 clear(); 565 } 566}; 567 568struct bc_fetch { 569 const fetch_op_info * op_ptr; 570 unsigned op; 571 572 unsigned bc_frac_mode:1; 573 unsigned fetch_whole_quad:1; 574 unsigned resource_id:8; 575 576 unsigned src_gpr:7; 577 unsigned src_rel:1; 578 unsigned src_rel_global:1; /* for GDS ops */ 579 unsigned src_sel[4]; 580 581 unsigned dst_gpr:7; 582 unsigned dst_rel:1; 583 unsigned dst_rel_global:1; /* for GDS ops */ 584 unsigned dst_sel[4]; 585 586 unsigned alt_const:1; 587 588 unsigned inst_mod:2; 589 unsigned resource_index_mode:2; 590 unsigned sampler_index_mode:2; 591 592 unsigned coord_type[4]; 593 unsigned lod_bias:7; 594 595 unsigned offset[3]; 596 597 unsigned sampler_id:5; 598 599 600 unsigned fetch_type:2; 601 unsigned mega_fetch_count:6; 602 unsigned coalesced_read:1; 603 unsigned structured_read:2; 604 unsigned lds_req:1; 605 606 unsigned data_format:6; 607 unsigned format_comp_all:1; 608 unsigned num_format_all:2; 609 unsigned semantic_id:8; 610 unsigned srf_mode_all:1; 611 unsigned use_const_fields:1; 612 613 unsigned const_buf_no_stride:1; 614 unsigned endian_swap:2; 615 unsigned mega_fetch:1; 616 617 unsigned src2_gpr:7; /* for GDS */ 618 unsigned alloc_consume:1; 619 unsigned uav_id:4; 620 unsigned uav_index_mode:2; 621 unsigned bcast_first_req:1; 622 623 /* for MEM ops */ 624 unsigned elem_size:2; 625 unsigned uncached:1; 626 unsigned indexed:1; 627 unsigned burst_count:4; 628 unsigned array_base:13; 629 unsigned array_size:12; 630 631 void set_op(unsigned op) { this->op = op; op_ptr = r600_isa_fetch(op); } 632}; 633 634struct shader_stats { 635 unsigned ndw; 636 unsigned ngpr; 637 unsigned nstack; 638 639 unsigned cf; // clause instructions not included 640 unsigned alu; 641 unsigned alu_clauses; 642 unsigned fetch_clauses; 643 unsigned fetch; 644 unsigned alu_groups; 645 646 unsigned shaders; // number of shaders (for accumulated stats) 647 648 shader_stats() : ndw(), ngpr(), nstack(), cf(), alu(), alu_clauses(), 649 fetch_clauses(), fetch(), alu_groups(), shaders() {} 650 651 void collect(node *n); 652 void accumulate(shader_stats &s); 653 void dump(); 654 void dump_diff(shader_stats &s); 655}; 656 657class sb_context { 658 659public: 660 661 shader_stats src_stats, opt_stats; 662 663 r600_isa *isa; 664 665 sb_hw_chip hw_chip; 666 sb_hw_class hw_class; 667 668 unsigned alu_temp_gprs; 669 unsigned max_fetch; 670 bool has_trans; 671 unsigned vtx_src_num; 672 unsigned num_slots; 673 bool uses_mova_gpr; 674 675 bool r6xx_gpr_index_workaround; 676 677 bool stack_workaround_8xx; 678 bool stack_workaround_9xx; 679 680 unsigned wavefront_size; 681 unsigned stack_entry_size; 682 683 static unsigned dump_pass; 684 static unsigned dump_stat; 685 686 static unsigned dry_run; 687 static unsigned no_fallback; 688 static unsigned safe_math; 689 690 static unsigned dskip_start; 691 static unsigned dskip_end; 692 static unsigned dskip_mode; 693 694 sb_context() : src_stats(), opt_stats(), isa(0), 695 hw_chip(HW_CHIP_UNKNOWN), hw_class(HW_CLASS_UNKNOWN), 696 alu_temp_gprs(0), max_fetch(0), has_trans(false), vtx_src_num(0), 697 num_slots(0), uses_mova_gpr(false), 698 r6xx_gpr_index_workaround(false), stack_workaround_8xx(false), 699 stack_workaround_9xx(false), wavefront_size(0), 700 stack_entry_size(0) {} 701 702 int init(r600_isa *isa, sb_hw_chip chip, sb_hw_class cclass); 703 704 bool is_r600() {return hw_class == HW_CLASS_R600;} 705 bool is_r700() {return hw_class == HW_CLASS_R700;} 706 bool is_evergreen() {return hw_class == HW_CLASS_EVERGREEN;} 707 bool is_cayman() {return hw_class == HW_CLASS_CAYMAN;} 708 bool is_egcm() {return hw_class >= HW_CLASS_EVERGREEN;} 709 710 bool needs_8xx_stack_workaround() { 711 if (!is_evergreen()) 712 return false; 713 714 switch (hw_chip) { 715 case HW_CHIP_HEMLOCK: 716 case HW_CHIP_CYPRESS: 717 case HW_CHIP_JUNIPER: 718 return false; 719 default: 720 return true; 721 } 722 } 723 724 bool needs_9xx_stack_workaround() { 725 return is_cayman(); 726 } 727 728 sb_hw_class_bits hw_class_bit() { 729 switch (hw_class) { 730 case HW_CLASS_R600:return HB_R6; 731 case HW_CLASS_R700:return HB_R7; 732 case HW_CLASS_EVERGREEN:return HB_EG; 733 case HW_CLASS_CAYMAN:return HB_CM; 734 default: assert(!"unknown hw class"); return (sb_hw_class_bits)0; 735 736 } 737 } 738 739 unsigned cf_opcode(unsigned op) { 740 return r600_isa_cf_opcode(isa->hw_class, op); 741 } 742 743 unsigned alu_opcode(unsigned op) { 744 return r600_isa_alu_opcode(isa->hw_class, op); 745 } 746 747 unsigned alu_slots(unsigned op) { 748 return r600_isa_alu_slots(isa->hw_class, op); 749 } 750 751 unsigned alu_slots(const alu_op_info * op_ptr) { 752 return op_ptr->slots[isa->hw_class]; 753 } 754 755 unsigned alu_slots_mask(const alu_op_info * op_ptr) { 756 unsigned mask = 0; 757 unsigned slot_flags = alu_slots(op_ptr); 758 if (slot_flags & AF_V) 759 mask = 0x0F; 760 if (!is_cayman() && (slot_flags & AF_S)) 761 mask |= 0x10; 762 /* Force LDS_IDX ops into SLOT_X */ 763 if (op_ptr->opcode[0] == -1 && ((op_ptr->opcode[1] & 0xFF) == 0x11)) 764 mask = 0x01; 765 return mask; 766 } 767 768 unsigned fetch_opcode(unsigned op) { 769 return r600_isa_fetch_opcode(isa->hw_class, op); 770 } 771 772 bool is_kcache_sel(unsigned sel) { 773 return ((sel >= 128 && sel < 192) || (sel >= 256 && sel < 320)); 774 } 775 776 bool is_lds_oq(unsigned sel) { 777 return (sel >= 0xdb && sel <= 0xde); 778 } 779 780 const char * get_hw_class_name(); 781 const char * get_hw_chip_name(); 782 783}; 784 785#define SB_DUMP_STAT(a) do { if (sb_context::dump_stat) { a } } while (0) 786#define SB_DUMP_PASS(a) do { if (sb_context::dump_pass) { a } } while (0) 787 788class bc_decoder { 789 790 sb_context &ctx; 791 792 uint32_t* dw; 793 unsigned ndw; 794 795public: 796 797 bc_decoder(sb_context &sctx, uint32_t *data, unsigned size) 798 : ctx(sctx), dw(data), ndw(size) {} 799 800 int decode_cf(unsigned &i, bc_cf &bc); 801 int decode_alu(unsigned &i, bc_alu &bc); 802 int decode_fetch(unsigned &i, bc_fetch &bc); 803 804private: 805 int decode_cf_alu(unsigned &i, bc_cf &bc); 806 int decode_cf_exp(unsigned &i, bc_cf &bc); 807 int decode_cf_mem(unsigned &i, bc_cf &bc); 808 809 int decode_fetch_vtx(unsigned &i, bc_fetch &bc); 810 int decode_fetch_gds(unsigned &i, bc_fetch &bc); 811 int decode_fetch_mem(unsigned &i, bc_fetch &bc); 812}; 813 814// bytecode format definition 815 816class hw_encoding_format { 817 const sb_hw_class_bits hw_target; //FIXME: debug - remove after testing 818 hw_encoding_format(); 819protected: 820 uint32_t value; 821public: 822 hw_encoding_format(sb_hw_class_bits hw) 823 : hw_target(hw), value(0) {} 824 hw_encoding_format(uint32_t v, sb_hw_class_bits hw) 825 : hw_target(hw), value(v) {} 826 uint32_t get_value(sb_hw_class_bits hw) const { 827 assert((hw & hw_target) == hw); 828 return value; 829 } 830}; 831 832#define BC_FORMAT_BEGIN_HW(fmt, hwset) \ 833class fmt##_##hwset : public hw_encoding_format {\ 834 typedef fmt##_##hwset thistype; \ 835public: \ 836 fmt##_##hwset() : hw_encoding_format(HB_##hwset) {}; \ 837 fmt##_##hwset(uint32_t v) : hw_encoding_format(v, HB_##hwset) {}; 838 839#define BC_FORMAT_BEGIN(fmt) BC_FORMAT_BEGIN_HW(fmt, ALL) 840 841#define BC_FORMAT_END(fmt) }; 842 843// bytecode format field definition 844 845#define BC_FIELD(fmt, name, shortname, last_bit, first_bit) \ 846 thistype & name(unsigned v) { \ 847 value |= ((v&((1ull<<((last_bit)-(first_bit)+1))-1))<<(first_bit)); \ 848 return *this; \ 849 } \ 850 unsigned get_##name() const { \ 851 return (value>>(first_bit))&((1ull<<((last_bit)-(first_bit)+1))-1); \ 852 } 853 854#define BC_RSRVD(fmt, last_bit, first_bit) 855 856// CLAMP macro defined elsewhere interferes with bytecode field name 857#undef CLAMP 858#include "sb_bc_fmt_def.inc" 859 860#undef BC_FORMAT_BEGIN 861#undef BC_FORMAT_END 862#undef BC_FIELD 863#undef BC_RSRVD 864 865class bc_parser { 866 sb_context & ctx; 867 868 bc_decoder *dec; 869 870 r600_bytecode *bc; 871 r600_shader *pshader; 872 873 uint32_t *dw; 874 unsigned bc_ndw; 875 876 unsigned max_cf; 877 878 shader *sh; 879 880 int error; 881 882 alu_node *slots[2][5]; 883 unsigned cgroup; 884 885 typedef std::vector<cf_node*> id_cf_map; 886 id_cf_map cf_map; 887 888 typedef std::stack<region_node*> region_stack; 889 region_stack loop_stack; 890 891 bool gpr_reladdr; 892 893 // Note: currently relies on input emitting SET_CF in same basic block as uses 894 value *cf_index_value[2]; 895 alu_node *mova; 896public: 897 898 bc_parser(sb_context &sctx, r600_bytecode *bc, r600_shader* pshader) : 899 ctx(sctx), dec(), bc(bc), pshader(pshader), 900 dw(), bc_ndw(), max_cf(), 901 sh(), error(), slots(), cgroup(), 902 cf_map(), loop_stack(), gpr_reladdr(), cf_index_value(), mova() { } 903 904 int decode(); 905 int prepare(); 906 907 shader* get_shader() { assert(!error); return sh; } 908 909private: 910 911 int decode_shader(); 912 913 int parse_decls(); 914 915 int decode_cf(unsigned &i, bool &eop); 916 917 int decode_alu_clause(cf_node *cf); 918 int decode_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt); 919 920 int decode_fetch_clause(cf_node *cf); 921 922 int prepare_ir(); 923 int prepare_alu_clause(cf_node *cf); 924 int prepare_alu_group(cf_node* cf, alu_group_node *g); 925 int prepare_fetch_clause(cf_node *cf); 926 927 int prepare_loop(cf_node *c); 928 int prepare_if(cf_node *c); 929 930 void save_set_cf_index(value *val, unsigned idx); 931 value *get_cf_index_value(unsigned idx); 932 void save_mova(alu_node *mova); 933 alu_node *get_mova(); 934}; 935 936 937 938 939class bytecode { 940 typedef std::vector<uint32_t> bc_vector; 941 sb_hw_class_bits hw_class_bit; 942 943 bc_vector bc; 944 945 unsigned pos; 946 947public: 948 949 bytecode(sb_hw_class_bits hw, unsigned rdw = 256) 950 : hw_class_bit(hw), pos(0) { bc.reserve(rdw); } 951 952 unsigned ndw() { return bc.size(); } 953 954 void write_data(uint32_t* dst) { 955 std::copy(bc.begin(), bc.end(), dst); 956 } 957 958 void align(unsigned a) { 959 unsigned size = bc.size(); 960 size = (size + a - 1) & ~(a-1); 961 bc.resize(size); 962 } 963 964 void set_size(unsigned sz) { 965 assert(sz >= bc.size()); 966 bc.resize(sz); 967 } 968 969 void seek(unsigned p) { 970 if (p != pos) { 971 if (p > bc.size()) { 972 bc.resize(p); 973 } 974 pos = p; 975 } 976 } 977 978 unsigned get_pos() { return pos; } 979 uint32_t *data() { return &bc[0]; } 980 981 bytecode & operator <<(uint32_t v) { 982 if (pos == ndw()) { 983 bc.push_back(v); 984 } else 985 bc.at(pos) = v; 986 ++pos; 987 return *this; 988 } 989 990 bytecode & operator <<(const hw_encoding_format &e) { 991 *this << e.get_value(hw_class_bit); 992 return *this; 993 } 994 995 bytecode & operator <<(const bytecode &b) { 996 bc.insert(bc.end(), b.bc.begin(), b.bc.end()); 997 return *this; 998 } 999 1000 uint32_t at(unsigned dw_id) { return bc.at(dw_id); } 1001}; 1002 1003 1004class bc_builder { 1005 shader &sh; 1006 sb_context &ctx; 1007 bytecode bb; 1008 int error; 1009 1010public: 1011 1012 bc_builder(shader &s); 1013 int build(); 1014 bytecode& get_bytecode() { assert(!error); return bb; } 1015 1016private: 1017 1018 int build_cf(cf_node *n); 1019 1020 int build_cf_alu(cf_node *n); 1021 int build_cf_mem(cf_node *n); 1022 int build_cf_exp(cf_node *n); 1023 1024 int build_alu_clause(cf_node *n); 1025 int build_alu_group(alu_group_node *n); 1026 int build_alu(alu_node *n); 1027 1028 int build_fetch_clause(cf_node *n); 1029 int build_fetch_tex(fetch_node *n); 1030 int build_fetch_vtx(fetch_node *n); 1031 int build_fetch_gds(fetch_node *n); 1032 int build_fetch_mem(fetch_node* n); 1033}; 1034 1035} // namespace r600_sb 1036 1037#endif /* SB_BC_H_ */ 1038