/*
 * Copyright © 2020 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include "aco_ir.h"

#include "util/crc32.h"

#include <algorithm>
#include <deque>
#include <set>
#include <vector>

namespace aco {

/* sgpr_presched/vgpr_presched */
void
collect_presched_stats(Program* program)
{
   RegisterDemand presched_demand;
   for (Block& block : program->blocks)
      presched_demand.update(block.register_demand);
   program->statistics[statistic_sgpr_presched] = presched_demand.sgpr;
   program->statistics[statistic_vgpr_presched] = presched_demand.vgpr;
}

class BlockCycleEstimator {
public:
   enum resource {
      null = 0,
      scalar,
      branch_sendmsg,
      valu,
      valu_complex,
      lds,
      export_gds,
      vmem,
      resource_count,
   };

   BlockCycleEstimator(Program* program_) : program(program_) {}

   Program* program;

   int32_t cur_cycle = 0;
   int32_t res_available[(int)BlockCycleEstimator::resource_count] = {0};
   unsigned res_usage[(int)BlockCycleEstimator::resource_count] = {0};
   int32_t reg_available[512] = {0};
   std::deque<int32_t> lgkm;
   std::deque<int32_t> exp;
   std::deque<int32_t> vm;
   std::deque<int32_t> vs;

   unsigned predict_cost(aco_ptr<Instruction>& instr);
   void add(aco_ptr<Instruction>& instr);
   void join(const BlockCycleEstimator& other);

private:
   unsigned get_waitcnt_cost(wait_imm imm);
   unsigned get_dependency_cost(aco_ptr<Instruction>& instr);

   void use_resources(aco_ptr<Instruction>& instr);
   int32_t cycles_until_res_available(aco_ptr<Instruction>& instr);
};

struct wait_counter_info {
   wait_counter_info(unsigned vm_, unsigned exp_, unsigned lgkm_, unsigned vs_)
       : vm(vm_), exp(exp_), lgkm(lgkm_), vs(vs_)
   {}

   unsigned vm;
   unsigned exp;
   unsigned lgkm;
   unsigned vs;
};

struct perf_info {
   int latency;

   BlockCycleEstimator::resource rsrc0;
   unsigned cost0;

   BlockCycleEstimator::resource rsrc1;
   unsigned cost1;
};

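/* Estimated latency and execution-resource cost for an instruction, keyed on
 * its instr_class: "latency" is the cycle count until results are ready, and
 * each (rsrcN, costN) pair names a unit the instruction keeps busy and for
 * how many cycles. GFX10+ and older generations get separate tables; these
 * are rough per-class estimates, not exact hardware figures.
 */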
static perf_info
get_perf_info(Program* program, aco_ptr<Instruction>& instr)
{
   instr_class cls = instr_info.classes[(int)instr->opcode];

#define WAIT(res)          BlockCycleEstimator::res, 0
#define WAIT_USE(res, cnt) BlockCycleEstimator::res, cnt

   if (program->gfx_level >= GFX10) {
      /* fp64 might be incorrect */
      switch (cls) {
      case instr_class::valu32:
      case instr_class::valu_convert32:
      case instr_class::valu_fma: return {5, WAIT_USE(valu, 1)};
      case instr_class::valu64: return {6, WAIT_USE(valu, 2), WAIT_USE(valu_complex, 2)};
      case instr_class::valu_quarter_rate32:
         return {8, WAIT_USE(valu, 4), WAIT_USE(valu_complex, 4)};
      case instr_class::valu_transcendental32:
         return {10, WAIT_USE(valu, 1), WAIT_USE(valu_complex, 4)};
      case instr_class::valu_double: return {22, WAIT_USE(valu, 16), WAIT_USE(valu_complex, 16)};
      case instr_class::valu_double_add:
         return {22, WAIT_USE(valu, 16), WAIT_USE(valu_complex, 16)};
      case instr_class::valu_double_convert:
         return {22, WAIT_USE(valu, 16), WAIT_USE(valu_complex, 16)};
      case instr_class::valu_double_transcendental:
         return {24, WAIT_USE(valu, 16), WAIT_USE(valu_complex, 16)};
      case instr_class::salu: return {2, WAIT_USE(scalar, 1)};
      case instr_class::smem: return {0, WAIT_USE(scalar, 1)};
      case instr_class::branch:
      case instr_class::sendmsg: return {0, WAIT_USE(branch_sendmsg, 1)};
      case instr_class::ds:
         return instr->ds().gds ? perf_info{0, WAIT_USE(export_gds, 1)}
                                : perf_info{0, WAIT_USE(lds, 1)};
      case instr_class::exp: return {0, WAIT_USE(export_gds, 1)};
      case instr_class::vmem: return {0, WAIT_USE(vmem, 1)};
      case instr_class::barrier:
      case instr_class::waitcnt:
      case instr_class::other:
      default: return {0};
      }
   } else {
      switch (cls) {
      case instr_class::valu32: return {4, WAIT_USE(valu, 4)};
      case instr_class::valu_convert32: return {16, WAIT_USE(valu, 16)};
      case instr_class::valu64: return {8, WAIT_USE(valu, 8)};
      case instr_class::valu_quarter_rate32: return {16, WAIT_USE(valu, 16)};
      case instr_class::valu_fma:
         return program->dev.has_fast_fma32 ? perf_info{4, WAIT_USE(valu, 4)}
                                            : perf_info{16, WAIT_USE(valu, 16)};
      case instr_class::valu_transcendental32: return {16, WAIT_USE(valu, 16)};
      case instr_class::valu_double: return {64, WAIT_USE(valu, 64)};
      case instr_class::valu_double_add: return {32, WAIT_USE(valu, 32)};
      case instr_class::valu_double_convert: return {16, WAIT_USE(valu, 16)};
      case instr_class::valu_double_transcendental: return {64, WAIT_USE(valu, 64)};
      case instr_class::salu: return {4, WAIT_USE(scalar, 4)};
      case instr_class::smem: return {4, WAIT_USE(scalar, 4)};
      case instr_class::branch: return {8, WAIT_USE(branch_sendmsg, 8)};
      case instr_class::sendmsg: return {4, WAIT_USE(branch_sendmsg, 4)};
      case instr_class::ds:
         return instr->ds().gds ? perf_info{4, WAIT_USE(export_gds, 4)}
                                : perf_info{4, WAIT_USE(lds, 4)};
      case instr_class::exp: return {16, WAIT_USE(export_gds, 16)};
      case instr_class::vmem: return {4, WAIT_USE(vmem, 4)};
      case instr_class::barrier:
      case instr_class::waitcnt:
      case instr_class::other:
      default: return {4};
      }
   }

#undef WAIT_USE
#undef WAIT
}

void
BlockCycleEstimator::use_resources(aco_ptr<Instruction>& instr)
{
   perf_info perf = get_perf_info(program, instr);

   if (perf.rsrc0 != resource_count) {
      res_available[(int)perf.rsrc0] = cur_cycle + perf.cost0;
      res_usage[(int)perf.rsrc0] += perf.cost0;
   }

   if (perf.rsrc1 != resource_count) {
      res_available[(int)perf.rsrc1] = cur_cycle + perf.cost1;
      res_usage[(int)perf.rsrc1] += perf.cost1;
   }
}

int32_t
BlockCycleEstimator::cycles_until_res_available(aco_ptr<Instruction>& instr)
{
   perf_info perf = get_perf_info(program, instr);

   int32_t cost = 0;
   if (perf.rsrc0 != resource_count)
      cost = MAX2(cost, res_available[(int)perf.rsrc0] - cur_cycle);
   if (perf.rsrc1 != resource_count)
      cost = MAX2(cost, res_available[(int)perf.rsrc1] - cur_cycle);

   return cost;
}

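/* Estimates how long each wait counter (vm/exp/lgkm/vs) incremented by this
 * instruction will take to retire: exports count against expcnt, SMEM and DS
 * against lgkmcnt, VMEM loads against vmcnt, VMEM stores against vscnt, and
 * FLAT accesses against lgkmcnt as well.
 */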
static wait_counter_info
get_wait_counter_info(aco_ptr<Instruction>& instr)
{
   /* These numbers are all a bit nonsense. LDS/VMEM/SMEM/EXP performance
    * depends a lot on the situation. */

   if (instr->isEXP())
      return wait_counter_info(0, 16, 0, 0);

   if (instr->isFlatLike()) {
      unsigned lgkm = instr->isFlat() ? 20 : 0;
      if (!instr->definitions.empty())
         return wait_counter_info(320, 0, lgkm, 0);
      else
         return wait_counter_info(0, 0, lgkm, 320);
   }

   if (instr->isSMEM()) {
      if (instr->definitions.empty())
         return wait_counter_info(0, 0, 200, 0);
      if (instr->operands.empty()) /* s_memtime and s_memrealtime */
         return wait_counter_info(0, 0, 1, 0);

      bool likely_desc_load = instr->operands[0].size() == 2;
      bool soe = instr->operands.size() >= (!instr->definitions.empty() ? 3 : 4);
      bool const_offset =
         instr->operands[1].isConstant() && (!soe || instr->operands.back().isConstant());

      if (likely_desc_load || const_offset)
         return wait_counter_info(0, 0, 30, 0); /* likely to hit L0 cache */

      return wait_counter_info(0, 0, 200, 0);
   }

   if (instr->format == Format::DS)
      return wait_counter_info(0, 0, 20, 0);

   if (instr->isVMEM() && !instr->definitions.empty())
      return wait_counter_info(320, 0, 0, 0);

   if (instr->isVMEM() && instr->definitions.empty())
      return wait_counter_info(0, 0, 0, 320);

   return wait_counter_info(0, 0, 0, 0);
}

static wait_imm
get_wait_imm(Program* program, aco_ptr<Instruction>& instr)
{
   if (instr->opcode == aco_opcode::s_endpgm) {
      return wait_imm(0, 0, 0, 0);
   } else if (instr->opcode == aco_opcode::s_waitcnt) {
      return wait_imm(GFX10_3, instr->sopp().imm);
   } else if (instr->opcode == aco_opcode::s_waitcnt_vscnt) {
      return wait_imm(0, 0, 0, instr->sopk().imm);
   } else {
      unsigned max_lgkm_cnt = program->gfx_level >= GFX10 ? 62 : 14;
      unsigned max_exp_cnt = 6;
      unsigned max_vm_cnt = program->gfx_level >= GFX9 ? 62 : 14;
      unsigned max_vs_cnt = 62;

      wait_counter_info wait_info = get_wait_counter_info(instr);
      wait_imm imm;
      imm.lgkm = wait_info.lgkm ? max_lgkm_cnt : wait_imm::unset_counter;
      imm.exp = wait_info.exp ? max_exp_cnt : wait_imm::unset_counter;
      imm.vm = wait_info.vm ? max_vm_cnt : wait_imm::unset_counter;
      imm.vs = wait_info.vs ? max_vs_cnt : wait_imm::unset_counter;
      return imm;
   }
}

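/* Returns the number of cycles the instruction has to stall before it can
 * issue: outstanding counter events it implicitly or explicitly waits on,
 * plus (on GFX10+) the cycle at which each source register becomes
 * available. Each counter deque holds the completion cycle of one
 * outstanding event, oldest first, so waiting for "counter <= N" means
 * waiting on all but the newest N entries.
 */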
unsigned
BlockCycleEstimator::get_dependency_cost(aco_ptr<Instruction>& instr)
{
   int deps_available = cur_cycle;

   wait_imm imm = get_wait_imm(program, instr);
   if (imm.vm != wait_imm::unset_counter) {
      for (int i = 0; i < (int)vm.size() - imm.vm; i++)
         deps_available = MAX2(deps_available, vm[i]);
   }
   if (imm.exp != wait_imm::unset_counter) {
      for (int i = 0; i < (int)exp.size() - imm.exp; i++)
         deps_available = MAX2(deps_available, exp[i]);
   }
   if (imm.lgkm != wait_imm::unset_counter) {
      for (int i = 0; i < (int)lgkm.size() - imm.lgkm; i++)
         deps_available = MAX2(deps_available, lgkm[i]);
   }
   if (imm.vs != wait_imm::unset_counter) {
      for (int i = 0; i < (int)vs.size() - imm.vs; i++)
         deps_available = MAX2(deps_available, vs[i]);
   }

   if (instr->opcode == aco_opcode::s_endpgm) {
      for (unsigned i = 0; i < 512; i++)
         deps_available = MAX2(deps_available, reg_available[i]);
   } else if (program->gfx_level >= GFX10) {
      for (Operand& op : instr->operands) {
         if (op.isConstant() || op.isUndefined())
            continue;
         for (unsigned i = 0; i < op.size(); i++)
            deps_available = MAX2(deps_available, reg_available[op.physReg().reg() + i]);
      }
   }

   if (program->gfx_level < GFX10)
      deps_available = align(deps_available, 4);

   return deps_available - cur_cycle;
}

unsigned
BlockCycleEstimator::predict_cost(aco_ptr<Instruction>& instr)
{
   int32_t dep = get_dependency_cost(instr);
   return dep + std::max(cycles_until_res_available(instr) - dep, 0);
}

static bool
is_vector(aco_opcode op)
{
   switch (instr_info.classes[(int)op]) {
   case instr_class::valu32:
   case instr_class::valu_convert32:
   case instr_class::valu_fma:
   case instr_class::valu_double:
   case instr_class::valu_double_add:
   case instr_class::valu_double_convert:
   case instr_class::valu_double_transcendental:
   case instr_class::vmem:
   case instr_class::ds:
   case instr_class::exp:
   case instr_class::valu64:
   case instr_class::valu_quarter_rate32:
   case instr_class::valu_transcendental32: return true;
   default: return false;
   }
}

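/* Advances the estimator past one instruction: stall until its dependencies
 * and execution resources are ready, mark the resources busy (twice on
 * GFX10+ wave64, presumably one pass per 32-lane half), then record the new
 * outstanding wait-counter events and the cycle at which each written
 * register becomes available again.
 */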
void
BlockCycleEstimator::add(aco_ptr<Instruction>& instr)
{
   perf_info perf = get_perf_info(program, instr);

   cur_cycle += get_dependency_cost(instr);

   unsigned start;
   bool dual_issue = program->gfx_level >= GFX10 && program->wave_size == 64 &&
                     is_vector(instr->opcode) && program->workgroup_size > 32;
   for (unsigned i = 0; i < (dual_issue ? 2 : 1); i++) {
      cur_cycle += cycles_until_res_available(instr);

      start = cur_cycle;
      use_resources(instr);

      /* GCN is in-order and doesn't begin the next instruction until the current one finishes */
      cur_cycle += program->gfx_level >= GFX10 ? 1 : perf.latency;
   }

   wait_imm imm = get_wait_imm(program, instr);
   while (lgkm.size() > imm.lgkm)
      lgkm.pop_front();
   while (exp.size() > imm.exp)
      exp.pop_front();
   while (vm.size() > imm.vm)
      vm.pop_front();
   while (vs.size() > imm.vs)
      vs.pop_front();

   wait_counter_info wait_info = get_wait_counter_info(instr);
   if (wait_info.exp)
      exp.push_back(cur_cycle + wait_info.exp);
   if (wait_info.lgkm)
      lgkm.push_back(cur_cycle + wait_info.lgkm);
   if (wait_info.vm)
      vm.push_back(cur_cycle + wait_info.vm);
   if (wait_info.vs)
      vs.push_back(cur_cycle + wait_info.vs);

   /* This is inaccurate but shouldn't affect anything after waitcnt insertion.
    * Before waitcnt insertion, this is necessary to consider memory operations.
    */
   int latency = MAX3(wait_info.exp, wait_info.lgkm, wait_info.vm);
   int32_t result_available = start + MAX2(perf.latency, latency);

   for (Definition& def : instr->definitions) {
      int32_t* available = &reg_available[def.physReg().reg()];
      for (unsigned i = 0; i < def.size(); i++)
         available[i] = MAX2(available[i], result_available);
   }
}

static void
join_queue(std::deque<int32_t>& queue, const std::deque<int32_t>& pred, int cycle_diff)
{
   for (unsigned i = 0; i < MIN2(queue.size(), pred.size()); i++)
      queue.rbegin()[i] = MAX2(queue.rbegin()[i], pred.rbegin()[i] + cycle_diff);
   for (int i = pred.size() - queue.size() - 1; i >= 0; i--)
      queue.push_front(pred[i] + cycle_diff);
}

void
BlockCycleEstimator::join(const BlockCycleEstimator& pred)
{
   assert(cur_cycle == 0);

   for (unsigned i = 0; i < (unsigned)resource_count; i++) {
      assert(res_usage[i] == 0);
      res_available[i] = MAX2(res_available[i], pred.res_available[i] - pred.cur_cycle);
   }

   for (unsigned i = 0; i < 512; i++)
      reg_available[i] = MAX2(reg_available[i], pred.reg_available[i] - pred.cur_cycle + cur_cycle);

   join_queue(lgkm, pred.lgkm, -pred.cur_cycle);
   join_queue(exp, pred.exp, -pred.cur_cycle);
   join_queue(vm, pred.vm, -pred.cur_cycle);
   join_queue(vs, pred.vs, -pred.cur_cycle);
}

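/* Counts instructions, branches and VMEM/SMEM clauses, then runs
 * BlockCycleEstimator over every block in layout order, joining predecessor
 * state at control-flow merges, and converts the weighted per-block cycle
 * counts into the latency and inverse-throughput statistics.
 */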
/* instructions/branches/vmem_clauses/smem_clauses/cycles */
void
collect_preasm_stats(Program* program)
{
   for (Block& block : program->blocks) {
      std::set<Instruction*> vmem_clause;
      std::set<Instruction*> smem_clause;

      program->statistics[statistic_instructions] += block.instructions.size();

      for (aco_ptr<Instruction>& instr : block.instructions) {
         if (instr->isSOPP() && instr->sopp().block != -1)
            program->statistics[statistic_branches]++;

         if (instr->opcode == aco_opcode::p_constaddr)
            program->statistics[statistic_instructions] += 2;

         if ((instr->isVMEM() || instr->isScratch() || instr->isGlobal()) &&
             !instr->operands.empty()) {
            if (std::none_of(vmem_clause.begin(), vmem_clause.end(),
                             [&](Instruction* other)
                             { return should_form_clause(instr.get(), other); }))
               program->statistics[statistic_vmem_clauses]++;
            vmem_clause.insert(instr.get());
         } else {
            vmem_clause.clear();
         }

         if (instr->isSMEM() && !instr->operands.empty()) {
            if (std::none_of(smem_clause.begin(), smem_clause.end(),
                             [&](Instruction* other)
                             { return should_form_clause(instr.get(), other); }))
               program->statistics[statistic_smem_clauses]++;
            smem_clause.insert(instr.get());
         } else {
            smem_clause.clear();
         }
      }
   }

   double latency = 0;
   double usage[(int)BlockCycleEstimator::resource_count] = {0};
   std::vector<BlockCycleEstimator> blocks(program->blocks.size(), program);

   if (program->stage.has(SWStage::VS) && program->info.vs.has_prolog) {
      unsigned vs_input_latency = 320;
      for (Definition def : program->vs_inputs) {
         blocks[0].vm.push_back(vs_input_latency);
         for (unsigned i = 0; i < def.size(); i++)
            blocks[0].reg_available[def.physReg().reg() + i] = vs_input_latency;
      }
   }

   for (Block& block : program->blocks) {
      BlockCycleEstimator& block_est = blocks[block.index];
      for (unsigned pred : block.linear_preds)
         block_est.join(blocks[pred]);

      for (aco_ptr<Instruction>& instr : block.instructions) {
         unsigned before = block_est.cur_cycle;
         block_est.add(instr);
         instr->pass_flags = block_est.cur_cycle - before;
      }

      /* TODO: it would be nice to be able to consider estimated loop trip
       * counts used for loop unrolling.
       */

      /* TODO: estimate the trip_count of divergent loops (those with
       * divergent breaks) higher than that of uniform loops.
       */

      /* Assume loops execute 8 times at the outermost level, 4 times at the
       * next level and 2 times at each level beyond that, uniform branches
       * are taken 50% of the time, and any lane in the wave takes a side of
       * a divergent branch 75% of the time.
       */
      double iter = 1.0;
      iter *= block.loop_nest_depth > 0 ? 8.0 : 1.0;
      iter *= block.loop_nest_depth > 1 ? 4.0 : 1.0;
      iter *= block.loop_nest_depth > 2 ? pow(2.0, block.loop_nest_depth - 2) : 1.0;
      iter *= pow(0.5, block.uniform_if_depth);
      iter *= pow(0.75, block.divergent_if_logical_depth);

      bool divergent_if_linear_else =
         block.logical_preds.empty() && block.linear_preds.size() == 1 &&
         block.linear_succs.size() == 1 &&
         program->blocks[block.linear_preds[0]].kind & (block_kind_branch | block_kind_invert);
      if (divergent_if_linear_else)
         iter *= 0.25;

      latency += block_est.cur_cycle * iter;
      for (unsigned i = 0; i < (unsigned)BlockCycleEstimator::resource_count; i++)
         usage[i] += block_est.res_usage[i] * iter;
   }

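   /* At this point "latency" approximates one wave's expected runtime in
    * cycles and "usage[r]" the cycles it keeps resource r busy, both weighted
    * by the per-block iteration estimates above. Roughly latency / usage[r]
    * waves can overlap on resource r before it saturates; e.g. a wave that
    * keeps the VALU busy 25% of its lifetime leaves room for about 4 waves.
    */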
   /* This likely exaggerates the effectiveness of parallelism because it
    * ignores instruction ordering. It can assume there might be SALU/VALU/etc.
    * work from other waves to perform while one is idle, but that might not be
    * the case because those other waves have not reached such a point yet.
    */
   double parallelism = program->num_waves;
   for (unsigned i = 0; i < (unsigned)BlockCycleEstimator::resource_count; i++) {
      if (usage[i] > 0.0)
         parallelism = MIN2(parallelism, latency / usage[i]);
   }
   double waves_per_cycle = 1.0 / latency * parallelism;
   double wave64_per_cycle = waves_per_cycle * (program->wave_size / 64.0);

   double max_utilization = 1.0;
   if (program->workgroup_size != UINT_MAX)
      max_utilization =
         program->workgroup_size / (double)align(program->workgroup_size, program->wave_size);
   wave64_per_cycle *= max_utilization;

   program->statistics[statistic_latency] = round(latency);
   program->statistics[statistic_inv_throughput] = round(1.0 / wave64_per_cycle);

   if (debug_flags & DEBUG_PERF_INFO) {
      aco_print_program(program, stderr, print_no_ssa | print_perf_info);

      fprintf(stderr, "num_waves: %u\n", program->num_waves);
      fprintf(stderr, "salu_smem_usage: %f\n", usage[(int)BlockCycleEstimator::scalar]);
      fprintf(stderr, "branch_sendmsg_usage: %f\n",
              usage[(int)BlockCycleEstimator::branch_sendmsg]);
      fprintf(stderr, "valu_usage: %f\n", usage[(int)BlockCycleEstimator::valu]);
      fprintf(stderr, "valu_complex_usage: %f\n", usage[(int)BlockCycleEstimator::valu_complex]);
      fprintf(stderr, "lds_usage: %f\n", usage[(int)BlockCycleEstimator::lds]);
      fprintf(stderr, "export_gds_usage: %f\n", usage[(int)BlockCycleEstimator::export_gds]);
      fprintf(stderr, "vmem_usage: %f\n", usage[(int)BlockCycleEstimator::vmem]);
      fprintf(stderr, "latency: %f\n", latency);
      fprintf(stderr, "parallelism: %f\n", parallelism);
      fprintf(stderr, "max_utilization: %f\n", max_utilization);
      fprintf(stderr, "wave64_per_cycle: %f\n", wave64_per_cycle);
      fprintf(stderr, "\n");
   }
}

void
collect_postasm_stats(Program* program, const std::vector<uint32_t>& code)
{
   program->statistics[aco::statistic_hash] = util_hash_crc32(code.data(), code.size() * 4);
}

} // namespace aco