1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2020 Intel Corporation 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 
22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#include "brw_eu.h" 25bf215546Sopenharmony_ci#include "brw_fs.h" 26bf215546Sopenharmony_ci#include "brw_vec4.h" 27bf215546Sopenharmony_ci#include "brw_cfg.h" 28bf215546Sopenharmony_ci 29bf215546Sopenharmony_ciusing namespace brw; 30bf215546Sopenharmony_ci 31bf215546Sopenharmony_cinamespace { 32bf215546Sopenharmony_ci /** 33bf215546Sopenharmony_ci * Enumeration representing the various asynchronous units that can run 34bf215546Sopenharmony_ci * computations in parallel on behalf of a shader thread. 35bf215546Sopenharmony_ci */ 36bf215546Sopenharmony_ci enum intel_eu_unit { 37bf215546Sopenharmony_ci /** EU front-end. */ 38bf215546Sopenharmony_ci EU_UNIT_FE, 39bf215546Sopenharmony_ci /** EU FPU0 (Note that co-issue to FPU1 is currently not modeled here). */ 40bf215546Sopenharmony_ci EU_UNIT_FPU, 41bf215546Sopenharmony_ci /** Extended Math unit (AKA FPU1 on Gfx8-11, part of the EU on Gfx6+). */ 42bf215546Sopenharmony_ci EU_UNIT_EM, 43bf215546Sopenharmony_ci /** Sampler shared function. */ 44bf215546Sopenharmony_ci EU_UNIT_SAMPLER, 45bf215546Sopenharmony_ci /** Pixel Interpolator shared function. */ 46bf215546Sopenharmony_ci EU_UNIT_PI, 47bf215546Sopenharmony_ci /** Unified Return Buffer shared function. */ 48bf215546Sopenharmony_ci EU_UNIT_URB, 49bf215546Sopenharmony_ci /** Data Port Data Cache shared function. */ 50bf215546Sopenharmony_ci EU_UNIT_DP_DC, 51bf215546Sopenharmony_ci /** Data Port Render Cache shared function. */ 52bf215546Sopenharmony_ci EU_UNIT_DP_RC, 53bf215546Sopenharmony_ci /** Data Port Constant Cache shared function. */ 54bf215546Sopenharmony_ci EU_UNIT_DP_CC, 55bf215546Sopenharmony_ci /** Message Gateway shared function. */ 56bf215546Sopenharmony_ci EU_UNIT_GATEWAY, 57bf215546Sopenharmony_ci /** Thread Spawner shared function. 
*/ 58bf215546Sopenharmony_ci EU_UNIT_SPAWNER, 59bf215546Sopenharmony_ci /* EU_UNIT_VME, */ 60bf215546Sopenharmony_ci /* EU_UNIT_CRE, */ 61bf215546Sopenharmony_ci /** Number of asynchronous units currently tracked. */ 62bf215546Sopenharmony_ci EU_NUM_UNITS, 63bf215546Sopenharmony_ci /** Dummy unit for instructions that don't consume runtime from the above. */ 64bf215546Sopenharmony_ci EU_UNIT_NULL = EU_NUM_UNITS 65bf215546Sopenharmony_ci }; 66bf215546Sopenharmony_ci 67bf215546Sopenharmony_ci /** 68bf215546Sopenharmony_ci * Enumeration representing a computation result another computation can 69bf215546Sopenharmony_ci * potentially depend on. 70bf215546Sopenharmony_ci */ 71bf215546Sopenharmony_ci enum intel_eu_dependency_id { 72bf215546Sopenharmony_ci /* Register part of the GRF. */ 73bf215546Sopenharmony_ci EU_DEPENDENCY_ID_GRF0 = 0, 74bf215546Sopenharmony_ci /* Register part of the MRF. Only used on Gfx4-6. */ 75bf215546Sopenharmony_ci EU_DEPENDENCY_ID_MRF0 = EU_DEPENDENCY_ID_GRF0 + BRW_MAX_GRF, 76bf215546Sopenharmony_ci /* Address register part of the ARF. */ 77bf215546Sopenharmony_ci EU_DEPENDENCY_ID_ADDR0 = EU_DEPENDENCY_ID_MRF0 + 24, 78bf215546Sopenharmony_ci /* Accumulator register part of the ARF. */ 79bf215546Sopenharmony_ci EU_DEPENDENCY_ID_ACCUM0 = EU_DEPENDENCY_ID_ADDR0 + 1, 80bf215546Sopenharmony_ci /* Flag register part of the ARF. */ 81bf215546Sopenharmony_ci EU_DEPENDENCY_ID_FLAG0 = EU_DEPENDENCY_ID_ACCUM0 + 12, 82bf215546Sopenharmony_ci /* SBID token write completion. Only used on Gfx12+. */ 83bf215546Sopenharmony_ci EU_DEPENDENCY_ID_SBID_WR0 = EU_DEPENDENCY_ID_FLAG0 + 8, 84bf215546Sopenharmony_ci /* SBID token read completion. Only used on Gfx12+. */ 85bf215546Sopenharmony_ci EU_DEPENDENCY_ID_SBID_RD0 = EU_DEPENDENCY_ID_SBID_WR0 + 16, 86bf215546Sopenharmony_ci /* Number of computation dependencies currently tracked. 
*/ 87bf215546Sopenharmony_ci EU_NUM_DEPENDENCY_IDS = EU_DEPENDENCY_ID_SBID_RD0 + 16 88bf215546Sopenharmony_ci }; 89bf215546Sopenharmony_ci 90bf215546Sopenharmony_ci /** 91bf215546Sopenharmony_ci * State of our modeling of the program execution. 92bf215546Sopenharmony_ci */ 93bf215546Sopenharmony_ci struct state { 94bf215546Sopenharmony_ci state() : unit_ready(), dep_ready(), unit_busy(), weight(1.0) {} 95bf215546Sopenharmony_ci /** 96bf215546Sopenharmony_ci * Time at which a given unit will be ready to execute the next 97bf215546Sopenharmony_ci * computation, in clock units. 98bf215546Sopenharmony_ci */ 99bf215546Sopenharmony_ci unsigned unit_ready[EU_NUM_UNITS]; 100bf215546Sopenharmony_ci /** 101bf215546Sopenharmony_ci * Time at which an instruction dependent on a given dependency ID will 102bf215546Sopenharmony_ci * be ready to execute, in clock units. 103bf215546Sopenharmony_ci */ 104bf215546Sopenharmony_ci unsigned dep_ready[EU_NUM_DEPENDENCY_IDS]; 105bf215546Sopenharmony_ci /** 106bf215546Sopenharmony_ci * Aggregated utilization of a given unit excluding idle cycles, 107bf215546Sopenharmony_ci * in clock units. 108bf215546Sopenharmony_ci */ 109bf215546Sopenharmony_ci float unit_busy[EU_NUM_UNITS]; 110bf215546Sopenharmony_ci /** 111bf215546Sopenharmony_ci * Factor of the overhead of a computation accounted for in the 112bf215546Sopenharmony_ci * aggregated utilization calculation. 113bf215546Sopenharmony_ci */ 114bf215546Sopenharmony_ci float weight; 115bf215546Sopenharmony_ci }; 116bf215546Sopenharmony_ci 117bf215546Sopenharmony_ci /** 118bf215546Sopenharmony_ci * Information derived from an IR instruction used to compute performance 119bf215546Sopenharmony_ci * estimates. Allows the timing calculation to work on both FS and VEC4 120bf215546Sopenharmony_ci * instructions. 
121bf215546Sopenharmony_ci */ 122bf215546Sopenharmony_ci struct instruction_info { 123bf215546Sopenharmony_ci instruction_info(const struct brw_isa_info *isa, const fs_inst *inst) : 124bf215546Sopenharmony_ci isa(isa), devinfo(isa->devinfo), op(inst->opcode), 125bf215546Sopenharmony_ci td(inst->dst.type), sd(DIV_ROUND_UP(inst->size_written, REG_SIZE)), 126bf215546Sopenharmony_ci tx(get_exec_type(inst)), sx(0), ss(0), 127bf215546Sopenharmony_ci sc(has_bank_conflict(isa, inst) ? sd : 0), 128bf215546Sopenharmony_ci desc(inst->desc), sfid(inst->sfid) 129bf215546Sopenharmony_ci { 130bf215546Sopenharmony_ci /* We typically want the maximum source size, except for split send 131bf215546Sopenharmony_ci * messages which require the total size. 132bf215546Sopenharmony_ci */ 133bf215546Sopenharmony_ci if (inst->opcode == SHADER_OPCODE_SEND) { 134bf215546Sopenharmony_ci ss = DIV_ROUND_UP(inst->size_read(2), REG_SIZE) + 135bf215546Sopenharmony_ci DIV_ROUND_UP(inst->size_read(3), REG_SIZE); 136bf215546Sopenharmony_ci } else { 137bf215546Sopenharmony_ci for (unsigned i = 0; i < inst->sources; i++) 138bf215546Sopenharmony_ci ss = MAX2(ss, DIV_ROUND_UP(inst->size_read(i), REG_SIZE)); 139bf215546Sopenharmony_ci } 140bf215546Sopenharmony_ci 141bf215546Sopenharmony_ci /* Convert the execution size to GRF units. */ 142bf215546Sopenharmony_ci sx = DIV_ROUND_UP(inst->exec_size * type_sz(tx), REG_SIZE); 143bf215546Sopenharmony_ci 144bf215546Sopenharmony_ci /* 32x32 integer multiplication has half the usual ALU throughput. 145bf215546Sopenharmony_ci * Treat it as double-precision. 
146bf215546Sopenharmony_ci */ 147bf215546Sopenharmony_ci if ((inst->opcode == BRW_OPCODE_MUL || inst->opcode == BRW_OPCODE_MAD) && 148bf215546Sopenharmony_ci !brw_reg_type_is_floating_point(tx) && type_sz(tx) == 4 && 149bf215546Sopenharmony_ci type_sz(inst->src[0].type) == type_sz(inst->src[1].type)) 150bf215546Sopenharmony_ci tx = brw_int_type(8, tx == BRW_REGISTER_TYPE_D); 151bf215546Sopenharmony_ci } 152bf215546Sopenharmony_ci 153bf215546Sopenharmony_ci instruction_info(const struct brw_isa_info *isa, 154bf215546Sopenharmony_ci const vec4_instruction *inst) : 155bf215546Sopenharmony_ci isa(isa), devinfo(isa->devinfo), op(inst->opcode), 156bf215546Sopenharmony_ci td(inst->dst.type), sd(DIV_ROUND_UP(inst->size_written, REG_SIZE)), 157bf215546Sopenharmony_ci tx(get_exec_type(inst)), sx(0), ss(0), sc(0), 158bf215546Sopenharmony_ci desc(inst->desc), sfid(inst->sfid) 159bf215546Sopenharmony_ci { 160bf215546Sopenharmony_ci /* Compute the maximum source size. */ 161bf215546Sopenharmony_ci for (unsigned i = 0; i < ARRAY_SIZE(inst->src); i++) 162bf215546Sopenharmony_ci ss = MAX2(ss, DIV_ROUND_UP(inst->size_read(i), REG_SIZE)); 163bf215546Sopenharmony_ci 164bf215546Sopenharmony_ci /* Convert the execution size to GRF units. */ 165bf215546Sopenharmony_ci sx = DIV_ROUND_UP(inst->exec_size * type_sz(tx), REG_SIZE); 166bf215546Sopenharmony_ci 167bf215546Sopenharmony_ci /* 32x32 integer multiplication has half the usual ALU throughput. 168bf215546Sopenharmony_ci * Treat it as double-precision. 
169bf215546Sopenharmony_ci */ 170bf215546Sopenharmony_ci if ((inst->opcode == BRW_OPCODE_MUL || inst->opcode == BRW_OPCODE_MAD) && 171bf215546Sopenharmony_ci !brw_reg_type_is_floating_point(tx) && type_sz(tx) == 4 && 172bf215546Sopenharmony_ci type_sz(inst->src[0].type) == type_sz(inst->src[1].type)) 173bf215546Sopenharmony_ci tx = brw_int_type(8, tx == BRW_REGISTER_TYPE_D); 174bf215546Sopenharmony_ci } 175bf215546Sopenharmony_ci 176bf215546Sopenharmony_ci /** ISA encoding information */ 177bf215546Sopenharmony_ci const struct brw_isa_info *isa; 178bf215546Sopenharmony_ci /** Device information. */ 179bf215546Sopenharmony_ci const struct intel_device_info *devinfo; 180bf215546Sopenharmony_ci /** Instruction opcode. */ 181bf215546Sopenharmony_ci opcode op; 182bf215546Sopenharmony_ci /** Destination type. */ 183bf215546Sopenharmony_ci brw_reg_type td; 184bf215546Sopenharmony_ci /** Destination size in GRF units. */ 185bf215546Sopenharmony_ci unsigned sd; 186bf215546Sopenharmony_ci /** Execution type. */ 187bf215546Sopenharmony_ci brw_reg_type tx; 188bf215546Sopenharmony_ci /** Execution size in GRF units. */ 189bf215546Sopenharmony_ci unsigned sx; 190bf215546Sopenharmony_ci /** Source size. */ 191bf215546Sopenharmony_ci unsigned ss; 192bf215546Sopenharmony_ci /** Bank conflict penalty size in GRF units (equal to sd if non-zero). */ 193bf215546Sopenharmony_ci unsigned sc; 194bf215546Sopenharmony_ci /** Send message descriptor. */ 195bf215546Sopenharmony_ci uint32_t desc; 196bf215546Sopenharmony_ci /** Send message shared function ID. */ 197bf215546Sopenharmony_ci uint8_t sfid; 198bf215546Sopenharmony_ci }; 199bf215546Sopenharmony_ci 200bf215546Sopenharmony_ci /** 201bf215546Sopenharmony_ci * Timing information of an instruction used to estimate the performance of 202bf215546Sopenharmony_ci * the program. 
203bf215546Sopenharmony_ci */ 204bf215546Sopenharmony_ci struct perf_desc { 205bf215546Sopenharmony_ci perf_desc(enum intel_eu_unit u, int df, int db, 206bf215546Sopenharmony_ci int ls, int ld, int la, int lf) : 207bf215546Sopenharmony_ci u(u), df(df), db(db), ls(ls), ld(ld), la(la), lf(lf) {} 208bf215546Sopenharmony_ci 209bf215546Sopenharmony_ci /** 210bf215546Sopenharmony_ci * Back-end unit its runtime shall be accounted to, in addition to the 211bf215546Sopenharmony_ci * EU front-end which is always assumed to be involved. 212bf215546Sopenharmony_ci */ 213bf215546Sopenharmony_ci enum intel_eu_unit u; 214bf215546Sopenharmony_ci /** 215bf215546Sopenharmony_ci * Overhead cycles from the time that the EU front-end starts executing 216bf215546Sopenharmony_ci * the instruction until it's ready to execute the next instruction. 217bf215546Sopenharmony_ci */ 218bf215546Sopenharmony_ci int df; 219bf215546Sopenharmony_ci /** 220bf215546Sopenharmony_ci * Overhead cycles from the time that the back-end starts executing the 221bf215546Sopenharmony_ci * instruction until it's ready to execute the next instruction. 222bf215546Sopenharmony_ci */ 223bf215546Sopenharmony_ci int db; 224bf215546Sopenharmony_ci /** 225bf215546Sopenharmony_ci * Latency cycles from the time that the back-end starts executing the 226bf215546Sopenharmony_ci * instruction until its sources have been read from the register file. 227bf215546Sopenharmony_ci */ 228bf215546Sopenharmony_ci int ls; 229bf215546Sopenharmony_ci /** 230bf215546Sopenharmony_ci * Latency cycles from the time that the back-end starts executing the 231bf215546Sopenharmony_ci * instruction until its regular destination has been written to the 232bf215546Sopenharmony_ci * register file. 
233bf215546Sopenharmony_ci */ 234bf215546Sopenharmony_ci int ld; 235bf215546Sopenharmony_ci /** 236bf215546Sopenharmony_ci * Latency cycles from the time that the back-end starts executing the 237bf215546Sopenharmony_ci * instruction until its accumulator destination has been written to the 238bf215546Sopenharmony_ci * ARF file. 239bf215546Sopenharmony_ci * 240bf215546Sopenharmony_ci * Note that this is an approximation of the real behavior of 241bf215546Sopenharmony_ci * accumulating instructions in the hardware: Instead of modeling a pair 242bf215546Sopenharmony_ci * of back-to-back accumulating instructions as a first computation with 243bf215546Sopenharmony_ci * latency equal to ld followed by another computation with a 244bf215546Sopenharmony_ci * mid-pipeline stall (e.g. after the "M" part of a MAC instruction), we 245bf215546Sopenharmony_ci * model the stall as if it occurred at the top of the pipeline, with 246bf215546Sopenharmony_ci * the latency of the accumulator computation offset accordingly. 247bf215546Sopenharmony_ci */ 248bf215546Sopenharmony_ci int la; 249bf215546Sopenharmony_ci /** 250bf215546Sopenharmony_ci * Latency cycles from the time that the back-end starts executing the 251bf215546Sopenharmony_ci * instruction until its flag destination has been written to the ARF 252bf215546Sopenharmony_ci * file. 253bf215546Sopenharmony_ci */ 254bf215546Sopenharmony_ci int lf; 255bf215546Sopenharmony_ci }; 256bf215546Sopenharmony_ci 257bf215546Sopenharmony_ci /** 258bf215546Sopenharmony_ci * Compute the timing information of an instruction based on any relevant 259bf215546Sopenharmony_ci * information from the IR and a number of parameters specifying a linear 260bf215546Sopenharmony_ci * approximation: Parameter X_Y specifies the derivative of timing X 261bf215546Sopenharmony_ci * relative to info field Y, while X_1 specifies the independent term of 262bf215546Sopenharmony_ci * the approximation of timing X. 
263bf215546Sopenharmony_ci */ 264bf215546Sopenharmony_ci perf_desc 265bf215546Sopenharmony_ci calculate_desc(const instruction_info &info, enum intel_eu_unit u, 266bf215546Sopenharmony_ci int df_1, int df_sd, int df_sc, 267bf215546Sopenharmony_ci int db_1, int db_sx, 268bf215546Sopenharmony_ci int ls_1, int ld_1, int la_1, int lf_1, 269bf215546Sopenharmony_ci int l_ss, int l_sd) 270bf215546Sopenharmony_ci { 271bf215546Sopenharmony_ci return perf_desc(u, df_1 + df_sd * int(info.sd) + df_sc * int(info.sc), 272bf215546Sopenharmony_ci db_1 + db_sx * int(info.sx), 273bf215546Sopenharmony_ci ls_1 + l_ss * int(info.ss), 274bf215546Sopenharmony_ci ld_1 + l_ss * int(info.ss) + l_sd * int(info.sd), 275bf215546Sopenharmony_ci la_1, lf_1); 276bf215546Sopenharmony_ci } 277bf215546Sopenharmony_ci 278bf215546Sopenharmony_ci /** 279bf215546Sopenharmony_ci * Compute the timing information of an instruction based on any relevant 280bf215546Sopenharmony_ci * information from the IR and a number of linear approximation parameters 281bf215546Sopenharmony_ci * hard-coded for each IR instruction. 282bf215546Sopenharmony_ci * 283bf215546Sopenharmony_ci * Most timing parameters are obtained from the multivariate linear 284bf215546Sopenharmony_ci * regression of a sample of empirical timings measured using the tm0 285bf215546Sopenharmony_ci * register (as can be done today by using the shader_time debugging 286bf215546Sopenharmony_ci * option). The Gfx4-5 math timings are obtained from BSpec Volume 5c.3 287bf215546Sopenharmony_ci * "Shared Functions - Extended Math", Section 3.2 "Performance". 288bf215546Sopenharmony_ci * Parameters marked XXX shall be considered low-quality, they're possibly 289bf215546Sopenharmony_ci * high variance or completely guessed in cases where experimental data was 290bf215546Sopenharmony_ci * unavailable. 
291bf215546Sopenharmony_ci */ 292bf215546Sopenharmony_ci const perf_desc 293bf215546Sopenharmony_ci instruction_desc(const instruction_info &info) 294bf215546Sopenharmony_ci { 295bf215546Sopenharmony_ci const struct intel_device_info *devinfo = info.devinfo; 296bf215546Sopenharmony_ci 297bf215546Sopenharmony_ci switch (info.op) { 298bf215546Sopenharmony_ci case BRW_OPCODE_SYNC: 299bf215546Sopenharmony_ci case BRW_OPCODE_SEL: 300bf215546Sopenharmony_ci case BRW_OPCODE_NOT: 301bf215546Sopenharmony_ci case BRW_OPCODE_AND: 302bf215546Sopenharmony_ci case BRW_OPCODE_OR: 303bf215546Sopenharmony_ci case BRW_OPCODE_XOR: 304bf215546Sopenharmony_ci case BRW_OPCODE_SHR: 305bf215546Sopenharmony_ci case BRW_OPCODE_SHL: 306bf215546Sopenharmony_ci case BRW_OPCODE_DIM: 307bf215546Sopenharmony_ci case BRW_OPCODE_ASR: 308bf215546Sopenharmony_ci case BRW_OPCODE_CMPN: 309bf215546Sopenharmony_ci case BRW_OPCODE_F16TO32: 310bf215546Sopenharmony_ci case BRW_OPCODE_BFREV: 311bf215546Sopenharmony_ci case BRW_OPCODE_BFI1: 312bf215546Sopenharmony_ci case BRW_OPCODE_AVG: 313bf215546Sopenharmony_ci case BRW_OPCODE_FRC: 314bf215546Sopenharmony_ci case BRW_OPCODE_RNDU: 315bf215546Sopenharmony_ci case BRW_OPCODE_RNDD: 316bf215546Sopenharmony_ci case BRW_OPCODE_RNDE: 317bf215546Sopenharmony_ci case BRW_OPCODE_RNDZ: 318bf215546Sopenharmony_ci case BRW_OPCODE_MAC: 319bf215546Sopenharmony_ci case BRW_OPCODE_MACH: 320bf215546Sopenharmony_ci case BRW_OPCODE_LZD: 321bf215546Sopenharmony_ci case BRW_OPCODE_FBH: 322bf215546Sopenharmony_ci case BRW_OPCODE_FBL: 323bf215546Sopenharmony_ci case BRW_OPCODE_CBIT: 324bf215546Sopenharmony_ci case BRW_OPCODE_ADDC: 325bf215546Sopenharmony_ci case BRW_OPCODE_ROR: 326bf215546Sopenharmony_ci case BRW_OPCODE_ROL: 327bf215546Sopenharmony_ci case BRW_OPCODE_SUBB: 328bf215546Sopenharmony_ci case BRW_OPCODE_SAD2: 329bf215546Sopenharmony_ci case BRW_OPCODE_SADA2: 330bf215546Sopenharmony_ci case BRW_OPCODE_LINE: 331bf215546Sopenharmony_ci case BRW_OPCODE_NOP: 
332bf215546Sopenharmony_ci case SHADER_OPCODE_CLUSTER_BROADCAST: 333bf215546Sopenharmony_ci case SHADER_OPCODE_SCRATCH_HEADER: 334bf215546Sopenharmony_ci case FS_OPCODE_DDX_COARSE: 335bf215546Sopenharmony_ci case FS_OPCODE_DDX_FINE: 336bf215546Sopenharmony_ci case FS_OPCODE_DDY_COARSE: 337bf215546Sopenharmony_ci case FS_OPCODE_PIXEL_X: 338bf215546Sopenharmony_ci case FS_OPCODE_PIXEL_Y: 339bf215546Sopenharmony_ci case FS_OPCODE_SET_SAMPLE_ID: 340bf215546Sopenharmony_ci case VEC4_OPCODE_MOV_BYTES: 341bf215546Sopenharmony_ci case VEC4_OPCODE_UNPACK_UNIFORM: 342bf215546Sopenharmony_ci case VEC4_OPCODE_DOUBLE_TO_F32: 343bf215546Sopenharmony_ci case VEC4_OPCODE_DOUBLE_TO_D32: 344bf215546Sopenharmony_ci case VEC4_OPCODE_DOUBLE_TO_U32: 345bf215546Sopenharmony_ci case VEC4_OPCODE_TO_DOUBLE: 346bf215546Sopenharmony_ci case VEC4_OPCODE_PICK_LOW_32BIT: 347bf215546Sopenharmony_ci case VEC4_OPCODE_PICK_HIGH_32BIT: 348bf215546Sopenharmony_ci case VEC4_OPCODE_SET_LOW_32BIT: 349bf215546Sopenharmony_ci case VEC4_OPCODE_SET_HIGH_32BIT: 350bf215546Sopenharmony_ci case VEC4_OPCODE_ZERO_OOB_PUSH_REGS: 351bf215546Sopenharmony_ci case GS_OPCODE_SET_DWORD_2: 352bf215546Sopenharmony_ci case GS_OPCODE_SET_WRITE_OFFSET: 353bf215546Sopenharmony_ci case GS_OPCODE_SET_VERTEX_COUNT: 354bf215546Sopenharmony_ci case GS_OPCODE_PREPARE_CHANNEL_MASKS: 355bf215546Sopenharmony_ci case GS_OPCODE_SET_CHANNEL_MASKS: 356bf215546Sopenharmony_ci case GS_OPCODE_GET_INSTANCE_ID: 357bf215546Sopenharmony_ci case GS_OPCODE_SET_PRIMITIVE_ID: 358bf215546Sopenharmony_ci case GS_OPCODE_SVB_SET_DST_INDEX: 359bf215546Sopenharmony_ci case TCS_OPCODE_SRC0_010_IS_ZERO: 360bf215546Sopenharmony_ci case TCS_OPCODE_GET_PRIMITIVE_ID: 361bf215546Sopenharmony_ci case TES_OPCODE_GET_PRIMITIVE_ID: 362bf215546Sopenharmony_ci case SHADER_OPCODE_READ_SR_REG: 363bf215546Sopenharmony_ci if (devinfo->ver >= 11) { 364bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 0, 2, 0, 0, 2, 365bf215546Sopenharmony_ci 0, 10, 6 /* XXX 
*/, 14, 0, 0); 366bf215546Sopenharmony_ci } else if (devinfo->ver >= 8) { 367bf215546Sopenharmony_ci if (type_sz(info.tx) > 4) 368bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 0, 4, 0, 0, 4, 369bf215546Sopenharmony_ci 0, 12, 8 /* XXX */, 16 /* XXX */, 0, 0); 370bf215546Sopenharmony_ci else 371bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 0, 2, 0, 0, 2, 372bf215546Sopenharmony_ci 0, 8, 4, 12, 0, 0); 373bf215546Sopenharmony_ci } else if (devinfo->verx10 >= 75) { 374bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 0, 2, 0, 0, 2, 375bf215546Sopenharmony_ci 0, 10, 6 /* XXX */, 16, 0, 0); 376bf215546Sopenharmony_ci } else { 377bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 0, 2, 0, 0, 2, 378bf215546Sopenharmony_ci 0, 12, 8 /* XXX */, 18, 0, 0); 379bf215546Sopenharmony_ci } 380bf215546Sopenharmony_ci 381bf215546Sopenharmony_ci case BRW_OPCODE_MOV: 382bf215546Sopenharmony_ci case BRW_OPCODE_CMP: 383bf215546Sopenharmony_ci case BRW_OPCODE_ADD: 384bf215546Sopenharmony_ci case BRW_OPCODE_ADD3: 385bf215546Sopenharmony_ci case BRW_OPCODE_MUL: 386bf215546Sopenharmony_ci case SHADER_OPCODE_MOV_RELOC_IMM: 387bf215546Sopenharmony_ci case VEC4_OPCODE_MOV_FOR_SCRATCH: 388bf215546Sopenharmony_ci if (devinfo->ver >= 11) { 389bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 0, 2, 0, 0, 2, 390bf215546Sopenharmony_ci 0, 10, 6, 14, 0, 0); 391bf215546Sopenharmony_ci } else if (devinfo->ver >= 8) { 392bf215546Sopenharmony_ci if (type_sz(info.tx) > 4) 393bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 0, 4, 0, 0, 4, 394bf215546Sopenharmony_ci 0, 12, 8 /* XXX */, 16 /* XXX */, 0, 0); 395bf215546Sopenharmony_ci else 396bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 0, 2, 0, 0, 2, 397bf215546Sopenharmony_ci 0, 8, 4, 12, 0, 0); 398bf215546Sopenharmony_ci } else if (devinfo->verx10 >= 75) { 399bf215546Sopenharmony_ci if (info.tx == BRW_REGISTER_TYPE_F) 
400bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 0, 2, 0, 0, 2, 401bf215546Sopenharmony_ci 0, 12, 8 /* XXX */, 18, 0, 0); 402bf215546Sopenharmony_ci else 403bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 0, 2, 0, 0, 2, 404bf215546Sopenharmony_ci 0, 10, 6 /* XXX */, 16, 0, 0); 405bf215546Sopenharmony_ci } else if (devinfo->ver >= 7) { 406bf215546Sopenharmony_ci if (info.tx == BRW_REGISTER_TYPE_F) 407bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 0, 2, 0, 0, 2, 408bf215546Sopenharmony_ci 0, 14, 10 /* XXX */, 20, 0, 0); 409bf215546Sopenharmony_ci else 410bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 0, 2, 0, 0, 2, 411bf215546Sopenharmony_ci 0, 12, 8 /* XXX */, 18, 0, 0); 412bf215546Sopenharmony_ci } else { 413bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 0, 2 /* XXX */, 0, 414bf215546Sopenharmony_ci 0, 2 /* XXX */, 415bf215546Sopenharmony_ci 0, 12 /* XXX */, 8 /* XXX */, 18 /* XXX */, 416bf215546Sopenharmony_ci 0, 0); 417bf215546Sopenharmony_ci } 418bf215546Sopenharmony_ci 419bf215546Sopenharmony_ci case BRW_OPCODE_BFE: 420bf215546Sopenharmony_ci case BRW_OPCODE_BFI2: 421bf215546Sopenharmony_ci case BRW_OPCODE_CSEL: 422bf215546Sopenharmony_ci if (devinfo->ver >= 11) 423bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 0, 2, 1, 0, 2, 424bf215546Sopenharmony_ci 0, 10, 6 /* XXX */, 14 /* XXX */, 0, 0); 425bf215546Sopenharmony_ci else if (devinfo->ver >= 8) 426bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 0, 2, 1, 0, 2, 427bf215546Sopenharmony_ci 0, 8, 4 /* XXX */, 12 /* XXX */, 0, 0); 428bf215546Sopenharmony_ci else if (devinfo->verx10 >= 75) 429bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 0, 2, 1, 0, 2, 430bf215546Sopenharmony_ci 0, 10, 6 /* XXX */, 16 /* XXX */, 0, 0); 431bf215546Sopenharmony_ci else if (devinfo->ver >= 7) 432bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 0, 2, 1, 0, 2, 
433bf215546Sopenharmony_ci 0, 12, 8 /* XXX */, 18 /* XXX */, 0, 0); 434bf215546Sopenharmony_ci else 435bf215546Sopenharmony_ci abort(); 436bf215546Sopenharmony_ci 437bf215546Sopenharmony_ci case BRW_OPCODE_MAD: 438bf215546Sopenharmony_ci if (devinfo->ver >= 11) { 439bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 0, 2, 1, 0, 2, 440bf215546Sopenharmony_ci 0, 10, 6 /* XXX */, 14 /* XXX */, 0, 0); 441bf215546Sopenharmony_ci } else if (devinfo->ver >= 8) { 442bf215546Sopenharmony_ci if (type_sz(info.tx) > 4) 443bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 0, 4, 1, 0, 4, 444bf215546Sopenharmony_ci 0, 12, 8 /* XXX */, 16 /* XXX */, 0, 0); 445bf215546Sopenharmony_ci else 446bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 0, 2, 1, 0, 2, 447bf215546Sopenharmony_ci 0, 8, 4 /* XXX */, 12 /* XXX */, 0, 0); 448bf215546Sopenharmony_ci } else if (devinfo->verx10 >= 75) { 449bf215546Sopenharmony_ci if (info.tx == BRW_REGISTER_TYPE_F) 450bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 0, 2, 1, 0, 2, 451bf215546Sopenharmony_ci 0, 12, 8 /* XXX */, 18, 0, 0); 452bf215546Sopenharmony_ci else 453bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 0, 2, 1, 0, 2, 454bf215546Sopenharmony_ci 0, 10, 6 /* XXX */, 16, 0, 0); 455bf215546Sopenharmony_ci } else if (devinfo->ver >= 7) { 456bf215546Sopenharmony_ci if (info.tx == BRW_REGISTER_TYPE_F) 457bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 0, 2, 1, 0, 2, 458bf215546Sopenharmony_ci 0, 14, 10 /* XXX */, 20, 0, 0); 459bf215546Sopenharmony_ci else 460bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 0, 2, 1, 0, 2, 461bf215546Sopenharmony_ci 0, 12, 8 /* XXX */, 18, 0, 0); 462bf215546Sopenharmony_ci } else if (devinfo->ver >= 6) { 463bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 0, 2 /* XXX */, 1 /* XXX */, 464bf215546Sopenharmony_ci 0, 2 /* XXX */, 465bf215546Sopenharmony_ci 0, 12 /* XXX */, 8 /* XXX */, 18 /* 
XXX */, 466bf215546Sopenharmony_ci 0, 0); 467bf215546Sopenharmony_ci } else { 468bf215546Sopenharmony_ci abort(); 469bf215546Sopenharmony_ci } 470bf215546Sopenharmony_ci 471bf215546Sopenharmony_ci case BRW_OPCODE_F32TO16: 472bf215546Sopenharmony_ci if (devinfo->ver >= 11) 473bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 0, 4, 0, 0, 4, 474bf215546Sopenharmony_ci 0, 10, 6 /* XXX */, 14 /* XXX */, 0, 0); 475bf215546Sopenharmony_ci else if (devinfo->ver >= 8) 476bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 0, 4, 0, 0, 4, 477bf215546Sopenharmony_ci 0, 8, 4 /* XXX */, 12 /* XXX */, 0, 0); 478bf215546Sopenharmony_ci else if (devinfo->verx10 >= 75) 479bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 0, 4, 0, 0, 4, 480bf215546Sopenharmony_ci 0, 10, 6 /* XXX */, 16 /* XXX */, 0, 0); 481bf215546Sopenharmony_ci else if (devinfo->ver >= 7) 482bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 0, 4, 0, 0, 4, 483bf215546Sopenharmony_ci 0, 12, 8 /* XXX */, 18 /* XXX */, 0, 0); 484bf215546Sopenharmony_ci else 485bf215546Sopenharmony_ci abort(); 486bf215546Sopenharmony_ci 487bf215546Sopenharmony_ci case BRW_OPCODE_DP4: 488bf215546Sopenharmony_ci case BRW_OPCODE_DPH: 489bf215546Sopenharmony_ci case BRW_OPCODE_DP3: 490bf215546Sopenharmony_ci case BRW_OPCODE_DP2: 491bf215546Sopenharmony_ci if (devinfo->ver >= 8) 492bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 0, 2, 0, 0, 2, 493bf215546Sopenharmony_ci 0, 12, 8 /* XXX */, 16 /* XXX */, 0, 0); 494bf215546Sopenharmony_ci else if (devinfo->verx10 >= 75) 495bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 0, 2, 0, 0, 2, 496bf215546Sopenharmony_ci 0, 10, 6 /* XXX */, 16 /* XXX */, 0, 0); 497bf215546Sopenharmony_ci else 498bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 0, 2, 0, 0, 2, 499bf215546Sopenharmony_ci 0, 12, 8 /* XXX */, 18 /* XXX */, 0, 0); 500bf215546Sopenharmony_ci 501bf215546Sopenharmony_ci case 
BRW_OPCODE_DP4A: 502bf215546Sopenharmony_ci if (devinfo->ver >= 12) 503bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 0, 2, 1, 0, 2, 504bf215546Sopenharmony_ci 0, 10, 6 /* XXX */, 14 /* XXX */, 0, 0); 505bf215546Sopenharmony_ci else 506bf215546Sopenharmony_ci abort(); 507bf215546Sopenharmony_ci 508bf215546Sopenharmony_ci case SHADER_OPCODE_RCP: 509bf215546Sopenharmony_ci case SHADER_OPCODE_RSQ: 510bf215546Sopenharmony_ci case SHADER_OPCODE_SQRT: 511bf215546Sopenharmony_ci case SHADER_OPCODE_EXP2: 512bf215546Sopenharmony_ci case SHADER_OPCODE_LOG2: 513bf215546Sopenharmony_ci case SHADER_OPCODE_SIN: 514bf215546Sopenharmony_ci case SHADER_OPCODE_COS: 515bf215546Sopenharmony_ci case SHADER_OPCODE_POW: 516bf215546Sopenharmony_ci case SHADER_OPCODE_INT_QUOTIENT: 517bf215546Sopenharmony_ci case SHADER_OPCODE_INT_REMAINDER: 518bf215546Sopenharmony_ci if (devinfo->ver >= 6) { 519bf215546Sopenharmony_ci switch (info.op) { 520bf215546Sopenharmony_ci case SHADER_OPCODE_RCP: 521bf215546Sopenharmony_ci case SHADER_OPCODE_RSQ: 522bf215546Sopenharmony_ci case SHADER_OPCODE_SQRT: 523bf215546Sopenharmony_ci case SHADER_OPCODE_EXP2: 524bf215546Sopenharmony_ci case SHADER_OPCODE_LOG2: 525bf215546Sopenharmony_ci case SHADER_OPCODE_SIN: 526bf215546Sopenharmony_ci case SHADER_OPCODE_COS: 527bf215546Sopenharmony_ci if (devinfo->ver >= 8) 528bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_EM, -2, 4, 0, 0, 4, 529bf215546Sopenharmony_ci 0, 16, 0, 0, 0, 0); 530bf215546Sopenharmony_ci else if (devinfo->verx10 >= 75) 531bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_EM, 0, 2, 0, 0, 2, 532bf215546Sopenharmony_ci 0, 12, 0, 0, 0, 0); 533bf215546Sopenharmony_ci else 534bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_EM, 0, 2, 0, 0, 2, 535bf215546Sopenharmony_ci 0, 14, 0, 0, 0, 0); 536bf215546Sopenharmony_ci 537bf215546Sopenharmony_ci case SHADER_OPCODE_POW: 538bf215546Sopenharmony_ci if (devinfo->ver >= 8) 539bf215546Sopenharmony_ci return 
calculate_desc(info, EU_UNIT_EM, -2, 4, 0, 0, 8, 540bf215546Sopenharmony_ci 0, 24, 0, 0, 0, 0); 541bf215546Sopenharmony_ci else if (devinfo->verx10 >= 75) 542bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_EM, 0, 2, 0, 0, 4, 543bf215546Sopenharmony_ci 0, 20, 0, 0, 0, 0); 544bf215546Sopenharmony_ci else 545bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_EM, 0, 2, 0, 0, 4, 546bf215546Sopenharmony_ci 0, 22, 0, 0, 0, 0); 547bf215546Sopenharmony_ci 548bf215546Sopenharmony_ci case SHADER_OPCODE_INT_QUOTIENT: 549bf215546Sopenharmony_ci case SHADER_OPCODE_INT_REMAINDER: 550bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_EM, 2, 0, 0, 26, 0, 551bf215546Sopenharmony_ci 0, 28 /* XXX */, 0, 0, 0, 0); 552bf215546Sopenharmony_ci 553bf215546Sopenharmony_ci default: 554bf215546Sopenharmony_ci abort(); 555bf215546Sopenharmony_ci } 556bf215546Sopenharmony_ci } else { 557bf215546Sopenharmony_ci switch (info.op) { 558bf215546Sopenharmony_ci case SHADER_OPCODE_RCP: 559bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_EM, 2, 0, 0, 0, 8, 560bf215546Sopenharmony_ci 0, 22, 0, 0, 0, 8); 561bf215546Sopenharmony_ci 562bf215546Sopenharmony_ci case SHADER_OPCODE_RSQ: 563bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_EM, 2, 0, 0, 0, 16, 564bf215546Sopenharmony_ci 0, 44, 0, 0, 0, 8); 565bf215546Sopenharmony_ci 566bf215546Sopenharmony_ci case SHADER_OPCODE_INT_QUOTIENT: 567bf215546Sopenharmony_ci case SHADER_OPCODE_SQRT: 568bf215546Sopenharmony_ci case SHADER_OPCODE_LOG2: 569bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_EM, 2, 0, 0, 0, 24, 570bf215546Sopenharmony_ci 0, 66, 0, 0, 0, 8); 571bf215546Sopenharmony_ci 572bf215546Sopenharmony_ci case SHADER_OPCODE_INT_REMAINDER: 573bf215546Sopenharmony_ci case SHADER_OPCODE_EXP2: 574bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_EM, 2, 0, 0, 0, 32, 575bf215546Sopenharmony_ci 0, 88, 0, 0, 0, 8); 576bf215546Sopenharmony_ci 577bf215546Sopenharmony_ci case SHADER_OPCODE_SIN: 
578bf215546Sopenharmony_ci case SHADER_OPCODE_COS: 579bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_EM, 2, 0, 0, 0, 48, 580bf215546Sopenharmony_ci 0, 132, 0, 0, 0, 8); 581bf215546Sopenharmony_ci 582bf215546Sopenharmony_ci case SHADER_OPCODE_POW: 583bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_EM, 2, 0, 0, 0, 64, 584bf215546Sopenharmony_ci 0, 176, 0, 0, 0, 8); 585bf215546Sopenharmony_ci 586bf215546Sopenharmony_ci default: 587bf215546Sopenharmony_ci abort(); 588bf215546Sopenharmony_ci } 589bf215546Sopenharmony_ci } 590bf215546Sopenharmony_ci 591bf215546Sopenharmony_ci case BRW_OPCODE_DO: 592bf215546Sopenharmony_ci if (devinfo->ver >= 6) 593bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_NULL, 0, 0, 0, 0, 0, 594bf215546Sopenharmony_ci 0, 0, 0, 0, 0, 0); 595bf215546Sopenharmony_ci else 596bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_NULL, 2 /* XXX */, 0, 0, 0, 0, 597bf215546Sopenharmony_ci 0, 0, 0, 0, 0, 0); 598bf215546Sopenharmony_ci 599bf215546Sopenharmony_ci case BRW_OPCODE_IF: 600bf215546Sopenharmony_ci case BRW_OPCODE_ELSE: 601bf215546Sopenharmony_ci case BRW_OPCODE_ENDIF: 602bf215546Sopenharmony_ci case BRW_OPCODE_WHILE: 603bf215546Sopenharmony_ci case BRW_OPCODE_BREAK: 604bf215546Sopenharmony_ci case BRW_OPCODE_CONTINUE: 605bf215546Sopenharmony_ci case BRW_OPCODE_HALT: 606bf215546Sopenharmony_ci if (devinfo->ver >= 8) 607bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_NULL, 8, 0, 0, 0, 0, 608bf215546Sopenharmony_ci 0, 0, 0, 0, 0, 0); 609bf215546Sopenharmony_ci else if (devinfo->verx10 >= 75) 610bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_NULL, 6, 0, 0, 0, 0, 611bf215546Sopenharmony_ci 0, 0, 0, 0, 0, 0); 612bf215546Sopenharmony_ci else 613bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_NULL, 2, 0, 0, 0, 0, 614bf215546Sopenharmony_ci 0, 0, 0, 0, 0, 0); 615bf215546Sopenharmony_ci 616bf215546Sopenharmony_ci case FS_OPCODE_LINTERP: 617bf215546Sopenharmony_ci if 
(devinfo->ver >= 8) 618bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 0, 4, 0, 0, 4, 619bf215546Sopenharmony_ci 0, 12, 8 /* XXX */, 16 /* XXX */, 0, 0); 620bf215546Sopenharmony_ci else if (devinfo->verx10 >= 75) 621bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 0, 2, 0, 0, 2, 622bf215546Sopenharmony_ci 0, 10, 6 /* XXX */, 16 /* XXX */, 0, 0); 623bf215546Sopenharmony_ci else 624bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 0, 2, 0, 0, 2, 625bf215546Sopenharmony_ci 0, 12, 8 /* XXX */, 18 /* XXX */, 0, 0); 626bf215546Sopenharmony_ci 627bf215546Sopenharmony_ci case BRW_OPCODE_LRP: 628bf215546Sopenharmony_ci if (devinfo->ver >= 8) 629bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 0, 4, 1, 0, 4, 630bf215546Sopenharmony_ci 0, 12, 8 /* XXX */, 16 /* XXX */, 0, 0); 631bf215546Sopenharmony_ci else if (devinfo->verx10 >= 75) 632bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 0, 2, 1, 0, 2, 633bf215546Sopenharmony_ci 0, 10, 6 /* XXX */, 16 /* XXX */, 0, 0); 634bf215546Sopenharmony_ci else if (devinfo->ver >= 6) 635bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 0, 2, 1, 0, 2, 636bf215546Sopenharmony_ci 0, 12, 8 /* XXX */, 18 /* XXX */, 0, 0); 637bf215546Sopenharmony_ci else 638bf215546Sopenharmony_ci abort(); 639bf215546Sopenharmony_ci 640bf215546Sopenharmony_ci case FS_OPCODE_PACK_HALF_2x16_SPLIT: 641bf215546Sopenharmony_ci if (devinfo->ver >= 11) 642bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 20, 6, 0, 0, 6, 643bf215546Sopenharmony_ci 0, 10 /* XXX */, 6 /* XXX */, 644bf215546Sopenharmony_ci 14 /* XXX */, 0, 0); 645bf215546Sopenharmony_ci else if (devinfo->ver >= 8) 646bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 16, 6, 0, 0, 6, 647bf215546Sopenharmony_ci 0, 8 /* XXX */, 4 /* XXX */, 648bf215546Sopenharmony_ci 12 /* XXX */, 0, 0); 649bf215546Sopenharmony_ci else if (devinfo->verx10 >= 75) 650bf215546Sopenharmony_ci return 
calculate_desc(info, EU_UNIT_FPU, 20, 6, 0, 0, 6, 651bf215546Sopenharmony_ci 0, 10 /* XXX */, 6 /* XXX */, 652bf215546Sopenharmony_ci 16 /* XXX */, 0, 0); 653bf215546Sopenharmony_ci else if (devinfo->ver >= 7) 654bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 24, 6, 0, 0, 6, 655bf215546Sopenharmony_ci 0, 12 /* XXX */, 8 /* XXX */, 656bf215546Sopenharmony_ci 18 /* XXX */, 0, 0); 657bf215546Sopenharmony_ci else 658bf215546Sopenharmony_ci abort(); 659bf215546Sopenharmony_ci 660bf215546Sopenharmony_ci case SHADER_OPCODE_MOV_INDIRECT: 661bf215546Sopenharmony_ci if (devinfo->ver >= 11) 662bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 34, 0, 0, 34, 0, 663bf215546Sopenharmony_ci 0, 10 /* XXX */, 6 /* XXX */, 664bf215546Sopenharmony_ci 14 /* XXX */, 0, 0); 665bf215546Sopenharmony_ci else if (devinfo->ver >= 8) 666bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 34, 0, 0, 34, 0, 667bf215546Sopenharmony_ci 0, 8 /* XXX */, 4 /* XXX */, 668bf215546Sopenharmony_ci 12 /* XXX */, 0, 0); 669bf215546Sopenharmony_ci else if (devinfo->verx10 >= 75) 670bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 34, 0, 0, 34, 0, 671bf215546Sopenharmony_ci 0, 10 /* XXX */, 6 /* XXX */, 672bf215546Sopenharmony_ci 16 /* XXX */, 0, 0); 673bf215546Sopenharmony_ci else 674bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 34, 0, 0, 34, 0, 675bf215546Sopenharmony_ci 0, 12 /* XXX */, 8 /* XXX */, 676bf215546Sopenharmony_ci 18 /* XXX */, 0, 0); 677bf215546Sopenharmony_ci 678bf215546Sopenharmony_ci case SHADER_OPCODE_BROADCAST: 679bf215546Sopenharmony_ci if (devinfo->ver >= 11) 680bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 20 /* XXX */, 0, 0, 4, 0, 681bf215546Sopenharmony_ci 0, 10, 6 /* XXX */, 14 /* XXX */, 0, 0); 682bf215546Sopenharmony_ci else if (devinfo->ver >= 8) 683bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 18, 0, 0, 4, 0, 684bf215546Sopenharmony_ci 0, 8, 4 /* XXX */, 12 /* 
XXX */, 0, 0); 685bf215546Sopenharmony_ci else if (devinfo->verx10 >= 75) 686bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 18, 0, 0, 4, 0, 687bf215546Sopenharmony_ci 0, 10, 6 /* XXX */, 16 /* XXX */, 0, 0); 688bf215546Sopenharmony_ci else if (devinfo->ver >= 7) 689bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 20, 0, 0, 4, 0, 690bf215546Sopenharmony_ci 0, 12, 8 /* XXX */, 18 /* XXX */, 0, 0); 691bf215546Sopenharmony_ci else 692bf215546Sopenharmony_ci abort(); 693bf215546Sopenharmony_ci 694bf215546Sopenharmony_ci case SHADER_OPCODE_FIND_LIVE_CHANNEL: 695bf215546Sopenharmony_ci case SHADER_OPCODE_FIND_LAST_LIVE_CHANNEL: 696bf215546Sopenharmony_ci if (devinfo->ver >= 11) 697bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 2, 0, 0, 2, 0, 698bf215546Sopenharmony_ci 0, 10, 6 /* XXX */, 14 /* XXX */, 0, 0); 699bf215546Sopenharmony_ci else if (devinfo->ver >= 8) 700bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 2, 0, 0, 2, 0, 701bf215546Sopenharmony_ci 0, 8, 4 /* XXX */, 12 /* XXX */, 0, 0); 702bf215546Sopenharmony_ci else if (devinfo->verx10 >= 75) 703bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 36, 0, 0, 6, 0, 704bf215546Sopenharmony_ci 0, 10, 6 /* XXX */, 16 /* XXX */, 0, 0); 705bf215546Sopenharmony_ci else if (devinfo->ver >= 7) 706bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 40, 0, 0, 6, 0, 707bf215546Sopenharmony_ci 0, 12, 8 /* XXX */, 18 /* XXX */, 0, 0); 708bf215546Sopenharmony_ci else 709bf215546Sopenharmony_ci abort(); 710bf215546Sopenharmony_ci 711bf215546Sopenharmony_ci case SHADER_OPCODE_RND_MODE: 712bf215546Sopenharmony_ci case SHADER_OPCODE_FLOAT_CONTROL_MODE: 713bf215546Sopenharmony_ci if (devinfo->ver >= 11) 714bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 24 /* XXX */, 0, 0, 715bf215546Sopenharmony_ci 4 /* XXX */, 0, 716bf215546Sopenharmony_ci 0, 0, 0, 0, 0, 0); 717bf215546Sopenharmony_ci else if (devinfo->ver >= 8) 
718bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 20 /* XXX */, 0, 0, 719bf215546Sopenharmony_ci 4 /* XXX */, 0, 720bf215546Sopenharmony_ci 0, 0, 0, 0, 0, 0); 721bf215546Sopenharmony_ci else if (devinfo->verx10 >= 75) 722bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 24 /* XXX */, 0, 0, 723bf215546Sopenharmony_ci 4 /* XXX */, 0, 724bf215546Sopenharmony_ci 0, 0, 0, 0, 0, 0); 725bf215546Sopenharmony_ci else if (devinfo->ver >= 6) 726bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 28 /* XXX */, 0, 0, 727bf215546Sopenharmony_ci 4 /* XXX */, 0, 728bf215546Sopenharmony_ci 0, 0, 0, 0, 0, 0); 729bf215546Sopenharmony_ci else 730bf215546Sopenharmony_ci abort(); 731bf215546Sopenharmony_ci 732bf215546Sopenharmony_ci case SHADER_OPCODE_SHUFFLE: 733bf215546Sopenharmony_ci if (devinfo->ver >= 11) 734bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 44 /* XXX */, 0, 0, 735bf215546Sopenharmony_ci 44 /* XXX */, 0, 736bf215546Sopenharmony_ci 0, 10 /* XXX */, 6 /* XXX */, 737bf215546Sopenharmony_ci 14 /* XXX */, 0, 0); 738bf215546Sopenharmony_ci else if (devinfo->ver >= 8) 739bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 42 /* XXX */, 0, 0, 740bf215546Sopenharmony_ci 42 /* XXX */, 0, 741bf215546Sopenharmony_ci 0, 8 /* XXX */, 4 /* XXX */, 742bf215546Sopenharmony_ci 12 /* XXX */, 0, 0); 743bf215546Sopenharmony_ci else if (devinfo->verx10 >= 75) 744bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 0, 44 /* XXX */, 0, 745bf215546Sopenharmony_ci 0, 44 /* XXX */, 746bf215546Sopenharmony_ci 0, 10 /* XXX */, 6 /* XXX */, 747bf215546Sopenharmony_ci 16 /* XXX */, 0, 0); 748bf215546Sopenharmony_ci else if (devinfo->ver >= 6) 749bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 0, 46 /* XXX */, 0, 750bf215546Sopenharmony_ci 0, 46 /* XXX */, 751bf215546Sopenharmony_ci 0, 12 /* XXX */, 8 /* XXX */, 752bf215546Sopenharmony_ci 18 /* XXX */, 0, 0); 753bf215546Sopenharmony_ci else 
754bf215546Sopenharmony_ci abort(); 755bf215546Sopenharmony_ci 756bf215546Sopenharmony_ci case SHADER_OPCODE_SEL_EXEC: 757bf215546Sopenharmony_ci if (devinfo->ver >= 11) 758bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 10 /* XXX */, 4 /* XXX */, 0, 759bf215546Sopenharmony_ci 0, 4 /* XXX */, 760bf215546Sopenharmony_ci 0, 10 /* XXX */, 6 /* XXX */, 761bf215546Sopenharmony_ci 14 /* XXX */, 0, 0); 762bf215546Sopenharmony_ci else if (devinfo->ver >= 8) 763bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 8 /* XXX */, 4 /* XXX */, 0, 764bf215546Sopenharmony_ci 0, 4 /* XXX */, 765bf215546Sopenharmony_ci 0, 8 /* XXX */, 4 /* XXX */, 766bf215546Sopenharmony_ci 12 /* XXX */, 0, 0); 767bf215546Sopenharmony_ci else if (devinfo->verx10 >= 75) 768bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 10 /* XXX */, 4 /* XXX */, 0, 769bf215546Sopenharmony_ci 0, 4 /* XXX */, 770bf215546Sopenharmony_ci 0, 10 /* XXX */, 6 /* XXX */, 771bf215546Sopenharmony_ci 16 /* XXX */, 0, 0); 772bf215546Sopenharmony_ci else 773bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 12 /* XXX */, 4 /* XXX */, 0, 774bf215546Sopenharmony_ci 0, 4 /* XXX */, 775bf215546Sopenharmony_ci 0, 12 /* XXX */, 8 /* XXX */, 776bf215546Sopenharmony_ci 18 /* XXX */, 0, 0); 777bf215546Sopenharmony_ci 778bf215546Sopenharmony_ci case SHADER_OPCODE_QUAD_SWIZZLE: 779bf215546Sopenharmony_ci if (devinfo->ver >= 11) 780bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 0 /* XXX */, 8 /* XXX */, 0, 781bf215546Sopenharmony_ci 0, 8 /* XXX */, 782bf215546Sopenharmony_ci 0, 10 /* XXX */, 6 /* XXX */, 783bf215546Sopenharmony_ci 14 /* XXX */, 0, 0); 784bf215546Sopenharmony_ci else if (devinfo->ver >= 8) 785bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 0 /* XXX */, 8 /* XXX */, 0, 786bf215546Sopenharmony_ci 0, 8 /* XXX */, 787bf215546Sopenharmony_ci 0, 8 /* XXX */, 4 /* XXX */, 788bf215546Sopenharmony_ci 12 /* XXX */, 0, 0); 
789bf215546Sopenharmony_ci else if (devinfo->verx10 >= 75) 790bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 0 /* XXX */, 8 /* XXX */, 0, 791bf215546Sopenharmony_ci 0, 8 /* XXX */, 792bf215546Sopenharmony_ci 0, 10 /* XXX */, 6 /* XXX */, 793bf215546Sopenharmony_ci 16 /* XXX */, 0, 0); 794bf215546Sopenharmony_ci else 795bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 0 /* XXX */, 8 /* XXX */, 0, 796bf215546Sopenharmony_ci 0, 8 /* XXX */, 797bf215546Sopenharmony_ci 0, 12 /* XXX */, 8 /* XXX */, 798bf215546Sopenharmony_ci 18 /* XXX */, 0, 0); 799bf215546Sopenharmony_ci 800bf215546Sopenharmony_ci case FS_OPCODE_DDY_FINE: 801bf215546Sopenharmony_ci if (devinfo->ver >= 11) 802bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 0, 14, 0, 0, 4, 803bf215546Sopenharmony_ci 0, 10, 6 /* XXX */, 14 /* XXX */, 0, 0); 804bf215546Sopenharmony_ci else if (devinfo->ver >= 8) 805bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 0, 2, 0, 0, 2, 806bf215546Sopenharmony_ci 0, 8, 4 /* XXX */, 12 /* XXX */, 0, 0); 807bf215546Sopenharmony_ci else if (devinfo->verx10 >= 75) 808bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 0, 2, 0, 0, 2, 809bf215546Sopenharmony_ci 0, 12, 8 /* XXX */, 18 /* XXX */, 0, 0); 810bf215546Sopenharmony_ci else 811bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 0, 2, 0, 0, 2, 812bf215546Sopenharmony_ci 0, 14, 10 /* XXX */, 20 /* XXX */, 0, 0); 813bf215546Sopenharmony_ci 814bf215546Sopenharmony_ci case FS_OPCODE_LOAD_LIVE_CHANNELS: 815bf215546Sopenharmony_ci if (devinfo->ver >= 11) 816bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 2 /* XXX */, 0, 0, 817bf215546Sopenharmony_ci 2 /* XXX */, 0, 818bf215546Sopenharmony_ci 0, 0, 0, 10 /* XXX */, 0, 0); 819bf215546Sopenharmony_ci else if (devinfo->ver >= 8) 820bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 0, 2 /* XXX */, 0, 821bf215546Sopenharmony_ci 0, 2 /* XXX */, 
822bf215546Sopenharmony_ci 0, 0, 0, 8 /* XXX */, 0, 0); 823bf215546Sopenharmony_ci else 824bf215546Sopenharmony_ci abort(); 825bf215546Sopenharmony_ci 826bf215546Sopenharmony_ci case VEC4_OPCODE_PACK_BYTES: 827bf215546Sopenharmony_ci if (devinfo->ver >= 8) 828bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 4 /* XXX */, 0, 0, 829bf215546Sopenharmony_ci 4 /* XXX */, 0, 830bf215546Sopenharmony_ci 0, 8 /* XXX */, 4 /* XXX */, 12 /* XXX */, 831bf215546Sopenharmony_ci 0, 0); 832bf215546Sopenharmony_ci else if (devinfo->verx10 >= 75) 833bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 4 /* XXX */, 0, 0, 834bf215546Sopenharmony_ci 4 /* XXX */, 0, 835bf215546Sopenharmony_ci 0, 10 /* XXX */, 6 /* XXX */, 16 /* XXX */, 836bf215546Sopenharmony_ci 0, 0); 837bf215546Sopenharmony_ci else 838bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 4 /* XXX */, 0, 0, 839bf215546Sopenharmony_ci 4 /* XXX */, 0, 840bf215546Sopenharmony_ci 0, 12 /* XXX */, 8 /* XXX */, 18 /* XXX */, 841bf215546Sopenharmony_ci 0, 0); 842bf215546Sopenharmony_ci 843bf215546Sopenharmony_ci case VS_OPCODE_UNPACK_FLAGS_SIMD4X2: 844bf215546Sopenharmony_ci case TCS_OPCODE_GET_INSTANCE_ID: 845bf215546Sopenharmony_ci case VEC4_TCS_OPCODE_SET_INPUT_URB_OFFSETS: 846bf215546Sopenharmony_ci case VEC4_TCS_OPCODE_SET_OUTPUT_URB_OFFSETS: 847bf215546Sopenharmony_ci case TES_OPCODE_CREATE_INPUT_READ_HEADER: 848bf215546Sopenharmony_ci if (devinfo->ver >= 8) 849bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 22 /* XXX */, 0, 0, 850bf215546Sopenharmony_ci 6 /* XXX */, 0, 851bf215546Sopenharmony_ci 0, 8 /* XXX */, 4 /* XXX */, 12 /* XXX */, 852bf215546Sopenharmony_ci 0, 0); 853bf215546Sopenharmony_ci else if (devinfo->verx10 >= 75) 854bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 26 /* XXX */, 0, 0, 855bf215546Sopenharmony_ci 6 /* XXX */, 0, 856bf215546Sopenharmony_ci 0, 10 /* XXX */, 6 /* XXX */, 16 /* XXX */, 857bf215546Sopenharmony_ci 0, 0); 
858bf215546Sopenharmony_ci else 859bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 30 /* XXX */, 0, 0, 860bf215546Sopenharmony_ci 6 /* XXX */, 0, 861bf215546Sopenharmony_ci 0, 12 /* XXX */, 8 /* XXX */, 18 /* XXX */, 862bf215546Sopenharmony_ci 0, 0); 863bf215546Sopenharmony_ci 864bf215546Sopenharmony_ci case GS_OPCODE_FF_SYNC_SET_PRIMITIVES: 865bf215546Sopenharmony_ci case TCS_OPCODE_CREATE_BARRIER_HEADER: 866bf215546Sopenharmony_ci if (devinfo->ver >= 8) 867bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 32 /* XXX */, 0, 0, 868bf215546Sopenharmony_ci 8 /* XXX */, 0, 869bf215546Sopenharmony_ci 0, 8 /* XXX */, 4 /* XXX */, 12 /* XXX */, 870bf215546Sopenharmony_ci 0, 0); 871bf215546Sopenharmony_ci else if (devinfo->verx10 >= 75) 872bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 38 /* XXX */, 0, 0, 873bf215546Sopenharmony_ci 8 /* XXX */, 0, 874bf215546Sopenharmony_ci 0, 10 /* XXX */, 6 /* XXX */, 16 /* XXX */, 875bf215546Sopenharmony_ci 0, 0); 876bf215546Sopenharmony_ci else if (devinfo->ver >= 6) 877bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 44 /* XXX */, 0, 0, 878bf215546Sopenharmony_ci 8 /* XXX */, 0, 879bf215546Sopenharmony_ci 0, 12 /* XXX */, 8 /* XXX */, 18 /* XXX */, 880bf215546Sopenharmony_ci 0, 0); 881bf215546Sopenharmony_ci else 882bf215546Sopenharmony_ci abort(); 883bf215546Sopenharmony_ci 884bf215546Sopenharmony_ci case TES_OPCODE_ADD_INDIRECT_URB_OFFSET: 885bf215546Sopenharmony_ci if (devinfo->ver >= 8) 886bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 12 /* XXX */, 0, 0, 887bf215546Sopenharmony_ci 4 /* XXX */, 0, 888bf215546Sopenharmony_ci 0, 8 /* XXX */, 4 /* XXX */, 12 /* XXX */, 889bf215546Sopenharmony_ci 0, 0); 890bf215546Sopenharmony_ci else if (devinfo->verx10 >= 75) 891bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 14 /* XXX */, 0, 0, 892bf215546Sopenharmony_ci 4 /* XXX */, 0, 893bf215546Sopenharmony_ci 0, 10 /* XXX */, 6 /* XXX */, 16 /* 
XXX */, 894bf215546Sopenharmony_ci 0, 0); 895bf215546Sopenharmony_ci else if (devinfo->ver >= 7) 896bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_FPU, 16 /* XXX */, 0, 0, 897bf215546Sopenharmony_ci 4 /* XXX */, 0, 898bf215546Sopenharmony_ci 0, 12 /* XXX */, 8 /* XXX */, 18 /* XXX */, 899bf215546Sopenharmony_ci 0, 0); 900bf215546Sopenharmony_ci else 901bf215546Sopenharmony_ci abort(); 902bf215546Sopenharmony_ci 903bf215546Sopenharmony_ci case SHADER_OPCODE_TEX: 904bf215546Sopenharmony_ci case FS_OPCODE_TXB: 905bf215546Sopenharmony_ci case SHADER_OPCODE_TXD: 906bf215546Sopenharmony_ci case SHADER_OPCODE_TXF: 907bf215546Sopenharmony_ci case SHADER_OPCODE_TXF_LZ: 908bf215546Sopenharmony_ci case SHADER_OPCODE_TXL: 909bf215546Sopenharmony_ci case SHADER_OPCODE_TXL_LZ: 910bf215546Sopenharmony_ci case SHADER_OPCODE_TXF_CMS: 911bf215546Sopenharmony_ci case SHADER_OPCODE_TXF_CMS_W: 912bf215546Sopenharmony_ci case SHADER_OPCODE_TXF_UMS: 913bf215546Sopenharmony_ci case SHADER_OPCODE_TXF_MCS: 914bf215546Sopenharmony_ci case SHADER_OPCODE_TXS: 915bf215546Sopenharmony_ci case SHADER_OPCODE_LOD: 916bf215546Sopenharmony_ci case SHADER_OPCODE_GET_BUFFER_SIZE: 917bf215546Sopenharmony_ci case SHADER_OPCODE_TG4: 918bf215546Sopenharmony_ci case SHADER_OPCODE_TG4_OFFSET: 919bf215546Sopenharmony_ci case SHADER_OPCODE_SAMPLEINFO: 920bf215546Sopenharmony_ci case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GFX4: 921bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_SAMPLER, 2, 0, 0, 0, 16 /* XXX */, 922bf215546Sopenharmony_ci 8 /* XXX */, 750 /* XXX */, 0, 0, 923bf215546Sopenharmony_ci 2 /* XXX */, 0); 924bf215546Sopenharmony_ci 925bf215546Sopenharmony_ci case VEC4_OPCODE_URB_READ: 926bf215546Sopenharmony_ci case VEC4_VS_OPCODE_URB_WRITE: 927bf215546Sopenharmony_ci case VEC4_GS_OPCODE_URB_WRITE: 928bf215546Sopenharmony_ci case VEC4_GS_OPCODE_URB_WRITE_ALLOCATE: 929bf215546Sopenharmony_ci case GS_OPCODE_THREAD_END: 930bf215546Sopenharmony_ci case GS_OPCODE_FF_SYNC: 
931bf215546Sopenharmony_ci case VEC4_TCS_OPCODE_URB_WRITE: 932bf215546Sopenharmony_ci case TCS_OPCODE_RELEASE_INPUT: 933bf215546Sopenharmony_ci case TCS_OPCODE_THREAD_END: 934bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_URB, 2, 0, 0, 0, 6 /* XXX */, 935bf215546Sopenharmony_ci 32 /* XXX */, 200 /* XXX */, 0, 0, 0, 0); 936bf215546Sopenharmony_ci 937bf215546Sopenharmony_ci case SHADER_OPCODE_MEMORY_FENCE: 938bf215546Sopenharmony_ci case SHADER_OPCODE_INTERLOCK: 939bf215546Sopenharmony_ci switch (info.sfid) { 940bf215546Sopenharmony_ci case GFX6_SFID_DATAPORT_RENDER_CACHE: 941bf215546Sopenharmony_ci if (devinfo->ver >= 7) 942bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_DP_RC, 2, 0, 0, 30 /* XXX */, 0, 943bf215546Sopenharmony_ci 10 /* XXX */, 300 /* XXX */, 0, 0, 0, 0); 944bf215546Sopenharmony_ci else 945bf215546Sopenharmony_ci abort(); 946bf215546Sopenharmony_ci 947bf215546Sopenharmony_ci case BRW_SFID_URB: 948bf215546Sopenharmony_ci case GFX7_SFID_DATAPORT_DATA_CACHE: 949bf215546Sopenharmony_ci case GFX12_SFID_SLM: 950bf215546Sopenharmony_ci case GFX12_SFID_TGM: 951bf215546Sopenharmony_ci case GFX12_SFID_UGM: 952bf215546Sopenharmony_ci case HSW_SFID_DATAPORT_DATA_CACHE_1: 953bf215546Sopenharmony_ci if (devinfo->ver >= 7) 954bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_DP_DC, 2, 0, 0, 30 /* XXX */, 0, 955bf215546Sopenharmony_ci 10 /* XXX */, 100 /* XXX */, 0, 0, 0, 0); 956bf215546Sopenharmony_ci else 957bf215546Sopenharmony_ci abort(); 958bf215546Sopenharmony_ci 959bf215546Sopenharmony_ci default: 960bf215546Sopenharmony_ci abort(); 961bf215546Sopenharmony_ci } 962bf215546Sopenharmony_ci 963bf215546Sopenharmony_ci case SHADER_OPCODE_GFX4_SCRATCH_READ: 964bf215546Sopenharmony_ci case SHADER_OPCODE_GFX4_SCRATCH_WRITE: 965bf215546Sopenharmony_ci case SHADER_OPCODE_GFX7_SCRATCH_READ: 966bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_DP_DC, 2, 0, 0, 0, 8 /* XXX */, 967bf215546Sopenharmony_ci 10 /* XXX */, 100 /* XXX 
*/, 0, 0, 0, 0); 968bf215546Sopenharmony_ci 969bf215546Sopenharmony_ci case VEC4_OPCODE_UNTYPED_ATOMIC: 970bf215546Sopenharmony_ci if (devinfo->ver >= 7) 971bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_DP_DC, 2, 0, 0, 972bf215546Sopenharmony_ci 30 /* XXX */, 400 /* XXX */, 973bf215546Sopenharmony_ci 10 /* XXX */, 100 /* XXX */, 0, 0, 974bf215546Sopenharmony_ci 0, 400 /* XXX */); 975bf215546Sopenharmony_ci else 976bf215546Sopenharmony_ci abort(); 977bf215546Sopenharmony_ci 978bf215546Sopenharmony_ci case VEC4_OPCODE_UNTYPED_SURFACE_READ: 979bf215546Sopenharmony_ci case VEC4_OPCODE_UNTYPED_SURFACE_WRITE: 980bf215546Sopenharmony_ci if (devinfo->ver >= 7) 981bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_DP_DC, 2, 0, 0, 982bf215546Sopenharmony_ci 0, 20 /* XXX */, 983bf215546Sopenharmony_ci 10 /* XXX */, 100 /* XXX */, 0, 0, 984bf215546Sopenharmony_ci 0, 0); 985bf215546Sopenharmony_ci else 986bf215546Sopenharmony_ci abort(); 987bf215546Sopenharmony_ci 988bf215546Sopenharmony_ci case FS_OPCODE_FB_WRITE: 989bf215546Sopenharmony_ci case FS_OPCODE_FB_READ: 990bf215546Sopenharmony_ci case FS_OPCODE_REP_FB_WRITE: 991bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_DP_RC, 2, 0, 0, 0, 450 /* XXX */, 992bf215546Sopenharmony_ci 10 /* XXX */, 300 /* XXX */, 0, 0, 0, 0); 993bf215546Sopenharmony_ci 994bf215546Sopenharmony_ci case GS_OPCODE_SVB_WRITE: 995bf215546Sopenharmony_ci if (devinfo->ver >= 6) 996bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_DP_RC, 2 /* XXX */, 0, 0, 997bf215546Sopenharmony_ci 0, 450 /* XXX */, 998bf215546Sopenharmony_ci 10 /* XXX */, 300 /* XXX */, 0, 0, 999bf215546Sopenharmony_ci 0, 0); 1000bf215546Sopenharmony_ci else 1001bf215546Sopenharmony_ci abort(); 1002bf215546Sopenharmony_ci 1003bf215546Sopenharmony_ci case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: 1004bf215546Sopenharmony_ci case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GFX7: 1005bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_DP_CC, 2, 0, 0, 
0, 16 /* XXX */, 1006bf215546Sopenharmony_ci 10 /* XXX */, 100 /* XXX */, 0, 0, 0, 0); 1007bf215546Sopenharmony_ci 1008bf215546Sopenharmony_ci case VS_OPCODE_PULL_CONSTANT_LOAD: 1009bf215546Sopenharmony_ci case VS_OPCODE_PULL_CONSTANT_LOAD_GFX7: 1010bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_SAMPLER, 2, 0, 0, 0, 16, 1011bf215546Sopenharmony_ci 8, 750, 0, 0, 2, 0); 1012bf215546Sopenharmony_ci 1013bf215546Sopenharmony_ci case FS_OPCODE_INTERPOLATE_AT_SAMPLE: 1014bf215546Sopenharmony_ci case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET: 1015bf215546Sopenharmony_ci case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET: 1016bf215546Sopenharmony_ci if (devinfo->ver >= 7) 1017bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_PI, 2, 0, 0, 14 /* XXX */, 0, 1018bf215546Sopenharmony_ci 0, 90 /* XXX */, 0, 0, 0, 0); 1019bf215546Sopenharmony_ci else 1020bf215546Sopenharmony_ci abort(); 1021bf215546Sopenharmony_ci 1022bf215546Sopenharmony_ci case SHADER_OPCODE_BARRIER: 1023bf215546Sopenharmony_ci if (devinfo->ver >= 7) 1024bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_GATEWAY, 90 /* XXX */, 0, 0, 1025bf215546Sopenharmony_ci 0 /* XXX */, 0, 1026bf215546Sopenharmony_ci 0, 0, 0, 0, 0, 0); 1027bf215546Sopenharmony_ci else 1028bf215546Sopenharmony_ci abort(); 1029bf215546Sopenharmony_ci 1030bf215546Sopenharmony_ci case CS_OPCODE_CS_TERMINATE: 1031bf215546Sopenharmony_ci if (devinfo->ver >= 7) 1032bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_SPAWNER, 2, 0, 0, 0 /* XXX */, 0, 1033bf215546Sopenharmony_ci 10 /* XXX */, 0, 0, 0, 0, 0); 1034bf215546Sopenharmony_ci else 1035bf215546Sopenharmony_ci abort(); 1036bf215546Sopenharmony_ci 1037bf215546Sopenharmony_ci case SHADER_OPCODE_SEND: 1038bf215546Sopenharmony_ci switch (info.sfid) { 1039bf215546Sopenharmony_ci case GFX6_SFID_DATAPORT_RENDER_CACHE: 1040bf215546Sopenharmony_ci if (devinfo->ver >= 7) { 1041bf215546Sopenharmony_ci switch (brw_dp_desc_msg_type(devinfo, info.desc)) { 
1042bf215546Sopenharmony_ci case GFX7_DATAPORT_RC_TYPED_ATOMIC_OP: 1043bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_DP_RC, 2, 0, 0, 1044bf215546Sopenharmony_ci 30 /* XXX */, 450 /* XXX */, 1045bf215546Sopenharmony_ci 10 /* XXX */, 100 /* XXX */, 1046bf215546Sopenharmony_ci 0, 0, 0, 400 /* XXX */); 1047bf215546Sopenharmony_ci default: 1048bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_DP_RC, 2, 0, 0, 1049bf215546Sopenharmony_ci 0, 450 /* XXX */, 1050bf215546Sopenharmony_ci 10 /* XXX */, 300 /* XXX */, 0, 0, 1051bf215546Sopenharmony_ci 0, 0); 1052bf215546Sopenharmony_ci } 1053bf215546Sopenharmony_ci } else if (devinfo->ver >= 6) { 1054bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_DP_RC, 2 /* XXX */, 0, 0, 1055bf215546Sopenharmony_ci 0, 450 /* XXX */, 1056bf215546Sopenharmony_ci 10 /* XXX */, 300 /* XXX */, 0, 0, 0, 0); 1057bf215546Sopenharmony_ci } else { 1058bf215546Sopenharmony_ci abort(); 1059bf215546Sopenharmony_ci } 1060bf215546Sopenharmony_ci case BRW_SFID_SAMPLER: { 1061bf215546Sopenharmony_ci if (devinfo->ver >= 6) 1062bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_SAMPLER, 2, 0, 0, 0, 16, 1063bf215546Sopenharmony_ci 8, 750, 0, 0, 2, 0); 1064bf215546Sopenharmony_ci else 1065bf215546Sopenharmony_ci abort(); 1066bf215546Sopenharmony_ci } 1067bf215546Sopenharmony_ci case GFX7_SFID_DATAPORT_DATA_CACHE: 1068bf215546Sopenharmony_ci case HSW_SFID_DATAPORT_DATA_CACHE_1: 1069bf215546Sopenharmony_ci if (devinfo->verx10 >= 75) { 1070bf215546Sopenharmony_ci switch (brw_dp_desc_msg_type(devinfo, info.desc)) { 1071bf215546Sopenharmony_ci case HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP: 1072bf215546Sopenharmony_ci case HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2: 1073bf215546Sopenharmony_ci case HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2: 1074bf215546Sopenharmony_ci case HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP: 1075bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_DP_DC, 2, 0, 0, 1076bf215546Sopenharmony_ci 
30 /* XXX */, 400 /* XXX */, 1077bf215546Sopenharmony_ci 10 /* XXX */, 100 /* XXX */, 0, 0, 1078bf215546Sopenharmony_ci 0, 400 /* XXX */); 1079bf215546Sopenharmony_ci 1080bf215546Sopenharmony_ci default: 1081bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_DP_DC, 2, 0, 0, 1082bf215546Sopenharmony_ci 0, 20 /* XXX */, 1083bf215546Sopenharmony_ci 10 /* XXX */, 100 /* XXX */, 0, 0, 1084bf215546Sopenharmony_ci 0, 0); 1085bf215546Sopenharmony_ci } 1086bf215546Sopenharmony_ci } else if (devinfo->ver >= 7) { 1087bf215546Sopenharmony_ci switch (brw_dp_desc_msg_type(devinfo, info.desc)) { 1088bf215546Sopenharmony_ci case GFX7_DATAPORT_DC_UNTYPED_ATOMIC_OP: 1089bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_DP_DC, 2, 0, 0, 1090bf215546Sopenharmony_ci 30 /* XXX */, 400 /* XXX */, 1091bf215546Sopenharmony_ci 10 /* XXX */, 100 /* XXX */, 1092bf215546Sopenharmony_ci 0, 0, 0, 400 /* XXX */); 1093bf215546Sopenharmony_ci default: 1094bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_DP_DC, 2, 0, 0, 1095bf215546Sopenharmony_ci 0, 20 /* XXX */, 1096bf215546Sopenharmony_ci 10 /* XXX */, 100 /* XXX */, 0, 0, 1097bf215546Sopenharmony_ci 0, 0); 1098bf215546Sopenharmony_ci } 1099bf215546Sopenharmony_ci } else { 1100bf215546Sopenharmony_ci abort(); 1101bf215546Sopenharmony_ci } 1102bf215546Sopenharmony_ci 1103bf215546Sopenharmony_ci case GFX12_SFID_UGM: 1104bf215546Sopenharmony_ci case GFX12_SFID_TGM: 1105bf215546Sopenharmony_ci case GFX12_SFID_SLM: 1106bf215546Sopenharmony_ci switch (lsc_msg_desc_opcode(devinfo, info.desc)) { 1107bf215546Sopenharmony_ci case LSC_OP_LOAD: 1108bf215546Sopenharmony_ci case LSC_OP_STORE: 1109bf215546Sopenharmony_ci case LSC_OP_LOAD_CMASK: 1110bf215546Sopenharmony_ci case LSC_OP_STORE_CMASK: 1111bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_DP_DC, 2, 0, 0, 1112bf215546Sopenharmony_ci 0, 20 /* XXX */, 1113bf215546Sopenharmony_ci 10 /* XXX */, 100 /* XXX */, 0, 0, 1114bf215546Sopenharmony_ci 0, 0); 
1115bf215546Sopenharmony_ci 1116bf215546Sopenharmony_ci case LSC_OP_FENCE: 1117bf215546Sopenharmony_ci case LSC_OP_ATOMIC_INC: 1118bf215546Sopenharmony_ci case LSC_OP_ATOMIC_DEC: 1119bf215546Sopenharmony_ci case LSC_OP_ATOMIC_LOAD: 1120bf215546Sopenharmony_ci case LSC_OP_ATOMIC_STORE: 1121bf215546Sopenharmony_ci case LSC_OP_ATOMIC_ADD: 1122bf215546Sopenharmony_ci case LSC_OP_ATOMIC_SUB: 1123bf215546Sopenharmony_ci case LSC_OP_ATOMIC_MIN: 1124bf215546Sopenharmony_ci case LSC_OP_ATOMIC_MAX: 1125bf215546Sopenharmony_ci case LSC_OP_ATOMIC_UMIN: 1126bf215546Sopenharmony_ci case LSC_OP_ATOMIC_UMAX: 1127bf215546Sopenharmony_ci case LSC_OP_ATOMIC_CMPXCHG: 1128bf215546Sopenharmony_ci case LSC_OP_ATOMIC_FADD: 1129bf215546Sopenharmony_ci case LSC_OP_ATOMIC_FSUB: 1130bf215546Sopenharmony_ci case LSC_OP_ATOMIC_FMIN: 1131bf215546Sopenharmony_ci case LSC_OP_ATOMIC_FMAX: 1132bf215546Sopenharmony_ci case LSC_OP_ATOMIC_FCMPXCHG: 1133bf215546Sopenharmony_ci case LSC_OP_ATOMIC_AND: 1134bf215546Sopenharmony_ci case LSC_OP_ATOMIC_OR: 1135bf215546Sopenharmony_ci case LSC_OP_ATOMIC_XOR: 1136bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_DP_DC, 2, 0, 0, 1137bf215546Sopenharmony_ci 30 /* XXX */, 400 /* XXX */, 1138bf215546Sopenharmony_ci 10 /* XXX */, 100 /* XXX */, 0, 0, 1139bf215546Sopenharmony_ci 0, 400 /* XXX */); 1140bf215546Sopenharmony_ci default: 1141bf215546Sopenharmony_ci abort(); 1142bf215546Sopenharmony_ci } 1143bf215546Sopenharmony_ci 1144bf215546Sopenharmony_ci case GEN_RT_SFID_BINDLESS_THREAD_DISPATCH: 1145bf215546Sopenharmony_ci case GEN_RT_SFID_RAY_TRACE_ACCELERATOR: 1146bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_SPAWNER, 2, 0, 0, 0 /* XXX */, 0, 1147bf215546Sopenharmony_ci 10 /* XXX */, 0, 0, 0, 0, 0); 1148bf215546Sopenharmony_ci 1149bf215546Sopenharmony_ci case BRW_SFID_URB: 1150bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_URB, 2, 0, 0, 0, 6 /* XXX */, 1151bf215546Sopenharmony_ci 32 /* XXX */, 200 /* XXX */, 0, 0, 0, 0); 
1152bf215546Sopenharmony_ci 1153bf215546Sopenharmony_ci default: 1154bf215546Sopenharmony_ci abort(); 1155bf215546Sopenharmony_ci } 1156bf215546Sopenharmony_ci 1157bf215546Sopenharmony_ci case SHADER_OPCODE_UNDEF: 1158bf215546Sopenharmony_ci case SHADER_OPCODE_HALT_TARGET: 1159bf215546Sopenharmony_ci case FS_OPCODE_SCHEDULING_FENCE: 1160bf215546Sopenharmony_ci return calculate_desc(info, EU_UNIT_NULL, 0, 0, 0, 0, 0, 1161bf215546Sopenharmony_ci 0, 0, 0, 0, 0, 0); 1162bf215546Sopenharmony_ci 1163bf215546Sopenharmony_ci default: 1164bf215546Sopenharmony_ci abort(); 1165bf215546Sopenharmony_ci } 1166bf215546Sopenharmony_ci } 1167bf215546Sopenharmony_ci

   /**
    * Model the performance behavior of a stall on the specified dependency
    * ID.
    *
    * The front-end ready time st.unit_ready[EU_UNIT_FE] is replaced by the
    * MAX2 of its current value and st.dep_ready[id].  IDs that do not index
    * into st.dep_ready are ignored, leaving \p st untouched.
    *
    * \param st  Performance model state, updated in place.
    * \param id  Dependency the front-end has to wait for.
    */
   void
   stall_on_dependency(state &st, enum intel_eu_dependency_id id)
   {
      if (id < ARRAY_SIZE(st.dep_ready))
         st.unit_ready[EU_UNIT_FE] = MAX2(st.unit_ready[EU_UNIT_FE],
                                          st.dep_ready[id]);
   }

   /**
    * Model the performance behavior of the front-end and back-end while
    * executing an instruction with the specified timing information, assuming
    * all dependencies are already clear.
    *
    * \param st    Performance model state, updated in place.
    * \param perf  Timing description of the instruction.  Only perf.u,
    *              perf.df and perf.db are read here.
    */
   void
   execute_instruction(state &st, const perf_desc &perf)
   {
      /* Compute the time at which the front-end will be ready to execute the
       * next instruction.
       */
      st.unit_ready[EU_UNIT_FE] += perf.df;

      /* Descriptors whose unit is not below EU_NUM_UNITS only pay the
       * front-end cost above (presumably these are the EU_UNIT_NULL
       * descriptors -- confirm against the unit enum definition).
       */
      if (perf.u < EU_NUM_UNITS) {
         /* Wait for the back-end to be ready to execute this instruction. */
         st.unit_ready[EU_UNIT_FE] = MAX2(st.unit_ready[EU_UNIT_FE],
                                          st.unit_ready[perf.u]);

         /* Compute the time at which the back-end will be ready to execute
          * the next instruction, and update the back-end utilization.
          */
         st.unit_ready[perf.u] = st.unit_ready[EU_UNIT_FE] + perf.db;
         st.unit_busy[perf.u] += perf.db * st.weight;
      }
   }

   /**
    * Model the performance behavior of a read dependency provided by an
    * instruction.
    *
    * Overwrites st.dep_ready[id] with the current front-end ready time plus
    * perf.ls.  The previous value is not consulted.  IDs that do not index
    * into st.dep_ready are ignored.
    *
    * \param st    Performance model state, updated in place.
    * \param perf  Timing description of the instruction; only perf.ls is
    *              read.
    * \param id    Dependency being marked.
    */
   void
   mark_read_dependency(state &st, const perf_desc &perf,
                        enum intel_eu_dependency_id id)
   {
      if (id < ARRAY_SIZE(st.dep_ready))
         st.dep_ready[id] = st.unit_ready[EU_UNIT_FE] + perf.ls;
   }

   /**
    * Model the performance behavior of a write dependency provided by an
    * instruction.
    *
    * Overwrites st.dep_ready[id] with the current front-end ready time plus
    * a latency selected by the range \p id falls into:
    *
    *  - [EU_DEPENDENCY_ID_ACCUM0, EU_DEPENDENCY_ID_FLAG0):   perf.la
    *  - [EU_DEPENDENCY_ID_FLAG0, EU_DEPENDENCY_ID_SBID_WR0): perf.lf
    *  - any other ID that indexes into st.dep_ready:         perf.ld
    *
    * IDs matching none of the above are ignored.
    *
    * \param st    Performance model state, updated in place.
    * \param perf  Timing description of the instruction; perf.la, perf.lf
    *              or perf.ld is read depending on \p id.
    * \param id    Dependency being marked.
    */
   void
   mark_write_dependency(state &st, const perf_desc &perf,
                         enum intel_eu_dependency_id id)
   {
      /* NOTE(review): the first two branches don't re-check the array bound;
       * presumably both ID ranges lie entirely within st.dep_ready -- confirm
       * against the dependency ID enum and the array declaration.
       */
      if (id >= EU_DEPENDENCY_ID_ACCUM0 && id < EU_DEPENDENCY_ID_FLAG0)
         st.dep_ready[id] = st.unit_ready[EU_UNIT_FE] + perf.la;
      else if (id >= EU_DEPENDENCY_ID_FLAG0 && id < EU_DEPENDENCY_ID_SBID_WR0)
         st.dep_ready[id] = st.unit_ready[EU_UNIT_FE] + perf.lf;
      else if (id < ARRAY_SIZE(st.dep_ready))
         st.dep_ready[id] = st.unit_ready[EU_UNIT_FE] + perf.ld;
   }

1233bf215546Sopenharmony_ci 1234bf215546Sopenharmony_ci /** 1235bf215546Sopenharmony_ci * Return the dependency ID of a backend_reg, offset by \p delta GRFs. 1236bf215546Sopenharmony_ci */ 1237bf215546Sopenharmony_ci enum intel_eu_dependency_id 1238bf215546Sopenharmony_ci reg_dependency_id(const intel_device_info *devinfo, const backend_reg &r, 1239bf215546Sopenharmony_ci const int delta) 1240bf215546Sopenharmony_ci { 1241bf215546Sopenharmony_ci if (r.file == VGRF) { 1242bf215546Sopenharmony_ci const unsigned i = r.nr + r.offset / REG_SIZE + delta; 1243bf215546Sopenharmony_ci assert(i < EU_DEPENDENCY_ID_MRF0 - EU_DEPENDENCY_ID_GRF0); 1244bf215546Sopenharmony_ci return intel_eu_dependency_id(EU_DEPENDENCY_ID_GRF0 + i); 1245bf215546Sopenharmony_ci 1246bf215546Sopenharmony_ci } else if (r.file == FIXED_GRF) { 1247bf215546Sopenharmony_ci const unsigned i = r.nr + delta; 1248bf215546Sopenharmony_ci assert(i < EU_DEPENDENCY_ID_MRF0 - EU_DEPENDENCY_ID_GRF0); 1249bf215546Sopenharmony_ci return intel_eu_dependency_id(EU_DEPENDENCY_ID_GRF0 + i); 1250bf215546Sopenharmony_ci 1251bf215546Sopenharmony_ci } else if (r.file == MRF && devinfo->ver >= 7) { 1252bf215546Sopenharmony_ci const unsigned i =
GFX7_MRF_HACK_START + 1253bf215546Sopenharmony_ci r.nr + r.offset / REG_SIZE + delta; 1254bf215546Sopenharmony_ci assert(i < EU_DEPENDENCY_ID_MRF0 - EU_DEPENDENCY_ID_GRF0); 1255bf215546Sopenharmony_ci return intel_eu_dependency_id(EU_DEPENDENCY_ID_GRF0 + i); 1256bf215546Sopenharmony_ci 1257bf215546Sopenharmony_ci } else if (r.file == MRF && devinfo->ver < 7) { 1258bf215546Sopenharmony_ci const unsigned i = (r.nr & ~BRW_MRF_COMPR4) + 1259bf215546Sopenharmony_ci r.offset / REG_SIZE + delta; 1260bf215546Sopenharmony_ci assert(i < EU_DEPENDENCY_ID_ADDR0 - EU_DEPENDENCY_ID_MRF0); 1261bf215546Sopenharmony_ci return intel_eu_dependency_id(EU_DEPENDENCY_ID_MRF0 + i); 1262bf215546Sopenharmony_ci 1263bf215546Sopenharmony_ci } else if (r.file == ARF && r.nr >= BRW_ARF_ADDRESS && 1264bf215546Sopenharmony_ci r.nr < BRW_ARF_ACCUMULATOR) { 1265bf215546Sopenharmony_ci assert(delta == 0); 1266bf215546Sopenharmony_ci return EU_DEPENDENCY_ID_ADDR0; 1267bf215546Sopenharmony_ci 1268bf215546Sopenharmony_ci } else if (r.file == ARF && r.nr >= BRW_ARF_ACCUMULATOR && 1269bf215546Sopenharmony_ci r.nr < BRW_ARF_FLAG) { 1270bf215546Sopenharmony_ci const unsigned i = r.nr - BRW_ARF_ACCUMULATOR + delta; 1271bf215546Sopenharmony_ci assert(i < EU_DEPENDENCY_ID_FLAG0 - EU_DEPENDENCY_ID_ACCUM0); 1272bf215546Sopenharmony_ci return intel_eu_dependency_id(EU_DEPENDENCY_ID_ACCUM0 + i); 1273bf215546Sopenharmony_ci 1274bf215546Sopenharmony_ci } else { 1275bf215546Sopenharmony_ci return EU_NUM_DEPENDENCY_IDS; 1276bf215546Sopenharmony_ci } 1277bf215546Sopenharmony_ci } 1278bf215546Sopenharmony_ci 1279bf215546Sopenharmony_ci /** 1280bf215546Sopenharmony_ci * Return the dependency ID of flag register starting at offset \p i. 
1281bf215546Sopenharmony_ci */ 1282bf215546Sopenharmony_ci enum intel_eu_dependency_id 1283bf215546Sopenharmony_ci flag_dependency_id(unsigned i) 1284bf215546Sopenharmony_ci { 1285bf215546Sopenharmony_ci assert(i < EU_DEPENDENCY_ID_SBID_WR0 - EU_DEPENDENCY_ID_FLAG0); 1286bf215546Sopenharmony_ci return intel_eu_dependency_id(EU_DEPENDENCY_ID_FLAG0 + i); 1287bf215546Sopenharmony_ci } 1288bf215546Sopenharmony_ci 1289bf215546Sopenharmony_ci /** 1290bf215546Sopenharmony_ci * Return the dependency ID corresponding to the SBID read completion 1291bf215546Sopenharmony_ci * condition of a Gfx12+ SWSB. 1292bf215546Sopenharmony_ci */ 1293bf215546Sopenharmony_ci enum intel_eu_dependency_id 1294bf215546Sopenharmony_ci tgl_swsb_rd_dependency_id(tgl_swsb swsb) 1295bf215546Sopenharmony_ci { 1296bf215546Sopenharmony_ci if (swsb.mode) { 1297bf215546Sopenharmony_ci assert(swsb.sbid < 1298bf215546Sopenharmony_ci EU_NUM_DEPENDENCY_IDS - EU_DEPENDENCY_ID_SBID_RD0); 1299bf215546Sopenharmony_ci return intel_eu_dependency_id(EU_DEPENDENCY_ID_SBID_RD0 + swsb.sbid); 1300bf215546Sopenharmony_ci } else { 1301bf215546Sopenharmony_ci return EU_NUM_DEPENDENCY_IDS; 1302bf215546Sopenharmony_ci } 1303bf215546Sopenharmony_ci } 1304bf215546Sopenharmony_ci 1305bf215546Sopenharmony_ci /** 1306bf215546Sopenharmony_ci * Return the dependency ID corresponding to the SBID write completion 1307bf215546Sopenharmony_ci * condition of a Gfx12+ SWSB. 
1308bf215546Sopenharmony_ci */ 1309bf215546Sopenharmony_ci enum intel_eu_dependency_id 1310bf215546Sopenharmony_ci tgl_swsb_wr_dependency_id(tgl_swsb swsb) 1311bf215546Sopenharmony_ci { 1312bf215546Sopenharmony_ci if (swsb.mode) { 1313bf215546Sopenharmony_ci assert(swsb.sbid < 1314bf215546Sopenharmony_ci EU_DEPENDENCY_ID_SBID_RD0 - EU_DEPENDENCY_ID_SBID_WR0); 1315bf215546Sopenharmony_ci return intel_eu_dependency_id(EU_DEPENDENCY_ID_SBID_WR0 + swsb.sbid); 1316bf215546Sopenharmony_ci } else { 1317bf215546Sopenharmony_ci return EU_NUM_DEPENDENCY_IDS; 1318bf215546Sopenharmony_ci } 1319bf215546Sopenharmony_ci } 1320bf215546Sopenharmony_ci 1321bf215546Sopenharmony_ci /** 1322bf215546Sopenharmony_ci * Return the implicit accumulator register accessed by channel \p i of the 1323bf215546Sopenharmony_ci * instruction. 1324bf215546Sopenharmony_ci */ 1325bf215546Sopenharmony_ci unsigned 1326bf215546Sopenharmony_ci accum_reg_of_channel(const intel_device_info *devinfo, 1327bf215546Sopenharmony_ci const backend_instruction *inst, 1328bf215546Sopenharmony_ci brw_reg_type tx, unsigned i) 1329bf215546Sopenharmony_ci { 1330bf215546Sopenharmony_ci assert(inst->reads_accumulator_implicitly() || 1331bf215546Sopenharmony_ci inst->writes_accumulator_implicitly(devinfo)); 1332bf215546Sopenharmony_ci const unsigned offset = (inst->group + i) * type_sz(tx) * 1333bf215546Sopenharmony_ci (devinfo->ver < 7 || brw_reg_type_is_floating_point(tx) ? 1 : 2); 1334bf215546Sopenharmony_ci return offset / REG_SIZE % 2; 1335bf215546Sopenharmony_ci } 1336bf215546Sopenharmony_ci 1337bf215546Sopenharmony_ci /** 1338bf215546Sopenharmony_ci * Model the performance behavior of an FS back-end instruction. 
1339bf215546Sopenharmony_ci */ 1340bf215546Sopenharmony_ci void 1341bf215546Sopenharmony_ci issue_fs_inst(state &st, const struct brw_isa_info *isa, 1342bf215546Sopenharmony_ci const backend_instruction *be_inst) 1343bf215546Sopenharmony_ci { 1344bf215546Sopenharmony_ci const struct intel_device_info *devinfo = isa->devinfo; 1345bf215546Sopenharmony_ci const fs_inst *inst = static_cast<const fs_inst *>(be_inst); 1346bf215546Sopenharmony_ci const instruction_info info(isa, inst); 1347bf215546Sopenharmony_ci const perf_desc perf = instruction_desc(info); 1348bf215546Sopenharmony_ci 1349bf215546Sopenharmony_ci /* Stall on any source dependencies. */ 1350bf215546Sopenharmony_ci for (unsigned i = 0; i < inst->sources; i++) { 1351bf215546Sopenharmony_ci for (unsigned j = 0; j < regs_read(inst, i); j++) 1352bf215546Sopenharmony_ci stall_on_dependency( 1353bf215546Sopenharmony_ci st, reg_dependency_id(devinfo, inst->src[i], j)); 1354bf215546Sopenharmony_ci } 1355bf215546Sopenharmony_ci 1356bf215546Sopenharmony_ci if (inst->reads_accumulator_implicitly()) { 1357bf215546Sopenharmony_ci for (unsigned j = accum_reg_of_channel(devinfo, inst, info.tx, 0); 1358bf215546Sopenharmony_ci j <= accum_reg_of_channel(devinfo, inst, info.tx, 1359bf215546Sopenharmony_ci inst->exec_size - 1); j++) 1360bf215546Sopenharmony_ci stall_on_dependency( 1361bf215546Sopenharmony_ci st, reg_dependency_id(devinfo, brw_acc_reg(8), j)); 1362bf215546Sopenharmony_ci } 1363bf215546Sopenharmony_ci 1364bf215546Sopenharmony_ci if (is_send(inst) && inst->base_mrf != -1) { 1365bf215546Sopenharmony_ci for (unsigned j = 0; j < inst->mlen; j++) 1366bf215546Sopenharmony_ci stall_on_dependency( 1367bf215546Sopenharmony_ci st, reg_dependency_id( 1368bf215546Sopenharmony_ci devinfo, brw_uvec_mrf(8, inst->base_mrf, 0), j)); 1369bf215546Sopenharmony_ci } 1370bf215546Sopenharmony_ci 1371bf215546Sopenharmony_ci if (const unsigned mask = inst->flags_read(devinfo)) { 1372bf215546Sopenharmony_ci for (unsigned i = 0; i < 
sizeof(mask) * CHAR_BIT; i++) { 1373bf215546Sopenharmony_ci if (mask & (1 << i)) 1374bf215546Sopenharmony_ci stall_on_dependency(st, flag_dependency_id(i)); 1375bf215546Sopenharmony_ci } 1376bf215546Sopenharmony_ci } 1377bf215546Sopenharmony_ci 1378bf215546Sopenharmony_ci /* Stall on any write dependencies. */ 1379bf215546Sopenharmony_ci if (!inst->no_dd_check) { 1380bf215546Sopenharmony_ci if (inst->dst.file != BAD_FILE && !inst->dst.is_null()) { 1381bf215546Sopenharmony_ci for (unsigned j = 0; j < regs_written(inst); j++) 1382bf215546Sopenharmony_ci stall_on_dependency( 1383bf215546Sopenharmony_ci st, reg_dependency_id(devinfo, inst->dst, j)); 1384bf215546Sopenharmony_ci } 1385bf215546Sopenharmony_ci 1386bf215546Sopenharmony_ci if (inst->writes_accumulator_implicitly(devinfo)) { 1387bf215546Sopenharmony_ci for (unsigned j = accum_reg_of_channel(devinfo, inst, info.tx, 0); 1388bf215546Sopenharmony_ci j <= accum_reg_of_channel(devinfo, inst, info.tx, 1389bf215546Sopenharmony_ci inst->exec_size - 1); j++) 1390bf215546Sopenharmony_ci stall_on_dependency( 1391bf215546Sopenharmony_ci st, reg_dependency_id(devinfo, brw_acc_reg(8), j)); 1392bf215546Sopenharmony_ci } 1393bf215546Sopenharmony_ci 1394bf215546Sopenharmony_ci if (const unsigned mask = inst->flags_written(devinfo)) { 1395bf215546Sopenharmony_ci for (unsigned i = 0; i < sizeof(mask) * CHAR_BIT; i++) { 1396bf215546Sopenharmony_ci if (mask & (1 << i)) 1397bf215546Sopenharmony_ci stall_on_dependency(st, flag_dependency_id(i)); 1398bf215546Sopenharmony_ci } 1399bf215546Sopenharmony_ci } 1400bf215546Sopenharmony_ci } 1401bf215546Sopenharmony_ci 1402bf215546Sopenharmony_ci /* Stall on any SBID dependencies. 
*/ 1403bf215546Sopenharmony_ci if (inst->sched.mode & (TGL_SBID_SET | TGL_SBID_DST)) 1404bf215546Sopenharmony_ci stall_on_dependency(st, tgl_swsb_wr_dependency_id(inst->sched)); 1405bf215546Sopenharmony_ci else if (inst->sched.mode & TGL_SBID_SRC) 1406bf215546Sopenharmony_ci stall_on_dependency(st, tgl_swsb_rd_dependency_id(inst->sched)); 1407bf215546Sopenharmony_ci 1408bf215546Sopenharmony_ci /* Execute the instruction. */ 1409bf215546Sopenharmony_ci execute_instruction(st, perf); 1410bf215546Sopenharmony_ci 1411bf215546Sopenharmony_ci /* Mark any source dependencies. */ 1412bf215546Sopenharmony_ci if (inst->is_send_from_grf()) { 1413bf215546Sopenharmony_ci for (unsigned i = 0; i < inst->sources; i++) { 1414bf215546Sopenharmony_ci if (inst->is_payload(i)) { 1415bf215546Sopenharmony_ci for (unsigned j = 0; j < regs_read(inst, i); j++) 1416bf215546Sopenharmony_ci mark_read_dependency( 1417bf215546Sopenharmony_ci st, perf, reg_dependency_id(devinfo, inst->src[i], j)); 1418bf215546Sopenharmony_ci } 1419bf215546Sopenharmony_ci } 1420bf215546Sopenharmony_ci } 1421bf215546Sopenharmony_ci 1422bf215546Sopenharmony_ci if (is_send(inst) && inst->base_mrf != -1) { 1423bf215546Sopenharmony_ci for (unsigned j = 0; j < inst->mlen; j++) 1424bf215546Sopenharmony_ci mark_read_dependency(st, perf, 1425bf215546Sopenharmony_ci reg_dependency_id(devinfo, brw_uvec_mrf(8, inst->base_mrf, 0), j)); 1426bf215546Sopenharmony_ci } 1427bf215546Sopenharmony_ci 1428bf215546Sopenharmony_ci /* Mark any destination dependencies. 
*/ 1429bf215546Sopenharmony_ci if (inst->dst.file != BAD_FILE && !inst->dst.is_null()) { 1430bf215546Sopenharmony_ci for (unsigned j = 0; j < regs_written(inst); j++) { 1431bf215546Sopenharmony_ci mark_write_dependency(st, perf, 1432bf215546Sopenharmony_ci reg_dependency_id(devinfo, inst->dst, j)); 1433bf215546Sopenharmony_ci } 1434bf215546Sopenharmony_ci } 1435bf215546Sopenharmony_ci 1436bf215546Sopenharmony_ci if (inst->writes_accumulator_implicitly(devinfo)) { 1437bf215546Sopenharmony_ci for (unsigned j = accum_reg_of_channel(devinfo, inst, info.tx, 0); 1438bf215546Sopenharmony_ci j <= accum_reg_of_channel(devinfo, inst, info.tx, 1439bf215546Sopenharmony_ci inst->exec_size - 1); j++) 1440bf215546Sopenharmony_ci mark_write_dependency(st, perf, 1441bf215546Sopenharmony_ci reg_dependency_id(devinfo, brw_acc_reg(8), j)); 1442bf215546Sopenharmony_ci } 1443bf215546Sopenharmony_ci 1444bf215546Sopenharmony_ci if (const unsigned mask = inst->flags_written(devinfo)) { 1445bf215546Sopenharmony_ci for (unsigned i = 0; i < sizeof(mask) * CHAR_BIT; i++) { 1446bf215546Sopenharmony_ci if (mask & (1 << i)) 1447bf215546Sopenharmony_ci mark_write_dependency(st, perf, flag_dependency_id(i)); 1448bf215546Sopenharmony_ci } 1449bf215546Sopenharmony_ci } 1450bf215546Sopenharmony_ci 1451bf215546Sopenharmony_ci /* Mark any SBID dependencies. */ 1452bf215546Sopenharmony_ci if (inst->sched.mode & TGL_SBID_SET) { 1453bf215546Sopenharmony_ci mark_read_dependency(st, perf, tgl_swsb_rd_dependency_id(inst->sched)); 1454bf215546Sopenharmony_ci mark_write_dependency(st, perf, tgl_swsb_wr_dependency_id(inst->sched)); 1455bf215546Sopenharmony_ci } 1456bf215546Sopenharmony_ci } 1457bf215546Sopenharmony_ci 1458bf215546Sopenharmony_ci /** 1459bf215546Sopenharmony_ci * Model the performance behavior of a VEC4 back-end instruction. 
1460bf215546Sopenharmony_ci */ 1461bf215546Sopenharmony_ci void 1462bf215546Sopenharmony_ci issue_vec4_instruction(state &st, const struct brw_isa_info *isa, 1463bf215546Sopenharmony_ci const backend_instruction *be_inst) 1464bf215546Sopenharmony_ci { 1465bf215546Sopenharmony_ci const struct intel_device_info *devinfo = isa->devinfo; 1466bf215546Sopenharmony_ci const vec4_instruction *inst = 1467bf215546Sopenharmony_ci static_cast<const vec4_instruction *>(be_inst); 1468bf215546Sopenharmony_ci const instruction_info info(isa, inst); 1469bf215546Sopenharmony_ci const perf_desc perf = instruction_desc(info); 1470bf215546Sopenharmony_ci 1471bf215546Sopenharmony_ci /* Stall on any source dependencies. */ 1472bf215546Sopenharmony_ci for (unsigned i = 0; i < ARRAY_SIZE(inst->src); i++) { 1473bf215546Sopenharmony_ci for (unsigned j = 0; j < regs_read(inst, i); j++) 1474bf215546Sopenharmony_ci stall_on_dependency( 1475bf215546Sopenharmony_ci st, reg_dependency_id(devinfo, inst->src[i], j)); 1476bf215546Sopenharmony_ci } 1477bf215546Sopenharmony_ci 1478bf215546Sopenharmony_ci if (inst->reads_accumulator_implicitly()) { 1479bf215546Sopenharmony_ci for (unsigned j = accum_reg_of_channel(devinfo, inst, info.tx, 0); 1480bf215546Sopenharmony_ci j <= accum_reg_of_channel(devinfo, inst, info.tx, 1481bf215546Sopenharmony_ci inst->exec_size - 1); j++) 1482bf215546Sopenharmony_ci stall_on_dependency( 1483bf215546Sopenharmony_ci st, reg_dependency_id(devinfo, brw_acc_reg(8), j)); 1484bf215546Sopenharmony_ci } 1485bf215546Sopenharmony_ci 1486bf215546Sopenharmony_ci if (inst->base_mrf != -1) { 1487bf215546Sopenharmony_ci for (unsigned j = 0; j < inst->mlen; j++) 1488bf215546Sopenharmony_ci stall_on_dependency( 1489bf215546Sopenharmony_ci st, reg_dependency_id( 1490bf215546Sopenharmony_ci devinfo, brw_uvec_mrf(8, inst->base_mrf, 0), j)); 1491bf215546Sopenharmony_ci } 1492bf215546Sopenharmony_ci 1493bf215546Sopenharmony_ci if (inst->reads_flag()) 1494bf215546Sopenharmony_ci 
stall_on_dependency(st, EU_DEPENDENCY_ID_FLAG0); 1495bf215546Sopenharmony_ci 1496bf215546Sopenharmony_ci /* Stall on any write dependencies. */ 1497bf215546Sopenharmony_ci if (!inst->no_dd_check) { 1498bf215546Sopenharmony_ci if (inst->dst.file != BAD_FILE && !inst->dst.is_null()) { 1499bf215546Sopenharmony_ci for (unsigned j = 0; j < regs_written(inst); j++) 1500bf215546Sopenharmony_ci stall_on_dependency( 1501bf215546Sopenharmony_ci st, reg_dependency_id(devinfo, inst->dst, j)); 1502bf215546Sopenharmony_ci } 1503bf215546Sopenharmony_ci 1504bf215546Sopenharmony_ci if (inst->writes_accumulator_implicitly(devinfo)) { 1505bf215546Sopenharmony_ci for (unsigned j = accum_reg_of_channel(devinfo, inst, info.tx, 0); 1506bf215546Sopenharmony_ci j <= accum_reg_of_channel(devinfo, inst, info.tx, 1507bf215546Sopenharmony_ci inst->exec_size - 1); j++) 1508bf215546Sopenharmony_ci stall_on_dependency( 1509bf215546Sopenharmony_ci st, reg_dependency_id(devinfo, brw_acc_reg(8), j)); 1510bf215546Sopenharmony_ci } 1511bf215546Sopenharmony_ci 1512bf215546Sopenharmony_ci if (inst->writes_flag(devinfo)) 1513bf215546Sopenharmony_ci stall_on_dependency(st, EU_DEPENDENCY_ID_FLAG0); 1514bf215546Sopenharmony_ci } 1515bf215546Sopenharmony_ci 1516bf215546Sopenharmony_ci /* Execute the instruction. */ 1517bf215546Sopenharmony_ci execute_instruction(st, perf); 1518bf215546Sopenharmony_ci 1519bf215546Sopenharmony_ci /* Mark any source dependencies. 
*/ 1520bf215546Sopenharmony_ci if (inst->is_send_from_grf()) { 1521bf215546Sopenharmony_ci for (unsigned i = 0; i < ARRAY_SIZE(inst->src); i++) { 1522bf215546Sopenharmony_ci for (unsigned j = 0; j < regs_read(inst, i); j++) 1523bf215546Sopenharmony_ci mark_read_dependency( 1524bf215546Sopenharmony_ci st, perf, reg_dependency_id(devinfo, inst->src[i], j)); 1525bf215546Sopenharmony_ci } 1526bf215546Sopenharmony_ci } 1527bf215546Sopenharmony_ci 1528bf215546Sopenharmony_ci if (inst->base_mrf != -1) { 1529bf215546Sopenharmony_ci for (unsigned j = 0; j < inst->mlen; j++) 1530bf215546Sopenharmony_ci mark_read_dependency(st, perf, 1531bf215546Sopenharmony_ci reg_dependency_id(devinfo, brw_uvec_mrf(8, inst->base_mrf, 0), j)); 1532bf215546Sopenharmony_ci } 1533bf215546Sopenharmony_ci 1534bf215546Sopenharmony_ci /* Mark any destination dependencies. */ 1535bf215546Sopenharmony_ci if (inst->dst.file != BAD_FILE && !inst->dst.is_null()) { 1536bf215546Sopenharmony_ci for (unsigned j = 0; j < regs_written(inst); j++) { 1537bf215546Sopenharmony_ci mark_write_dependency(st, perf, 1538bf215546Sopenharmony_ci reg_dependency_id(devinfo, inst->dst, j)); 1539bf215546Sopenharmony_ci } 1540bf215546Sopenharmony_ci } 1541bf215546Sopenharmony_ci 1542bf215546Sopenharmony_ci if (inst->writes_accumulator_implicitly(devinfo)) { 1543bf215546Sopenharmony_ci for (unsigned j = accum_reg_of_channel(devinfo, inst, info.tx, 0); 1544bf215546Sopenharmony_ci j <= accum_reg_of_channel(devinfo, inst, info.tx, 1545bf215546Sopenharmony_ci inst->exec_size - 1); j++) 1546bf215546Sopenharmony_ci mark_write_dependency(st, perf, 1547bf215546Sopenharmony_ci reg_dependency_id(devinfo, brw_acc_reg(8), j)); 1548bf215546Sopenharmony_ci } 1549bf215546Sopenharmony_ci 1550bf215546Sopenharmony_ci if (inst->writes_flag(devinfo)) 1551bf215546Sopenharmony_ci mark_write_dependency(st, perf, EU_DEPENDENCY_ID_FLAG0); 1552bf215546Sopenharmony_ci } 1553bf215546Sopenharmony_ci 1554bf215546Sopenharmony_ci /** 
1555bf215546Sopenharmony_ci * Calculate the maximum possible throughput of the program compatible with 1556bf215546Sopenharmony_ci * the cycle-count utilization estimated for each asynchronous unit, in 1557bf215546Sopenharmony_ci * threads-per-cycle units. 1558bf215546Sopenharmony_ci */ 1559bf215546Sopenharmony_ci float 1560bf215546Sopenharmony_ci calculate_thread_throughput(const state &st, float busy) 1561bf215546Sopenharmony_ci { 1562bf215546Sopenharmony_ci for (unsigned i = 0; i < EU_NUM_UNITS; i++) 1563bf215546Sopenharmony_ci busy = MAX2(busy, st.unit_busy[i]); 1564bf215546Sopenharmony_ci 1565bf215546Sopenharmony_ci return 1.0 / busy; 1566bf215546Sopenharmony_ci } 1567bf215546Sopenharmony_ci 1568bf215546Sopenharmony_ci /** 1569bf215546Sopenharmony_ci * Estimate the performance of the specified shader. 1570bf215546Sopenharmony_ci */ 1571bf215546Sopenharmony_ci void 1572bf215546Sopenharmony_ci calculate_performance(performance &p, const backend_shader *s, 1573bf215546Sopenharmony_ci void (*issue_instruction)( 1574bf215546Sopenharmony_ci state &, const struct brw_isa_info *, 1575bf215546Sopenharmony_ci const backend_instruction *), 1576bf215546Sopenharmony_ci unsigned dispatch_width) 1577bf215546Sopenharmony_ci { 1578bf215546Sopenharmony_ci /* XXX - Note that the previous version of this code used worst-case 1579bf215546Sopenharmony_ci * scenario estimation of branching divergence for SIMD32 shaders, 1580bf215546Sopenharmony_ci * but this heuristic was removed to improve performance in common 1581bf215546Sopenharmony_ci * scenarios. Wider shader variants are less optimal when divergence 1582bf215546Sopenharmony_ci * is high, e.g. when application renders complex scene on a small 1583bf215546Sopenharmony_ci * surface. It is assumed that such renders are short, so their 1584bf215546Sopenharmony_ci * time doesn't matter and when it comes to the overall performance, 1585bf215546Sopenharmony_ci * they are dominated by more optimal larger renders. 
1586bf215546Sopenharmony_ci * 1587bf215546Sopenharmony_ci * It's possible that we could do better with divergence analysis 1588bf215546Sopenharmony_ci * by isolating branches which are 100% uniform. 1589bf215546Sopenharmony_ci * 1590bf215546Sopenharmony_ci * Plumbing the trip counts from NIR loop analysis would allow us 1591bf215546Sopenharmony_ci * to do a better job regarding the loop weights. 1592bf215546Sopenharmony_ci * 1593bf215546Sopenharmony_ci * In the meantime use values that roughly match the control flow 1594bf215546Sopenharmony_ci * weights used elsewhere in the compiler back-end. 1595bf215546Sopenharmony_ci * 1596bf215546Sopenharmony_ci * Note that we provide slightly more pessimistic weights on 1597bf215546Sopenharmony_ci * Gfx12+ for SIMD32, since the effective warp size on that 1598bf215546Sopenharmony_ci * platform is 2x the SIMD width due to EU fusion, which increases 1599bf215546Sopenharmony_ci * the likelihood of divergent control flow in comparison to 1600bf215546Sopenharmony_ci * previous generations, giving narrower SIMD modes a performance 1601bf215546Sopenharmony_ci * advantage in several test-cases with non-uniform discard jumps. 1602bf215546Sopenharmony_ci */ 1603bf215546Sopenharmony_ci const float discard_weight = (dispatch_width > 16 || s->devinfo->ver < 12 ? 
1604bf215546Sopenharmony_ci 1.0 : 0.5); 1605bf215546Sopenharmony_ci const float loop_weight = 10; 1606bf215546Sopenharmony_ci unsigned halt_count = 0; 1607bf215546Sopenharmony_ci unsigned elapsed = 0; 1608bf215546Sopenharmony_ci state st; 1609bf215546Sopenharmony_ci 1610bf215546Sopenharmony_ci foreach_block(block, s->cfg) { 1611bf215546Sopenharmony_ci const unsigned elapsed0 = elapsed; 1612bf215546Sopenharmony_ci 1613bf215546Sopenharmony_ci foreach_inst_in_block(backend_instruction, inst, block) { 1614bf215546Sopenharmony_ci const unsigned clock0 = st.unit_ready[EU_UNIT_FE]; 1615bf215546Sopenharmony_ci 1616bf215546Sopenharmony_ci issue_instruction(st, &s->compiler->isa, inst); 1617bf215546Sopenharmony_ci 1618bf215546Sopenharmony_ci if (inst->opcode == SHADER_OPCODE_HALT_TARGET && halt_count) 1619bf215546Sopenharmony_ci st.weight /= discard_weight; 1620bf215546Sopenharmony_ci 1621bf215546Sopenharmony_ci elapsed += (st.unit_ready[EU_UNIT_FE] - clock0) * st.weight; 1622bf215546Sopenharmony_ci 1623bf215546Sopenharmony_ci if (inst->opcode == BRW_OPCODE_DO) 1624bf215546Sopenharmony_ci st.weight *= loop_weight; 1625bf215546Sopenharmony_ci else if (inst->opcode == BRW_OPCODE_WHILE) 1626bf215546Sopenharmony_ci st.weight /= loop_weight; 1627bf215546Sopenharmony_ci else if (inst->opcode == BRW_OPCODE_HALT && !halt_count++) 1628bf215546Sopenharmony_ci st.weight *= discard_weight; 1629bf215546Sopenharmony_ci } 1630bf215546Sopenharmony_ci 1631bf215546Sopenharmony_ci p.block_latency[block->num] = elapsed - elapsed0; 1632bf215546Sopenharmony_ci } 1633bf215546Sopenharmony_ci 1634bf215546Sopenharmony_ci p.latency = elapsed; 1635bf215546Sopenharmony_ci p.throughput = dispatch_width * calculate_thread_throughput(st, elapsed); 1636bf215546Sopenharmony_ci } 1637bf215546Sopenharmony_ci} 1638bf215546Sopenharmony_ci 1639bf215546Sopenharmony_cibrw::performance::performance(const fs_visitor *v) : 1640bf215546Sopenharmony_ci block_latency(new unsigned[v->cfg->num_blocks]) 
1641bf215546Sopenharmony_ci{ 1642bf215546Sopenharmony_ci calculate_performance(*this, v, issue_fs_inst, v->dispatch_width); 1643bf215546Sopenharmony_ci} 1644bf215546Sopenharmony_ci 1645bf215546Sopenharmony_cibrw::performance::performance(const vec4_visitor *v) : 1646bf215546Sopenharmony_ci block_latency(new unsigned[v->cfg->num_blocks]) 1647bf215546Sopenharmony_ci{ 1648bf215546Sopenharmony_ci calculate_performance(*this, v, issue_vec4_instruction, 8); 1649bf215546Sopenharmony_ci} 1650bf215546Sopenharmony_ci 1651bf215546Sopenharmony_cibrw::performance::~performance() 1652bf215546Sopenharmony_ci{ 1653bf215546Sopenharmony_ci delete[] block_latency; 1654bf215546Sopenharmony_ci} 1655