1/* -*- c++ -*- */ 2/* 3 * Copyright © 2011-2015 Intel Corporation 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 22 * IN THE SOFTWARE. 23 */ 24 25#ifndef BRW_IR_VEC4_H 26#define BRW_IR_VEC4_H 27 28#include "brw_shader.h" 29 30namespace brw { 31 32class dst_reg; 33 34class src_reg : public backend_reg 35{ 36public: 37 DECLARE_RALLOC_CXX_OPERATORS(src_reg) 38 39 void init(); 40 41 src_reg(enum brw_reg_file file, int nr, const glsl_type *type); 42 src_reg(); 43 src_reg(struct ::brw_reg reg); 44 45 bool equals(const src_reg &r) const; 46 bool negative_equals(const src_reg &r) const; 47 48 src_reg(class vec4_visitor *v, const struct glsl_type *type); 49 src_reg(class vec4_visitor *v, const struct glsl_type *type, int size); 50 51 explicit src_reg(const dst_reg ®); 52 53 src_reg *reladdr; 54}; 55 56static inline src_reg 57retype(src_reg reg, enum brw_reg_type type) 58{ 59 reg.type = type; 60 return reg; 61} 62 63namespace detail { 64 65static inline void 66add_byte_offset(backend_reg *reg, unsigned bytes) 67{ 68 switch (reg->file) { 69 case BAD_FILE: 70 break; 71 case VGRF: 72 case ATTR: 73 case UNIFORM: 74 reg->offset += bytes; 75 assert(reg->offset % 16 == 0); 76 break; 77 case MRF: { 78 const unsigned suboffset = reg->offset + bytes; 79 reg->nr += suboffset / REG_SIZE; 80 reg->offset = suboffset % REG_SIZE; 81 assert(reg->offset % 16 == 0); 82 break; 83 } 84 case ARF: 85 case FIXED_GRF: { 86 const unsigned suboffset = reg->subnr + bytes; 87 reg->nr += suboffset / REG_SIZE; 88 reg->subnr = suboffset % REG_SIZE; 89 assert(reg->subnr % 16 == 0); 90 break; 91 } 92 default: 93 assert(bytes == 0); 94 } 95} 96 97} /* namespace detail */ 98 99static inline src_reg 100byte_offset(src_reg reg, unsigned bytes) 101{ 102 detail::add_byte_offset(®, bytes); 103 return reg; 104} 105 106static inline src_reg 107offset(src_reg reg, unsigned width, unsigned delta) 108{ 109 const unsigned stride = (reg.file == UNIFORM ? 0 : 4); 110 const unsigned num_components = MAX2(width / 4 * stride, 4); 111 return byte_offset(reg, num_components * type_sz(reg.type) * delta); 112} 113 114static inline src_reg 115horiz_offset(src_reg reg, unsigned delta) 116{ 117 return byte_offset(reg, delta * type_sz(reg.type)); 118} 119 120/** 121 * Reswizzle a given source register. 122 * \sa brw_swizzle(). 123 */ 124static inline src_reg 125swizzle(src_reg reg, unsigned swizzle) 126{ 127 if (reg.file == IMM) 128 reg.ud = brw_swizzle_immediate(reg.type, reg.ud, swizzle); 129 else 130 reg.swizzle = brw_compose_swizzle(swizzle, reg.swizzle); 131 132 return reg; 133} 134 135static inline src_reg 136negate(src_reg reg) 137{ 138 assert(reg.file != IMM); 139 reg.negate = !reg.negate; 140 return reg; 141} 142 143static inline bool 144is_uniform(const src_reg ®) 145{ 146 return (reg.file == IMM || reg.file == UNIFORM || reg.is_null()) && 147 (!reg.reladdr || is_uniform(*reg.reladdr)); 148} 149 150class dst_reg : public backend_reg 151{ 152public: 153 DECLARE_RALLOC_CXX_OPERATORS(dst_reg) 154 155 void init(); 156 157 dst_reg(); 158 dst_reg(enum brw_reg_file file, int nr); 159 dst_reg(enum brw_reg_file file, int nr, const glsl_type *type, 160 unsigned writemask); 161 dst_reg(enum brw_reg_file file, int nr, brw_reg_type type, 162 unsigned writemask); 163 dst_reg(struct ::brw_reg reg); 164 dst_reg(class vec4_visitor *v, const struct glsl_type *type); 165 166 explicit dst_reg(const src_reg ®); 167 168 bool equals(const dst_reg &r) const; 169 170 src_reg *reladdr; 171}; 172 173static inline dst_reg 174retype(dst_reg reg, enum brw_reg_type type) 175{ 176 reg.type = type; 177 return reg; 178} 179 180static inline dst_reg 181byte_offset(dst_reg reg, unsigned bytes) 182{ 183 detail::add_byte_offset(®, bytes); 184 return reg; 185} 186 187static inline dst_reg 188offset(dst_reg reg, unsigned width, unsigned delta) 189{ 190 const unsigned stride = (reg.file == UNIFORM ? 0 : 4); 191 const unsigned num_components = MAX2(width / 4 * stride, 4); 192 return byte_offset(reg, num_components * type_sz(reg.type) * delta); 193} 194 195static inline dst_reg 196horiz_offset(const dst_reg ®, unsigned delta) 197{ 198 if (is_uniform(src_reg(reg))) 199 return reg; 200 else 201 return byte_offset(reg, delta * type_sz(reg.type)); 202} 203 204static inline dst_reg 205writemask(dst_reg reg, unsigned mask) 206{ 207 assert(reg.file != IMM); 208 assert((reg.writemask & mask) != 0); 209 reg.writemask &= mask; 210 return reg; 211} 212 213/** 214 * Return an integer identifying the discrete address space a register is 215 * contained in. A register is by definition fully contained in the single 216 * reg_space it belongs to, so two registers with different reg_space ids are 217 * guaranteed not to overlap. Most register files are a single reg_space of 218 * its own, only the VGRF file is composed of multiple discrete address 219 * spaces, one for each VGRF allocation. 220 */ 221static inline uint32_t 222reg_space(const backend_reg &r) 223{ 224 return r.file << 16 | (r.file == VGRF ? r.nr : 0); 225} 226 227/** 228 * Return the base offset in bytes of a register relative to the start of its 229 * reg_space(). 230 */ 231static inline unsigned 232reg_offset(const backend_reg &r) 233{ 234 return (r.file == VGRF || r.file == IMM ? 0 : r.nr) * 235 (r.file == UNIFORM ? 16 : REG_SIZE) + r.offset + 236 (r.file == ARF || r.file == FIXED_GRF ? r.subnr : 0); 237} 238 239/** 240 * Return whether the register region starting at \p r and spanning \p dr 241 * bytes could potentially overlap the register region starting at \p s and 242 * spanning \p ds bytes. 243 */ 244static inline bool 245regions_overlap(const backend_reg &r, unsigned dr, 246 const backend_reg &s, unsigned ds) 247{ 248 if (r.file == MRF && (r.nr & BRW_MRF_COMPR4)) { 249 /* COMPR4 regions are translated by the hardware during decompression 250 * into two separate half-regions 4 MRFs apart from each other. 251 */ 252 backend_reg t0 = r; 253 t0.nr &= ~BRW_MRF_COMPR4; 254 backend_reg t1 = t0; 255 t1.offset += 4 * REG_SIZE; 256 return regions_overlap(t0, dr / 2, s, ds) || 257 regions_overlap(t1, dr / 2, s, ds); 258 259 } else if (s.file == MRF && (s.nr & BRW_MRF_COMPR4)) { 260 return regions_overlap(s, ds, r, dr); 261 262 } else { 263 return reg_space(r) == reg_space(s) && 264 !(reg_offset(r) + dr <= reg_offset(s) || 265 reg_offset(s) + ds <= reg_offset(r)); 266 } 267} 268 269class vec4_instruction : public backend_instruction { 270public: 271 DECLARE_RALLOC_CXX_OPERATORS(vec4_instruction) 272 273 vec4_instruction(enum opcode opcode, 274 const dst_reg &dst = dst_reg(), 275 const src_reg &src0 = src_reg(), 276 const src_reg &src1 = src_reg(), 277 const src_reg &src2 = src_reg()); 278 279 dst_reg dst; 280 src_reg src[3]; 281 282 enum brw_urb_write_flags urb_write_flags; 283 284 unsigned sol_binding; /**< gfx6: SOL binding table index */ 285 bool sol_final_write; /**< gfx6: send commit message */ 286 unsigned sol_vertex; /**< gfx6: used for setting dst index in SVB header */ 287 288 bool is_send_from_grf() const; 289 unsigned size_read(unsigned arg) const; 290 bool can_reswizzle(const struct intel_device_info *devinfo, 291 int dst_writemask, 292 int swizzle, int swizzle_mask); 293 void reswizzle(int dst_writemask, int swizzle); 294 bool can_do_source_mods(const struct intel_device_info *devinfo); 295 bool can_do_cmod(); 296 bool can_do_writemask(const struct intel_device_info *devinfo); 297 bool can_change_types() const; 298 bool has_source_and_destination_hazard() const; 299 unsigned implied_mrf_writes() const; 300 301 bool is_align1_partial_write() 302 { 303 return opcode == VEC4_OPCODE_SET_LOW_32BIT || 304 opcode == VEC4_OPCODE_SET_HIGH_32BIT; 305 } 306 307 bool reads_flag() const 308 { 309 return predicate || opcode == VS_OPCODE_UNPACK_FLAGS_SIMD4X2; 310 } 311 312 bool reads_flag(unsigned c) 313 { 314 if (opcode == VS_OPCODE_UNPACK_FLAGS_SIMD4X2) 315 return true; 316 317 switch (predicate) { 318 case BRW_PREDICATE_NONE: 319 return false; 320 case BRW_PREDICATE_ALIGN16_REPLICATE_X: 321 return c == 0; 322 case BRW_PREDICATE_ALIGN16_REPLICATE_Y: 323 return c == 1; 324 case BRW_PREDICATE_ALIGN16_REPLICATE_Z: 325 return c == 2; 326 case BRW_PREDICATE_ALIGN16_REPLICATE_W: 327 return c == 3; 328 default: 329 return true; 330 } 331 } 332 333 bool writes_flag(const intel_device_info *devinfo) const 334 { 335 return (conditional_mod && ((opcode != BRW_OPCODE_SEL || devinfo->ver <= 5) && 336 opcode != BRW_OPCODE_CSEL && 337 opcode != BRW_OPCODE_IF && 338 opcode != BRW_OPCODE_WHILE)); 339 } 340 341 bool reads_g0_implicitly() const 342 { 343 switch (opcode) { 344 case SHADER_OPCODE_TEX: 345 case SHADER_OPCODE_TXL: 346 case SHADER_OPCODE_TXD: 347 case SHADER_OPCODE_TXF: 348 case SHADER_OPCODE_TXF_CMS_W: 349 case SHADER_OPCODE_TXF_CMS: 350 case SHADER_OPCODE_TXF_MCS: 351 case SHADER_OPCODE_TXS: 352 case SHADER_OPCODE_TG4: 353 case SHADER_OPCODE_TG4_OFFSET: 354 case SHADER_OPCODE_SAMPLEINFO: 355 case VS_OPCODE_PULL_CONSTANT_LOAD: 356 case GS_OPCODE_SET_PRIMITIVE_ID: 357 case GS_OPCODE_GET_INSTANCE_ID: 358 case SHADER_OPCODE_GFX4_SCRATCH_READ: 359 case SHADER_OPCODE_GFX4_SCRATCH_WRITE: 360 return true; 361 default: 362 return false; 363 } 364 } 365}; 366 367/** 368 * Make the execution of \p inst dependent on the evaluation of a possibly 369 * inverted predicate. 370 */ 371inline vec4_instruction * 372set_predicate_inv(enum brw_predicate pred, bool inverse, 373 vec4_instruction *inst) 374{ 375 inst->predicate = pred; 376 inst->predicate_inverse = inverse; 377 return inst; 378} 379 380/** 381 * Make the execution of \p inst dependent on the evaluation of a predicate. 382 */ 383inline vec4_instruction * 384set_predicate(enum brw_predicate pred, vec4_instruction *inst) 385{ 386 return set_predicate_inv(pred, false, inst); 387} 388 389/** 390 * Write the result of evaluating the condition given by \p mod to a flag 391 * register. 392 */ 393inline vec4_instruction * 394set_condmod(enum brw_conditional_mod mod, vec4_instruction *inst) 395{ 396 inst->conditional_mod = mod; 397 return inst; 398} 399 400/** 401 * Clamp the result of \p inst to the saturation range of its destination 402 * datatype. 403 */ 404inline vec4_instruction * 405set_saturate(bool saturate, vec4_instruction *inst) 406{ 407 inst->saturate = saturate; 408 return inst; 409} 410 411/** 412 * Return the number of dataflow registers written by the instruction (either 413 * fully or partially) counted from 'floor(reg_offset(inst->dst) / 414 * register_size)'. The somewhat arbitrary register size unit is 16B for the 415 * UNIFORM and IMM files and 32B for all other files. 416 */ 417inline unsigned 418regs_written(const vec4_instruction *inst) 419{ 420 assert(inst->dst.file != UNIFORM && inst->dst.file != IMM); 421 return DIV_ROUND_UP(reg_offset(inst->dst) % REG_SIZE + inst->size_written, 422 REG_SIZE); 423} 424 425/** 426 * Return the number of dataflow registers read by the instruction (either 427 * fully or partially) counted from 'floor(reg_offset(inst->src[i]) / 428 * register_size)'. The somewhat arbitrary register size unit is 16B for the 429 * UNIFORM and IMM files and 32B for all other files. 430 */ 431inline unsigned 432regs_read(const vec4_instruction *inst, unsigned i) 433{ 434 const unsigned reg_size = 435 inst->src[i].file == UNIFORM || inst->src[i].file == IMM ? 16 : REG_SIZE; 436 return DIV_ROUND_UP(reg_offset(inst->src[i]) % reg_size + inst->size_read(i), 437 reg_size); 438} 439 440static inline enum brw_reg_type 441get_exec_type(const vec4_instruction *inst) 442{ 443 enum brw_reg_type exec_type = BRW_REGISTER_TYPE_B; 444 445 for (int i = 0; i < 3; i++) { 446 if (inst->src[i].file != BAD_FILE) { 447 const brw_reg_type t = get_exec_type(brw_reg_type(inst->src[i].type)); 448 if (type_sz(t) > type_sz(exec_type)) 449 exec_type = t; 450 else if (type_sz(t) == type_sz(exec_type) && 451 brw_reg_type_is_floating_point(t)) 452 exec_type = t; 453 } 454 } 455 456 if (exec_type == BRW_REGISTER_TYPE_B) 457 exec_type = inst->dst.type; 458 459 /* TODO: We need to handle half-float conversions. */ 460 assert(exec_type != BRW_REGISTER_TYPE_HF || 461 inst->dst.type == BRW_REGISTER_TYPE_HF); 462 assert(exec_type != BRW_REGISTER_TYPE_B); 463 464 return exec_type; 465} 466 467static inline unsigned 468get_exec_type_size(const vec4_instruction *inst) 469{ 470 return type_sz(get_exec_type(inst)); 471} 472 473} /* namespace brw */ 474 475#endif 476