1/* 2 Copyright (C) Intel Corp. 2006. All Rights Reserved. 3 Intel funded Tungsten Graphics to 4 develop this 3D driver. 5 6 Permission is hereby granted, free of charge, to any person obtaining 7 a copy of this software and associated documentation files (the 8 "Software"), to deal in the Software without restriction, including 9 without limitation the rights to use, copy, modify, merge, publish, 10 distribute, sublicense, and/or sell copies of the Software, and to 11 permit persons to whom the Software is furnished to do so, subject to 12 the following conditions: 13 14 The above copyright notice and this permission notice (including the 15 next paragraph) shall be included in all copies or substantial 16 portions of the Software. 17 18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 26 **********************************************************************/ 27 /* 28 * Authors: 29 * Keith Whitwell <keithw@vmware.com> 30 */ 31 32/** @file brw_reg.h 33 * 34 * This file defines struct brw_reg, which is our representation for EU 35 * registers. They're not a hardware specific format, just an abstraction 36 * that intends to capture the full flexibility of the hardware registers. 37 * 38 * The brw_eu_emit.c layer's brw_set_dest/brw_set_src[01] functions encode 39 * the abstract brw_reg type into the actual hardware instruction encoding. 40 */ 41 42#ifndef BRW_REG_H 43#define BRW_REG_H 44 45#include <stdbool.h> 46#include "util/compiler.h" 47#include "main/macros.h" 48#include "program/prog_instruction.h" 49#include "brw_eu_defines.h" 50#include "brw_reg_type.h" 51 52#ifdef __cplusplus 53extern "C" { 54#endif 55 56struct intel_device_info; 57 58/** Number of general purpose registers (VS, WM, etc) */ 59#define BRW_MAX_GRF 128 60 61/** 62 * First GRF used for the MRF hack. 63 * 64 * On gfx7, MRFs are no longer used, and contiguous GRFs are used instead. We 65 * haven't converted our compiler to be aware of this, so it asks for MRFs and 66 * brw_eu_emit.c quietly converts them to be accesses of the top GRFs. The 67 * register allocators have to be careful of this to avoid corrupting the "MRF"s 68 * with actual GRF allocations. 69 */ 70#define GFX7_MRF_HACK_START 112 71 72/** Number of message register file registers */ 73#define BRW_MAX_MRF(gen) (gen == 6 ? 24 : 16) 74 75#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6)) 76#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3) 77 78#define BRW_SWIZZLE_NOOP BRW_SWIZZLE4(0,1,2,3) 79#define BRW_SWIZZLE_XYZW BRW_SWIZZLE4(0,1,2,3) 80#define BRW_SWIZZLE_XXXX BRW_SWIZZLE4(0,0,0,0) 81#define BRW_SWIZZLE_YYYY BRW_SWIZZLE4(1,1,1,1) 82#define BRW_SWIZZLE_ZZZZ BRW_SWIZZLE4(2,2,2,2) 83#define BRW_SWIZZLE_WWWW BRW_SWIZZLE4(3,3,3,3) 84#define BRW_SWIZZLE_XYXY BRW_SWIZZLE4(0,1,0,1) 85#define BRW_SWIZZLE_YXYX BRW_SWIZZLE4(1,0,1,0) 86#define BRW_SWIZZLE_XZXZ BRW_SWIZZLE4(0,2,0,2) 87#define BRW_SWIZZLE_YZXW BRW_SWIZZLE4(1,2,0,3) 88#define BRW_SWIZZLE_YWYW BRW_SWIZZLE4(1,3,1,3) 89#define BRW_SWIZZLE_ZXYW BRW_SWIZZLE4(2,0,1,3) 90#define BRW_SWIZZLE_ZWZW BRW_SWIZZLE4(2,3,2,3) 91#define BRW_SWIZZLE_WZWZ BRW_SWIZZLE4(3,2,3,2) 92#define BRW_SWIZZLE_WZYX BRW_SWIZZLE4(3,2,1,0) 93#define BRW_SWIZZLE_XXZZ BRW_SWIZZLE4(0,0,2,2) 94#define BRW_SWIZZLE_YYWW BRW_SWIZZLE4(1,1,3,3) 95#define BRW_SWIZZLE_YXWZ BRW_SWIZZLE4(1,0,3,2) 96 97#define BRW_SWZ_COMP_INPUT(comp) (BRW_SWIZZLE_XYZW >> ((comp)*2)) 98#define BRW_SWZ_COMP_OUTPUT(comp) (BRW_SWIZZLE_XYZW << ((comp)*2)) 99 100static inline bool 101brw_is_single_value_swizzle(unsigned swiz) 102{ 103 return (swiz == BRW_SWIZZLE_XXXX || 104 swiz == BRW_SWIZZLE_YYYY || 105 swiz == BRW_SWIZZLE_ZZZZ || 106 swiz == BRW_SWIZZLE_WWWW); 107} 108 109/** 110 * Compute the swizzle obtained from the application of \p swz0 on the result 111 * of \p swz1. The argument ordering is expected to match function 112 * composition. 113 */ 114static inline unsigned 115brw_compose_swizzle(unsigned swz0, unsigned swz1) 116{ 117 return BRW_SWIZZLE4( 118 BRW_GET_SWZ(swz1, BRW_GET_SWZ(swz0, 0)), 119 BRW_GET_SWZ(swz1, BRW_GET_SWZ(swz0, 1)), 120 BRW_GET_SWZ(swz1, BRW_GET_SWZ(swz0, 2)), 121 BRW_GET_SWZ(swz1, BRW_GET_SWZ(swz0, 3))); 122} 123 124/** 125 * Return the result of applying swizzle \p swz to shuffle the bits of \p mask 126 * (AKA image). 127 */ 128static inline unsigned 129brw_apply_swizzle_to_mask(unsigned swz, unsigned mask) 130{ 131 unsigned result = 0; 132 133 for (unsigned i = 0; i < 4; i++) { 134 if (mask & (1 << BRW_GET_SWZ(swz, i))) 135 result |= 1 << i; 136 } 137 138 return result; 139} 140 141/** 142 * Return the result of applying the inverse of swizzle \p swz to shuffle the 143 * bits of \p mask (AKA preimage). Useful to find out which components are 144 * read from a swizzled source given the instruction writemask. 145 */ 146static inline unsigned 147brw_apply_inv_swizzle_to_mask(unsigned swz, unsigned mask) 148{ 149 unsigned result = 0; 150 151 for (unsigned i = 0; i < 4; i++) { 152 if (mask & (1 << i)) 153 result |= 1 << BRW_GET_SWZ(swz, i); 154 } 155 156 return result; 157} 158 159/** 160 * Construct an identity swizzle for the set of enabled channels given by \p 161 * mask. The result will only reference channels enabled in the provided \p 162 * mask, assuming that \p mask is non-zero. The constructed swizzle will 163 * satisfy the property that for any instruction OP and any mask: 164 * 165 * brw_OP(p, brw_writemask(dst, mask), 166 * brw_swizzle(src, brw_swizzle_for_mask(mask))); 167 * 168 * will be equivalent to the same instruction without swizzle: 169 * 170 * brw_OP(p, brw_writemask(dst, mask), src); 171 */ 172static inline unsigned 173brw_swizzle_for_mask(unsigned mask) 174{ 175 unsigned last = (mask ? ffs(mask) - 1 : 0); 176 unsigned swz[4]; 177 178 for (unsigned i = 0; i < 4; i++) 179 last = swz[i] = (mask & (1 << i) ? i : last); 180 181 return BRW_SWIZZLE4(swz[0], swz[1], swz[2], swz[3]); 182} 183 184/** 185 * Construct an identity swizzle for the first \p n components of a vector. 186 * When only a subset of channels of a vec4 are used we don't want to 187 * reference the other channels, as that will tell optimization passes that 188 * those other channels are used. 189 */ 190static inline unsigned 191brw_swizzle_for_size(unsigned n) 192{ 193 return brw_swizzle_for_mask((1 << n) - 1); 194} 195 196/** 197 * Converse of brw_swizzle_for_mask(). Returns the mask of components 198 * accessed by the specified swizzle \p swz. 199 */ 200static inline unsigned 201brw_mask_for_swizzle(unsigned swz) 202{ 203 return brw_apply_inv_swizzle_to_mask(swz, ~0); 204} 205 206uint32_t brw_swizzle_immediate(enum brw_reg_type type, uint32_t x, unsigned swz); 207 208#define REG_SIZE (8*4) 209 210/* These aren't hardware structs, just something useful for us to pass around: 211 * 212 * Align1 operation has a lot of control over input ranges. Used in 213 * WM programs to implement shaders decomposed into "channel serial" 214 * or "structure of array" form: 215 */ 216struct brw_reg { 217 union { 218 struct { 219 enum brw_reg_type type:4; 220 enum brw_reg_file file:3; /* :2 hardware format */ 221 unsigned negate:1; /* source only */ 222 unsigned abs:1; /* source only */ 223 unsigned address_mode:1; /* relative addressing, hopefully! */ 224 unsigned pad0:17; 225 unsigned subnr:5; /* :1 in align16 */ 226 }; 227 uint32_t bits; 228 }; 229 230 union { 231 struct { 232 unsigned nr; 233 unsigned swizzle:8; /* src only, align16 only */ 234 unsigned writemask:4; /* dest only, align16 only */ 235 int indirect_offset:10; /* relative addressing offset */ 236 unsigned vstride:4; /* source only */ 237 unsigned width:3; /* src only, align1 only */ 238 unsigned hstride:2; /* align1 only */ 239 unsigned pad1:1; 240 }; 241 242 double df; 243 uint64_t u64; 244 int64_t d64; 245 float f; 246 int d; 247 unsigned ud; 248 }; 249}; 250 251static inline bool 252brw_regs_equal(const struct brw_reg *a, const struct brw_reg *b) 253{ 254 return a->bits == b->bits && a->u64 == b->u64; 255} 256 257static inline bool 258brw_regs_negative_equal(const struct brw_reg *a, const struct brw_reg *b) 259{ 260 if (a->file == IMM) { 261 if (a->bits != b->bits) 262 return false; 263 264 switch ((enum brw_reg_type) a->type) { 265 case BRW_REGISTER_TYPE_UQ: 266 case BRW_REGISTER_TYPE_Q: 267 return a->d64 == -b->d64; 268 case BRW_REGISTER_TYPE_DF: 269 return a->df == -b->df; 270 case BRW_REGISTER_TYPE_UD: 271 case BRW_REGISTER_TYPE_D: 272 return a->d == -b->d; 273 case BRW_REGISTER_TYPE_F: 274 return a->f == -b->f; 275 case BRW_REGISTER_TYPE_VF: 276 /* It is tempting to treat 0 as a negation of 0 (and -0 as a negation 277 * of -0). There are occasions where 0 or -0 is used and the exact 278 * bit pattern is desired. At the very least, changing this to allow 279 * 0 as a negation of 0 causes some fp64 tests to fail on IVB. 280 */ 281 return a->ud == (b->ud ^ 0x80808080); 282 case BRW_REGISTER_TYPE_UW: 283 case BRW_REGISTER_TYPE_W: 284 case BRW_REGISTER_TYPE_UV: 285 case BRW_REGISTER_TYPE_V: 286 case BRW_REGISTER_TYPE_HF: 287 /* FINISHME: Implement support for these types once there is 288 * something in the compiler that can generate them. Until then, 289 * they cannot be tested. 290 */ 291 return false; 292 case BRW_REGISTER_TYPE_UB: 293 case BRW_REGISTER_TYPE_B: 294 case BRW_REGISTER_TYPE_NF: 295 default: 296 unreachable("not reached"); 297 } 298 } else { 299 struct brw_reg tmp = *a; 300 301 tmp.negate = !tmp.negate; 302 303 return brw_regs_equal(&tmp, b); 304 } 305} 306 307struct brw_indirect { 308 unsigned addr_subnr:4; 309 int addr_offset:10; 310 unsigned pad:18; 311}; 312 313 314static inline unsigned 315type_sz(unsigned type) 316{ 317 switch(type) { 318 case BRW_REGISTER_TYPE_UQ: 319 case BRW_REGISTER_TYPE_Q: 320 case BRW_REGISTER_TYPE_DF: 321 case BRW_REGISTER_TYPE_NF: 322 return 8; 323 case BRW_REGISTER_TYPE_UD: 324 case BRW_REGISTER_TYPE_D: 325 case BRW_REGISTER_TYPE_F: 326 case BRW_REGISTER_TYPE_VF: 327 return 4; 328 case BRW_REGISTER_TYPE_UW: 329 case BRW_REGISTER_TYPE_W: 330 case BRW_REGISTER_TYPE_HF: 331 /* [U]V components are 4-bit, but HW unpacks them to 16-bit (2 bytes) */ 332 case BRW_REGISTER_TYPE_UV: 333 case BRW_REGISTER_TYPE_V: 334 return 2; 335 case BRW_REGISTER_TYPE_UB: 336 case BRW_REGISTER_TYPE_B: 337 return 1; 338 default: 339 unreachable("not reached"); 340 } 341} 342 343static inline enum brw_reg_type 344get_exec_type(const enum brw_reg_type type) 345{ 346 switch (type) { 347 case BRW_REGISTER_TYPE_B: 348 case BRW_REGISTER_TYPE_V: 349 return BRW_REGISTER_TYPE_W; 350 case BRW_REGISTER_TYPE_UB: 351 case BRW_REGISTER_TYPE_UV: 352 return BRW_REGISTER_TYPE_UW; 353 case BRW_REGISTER_TYPE_VF: 354 return BRW_REGISTER_TYPE_F; 355 default: 356 return type; 357 } 358} 359 360/** 361 * Return an integer type of the requested size and signedness. 362 */ 363static inline enum brw_reg_type 364brw_int_type(unsigned sz, bool is_signed) 365{ 366 switch (sz) { 367 case 1: 368 return (is_signed ? BRW_REGISTER_TYPE_B : BRW_REGISTER_TYPE_UB); 369 case 2: 370 return (is_signed ? BRW_REGISTER_TYPE_W : BRW_REGISTER_TYPE_UW); 371 case 4: 372 return (is_signed ? BRW_REGISTER_TYPE_D : BRW_REGISTER_TYPE_UD); 373 case 8: 374 return (is_signed ? BRW_REGISTER_TYPE_Q : BRW_REGISTER_TYPE_UQ); 375 default: 376 unreachable("Not reached."); 377 } 378} 379 380/** 381 * Construct a brw_reg. 382 * \param file one of the BRW_x_REGISTER_FILE values 383 * \param nr register number/index 384 * \param subnr register sub number 385 * \param negate register negate modifier 386 * \param abs register abs modifier 387 * \param type one of BRW_REGISTER_TYPE_x 388 * \param vstride one of BRW_VERTICAL_STRIDE_x 389 * \param width one of BRW_WIDTH_x 390 * \param hstride one of BRW_HORIZONTAL_STRIDE_x 391 * \param swizzle one of BRW_SWIZZLE_x 392 * \param writemask WRITEMASK_X/Y/Z/W bitfield 393 */ 394static inline struct brw_reg 395brw_reg(enum brw_reg_file file, 396 unsigned nr, 397 unsigned subnr, 398 unsigned negate, 399 unsigned abs, 400 enum brw_reg_type type, 401 unsigned vstride, 402 unsigned width, 403 unsigned hstride, 404 unsigned swizzle, 405 unsigned writemask) 406{ 407 struct brw_reg reg; 408 if (file == BRW_GENERAL_REGISTER_FILE) 409 assert(nr < BRW_MAX_GRF); 410 else if (file == BRW_ARCHITECTURE_REGISTER_FILE) 411 assert(nr <= BRW_ARF_TIMESTAMP); 412 /* Asserting on the MRF register number requires to know the hardware gen 413 * (gfx6 has 24 MRF registers), which we don't know here, so we assert 414 * for that in the generators and in brw_eu_emit.c 415 */ 416 417 reg.type = type; 418 reg.file = file; 419 reg.negate = negate; 420 reg.abs = abs; 421 reg.address_mode = BRW_ADDRESS_DIRECT; 422 reg.pad0 = 0; 423 reg.subnr = subnr * type_sz(type); 424 reg.nr = nr; 425 426 /* Could do better: If the reg is r5.3<0;1,0>, we probably want to 427 * set swizzle and writemask to W, as the lower bits of subnr will 428 * be lost when converted to align16. This is probably too much to 429 * keep track of as you'd want it adjusted by suboffset(), etc. 430 * Perhaps fix up when converting to align16? 431 */ 432 reg.swizzle = swizzle; 433 reg.writemask = writemask; 434 reg.indirect_offset = 0; 435 reg.vstride = vstride; 436 reg.width = width; 437 reg.hstride = hstride; 438 reg.pad1 = 0; 439 return reg; 440} 441 442/** Construct float[16] register */ 443static inline struct brw_reg 444brw_vec16_reg(enum brw_reg_file file, unsigned nr, unsigned subnr) 445{ 446 return brw_reg(file, 447 nr, 448 subnr, 449 0, 450 0, 451 BRW_REGISTER_TYPE_F, 452 BRW_VERTICAL_STRIDE_16, 453 BRW_WIDTH_16, 454 BRW_HORIZONTAL_STRIDE_1, 455 BRW_SWIZZLE_XYZW, 456 WRITEMASK_XYZW); 457} 458 459/** Construct float[8] register */ 460static inline struct brw_reg 461brw_vec8_reg(enum brw_reg_file file, unsigned nr, unsigned subnr) 462{ 463 return brw_reg(file, 464 nr, 465 subnr, 466 0, 467 0, 468 BRW_REGISTER_TYPE_F, 469 BRW_VERTICAL_STRIDE_8, 470 BRW_WIDTH_8, 471 BRW_HORIZONTAL_STRIDE_1, 472 BRW_SWIZZLE_XYZW, 473 WRITEMASK_XYZW); 474} 475 476/** Construct float[4] register */ 477static inline struct brw_reg 478brw_vec4_reg(enum brw_reg_file file, unsigned nr, unsigned subnr) 479{ 480 return brw_reg(file, 481 nr, 482 subnr, 483 0, 484 0, 485 BRW_REGISTER_TYPE_F, 486 BRW_VERTICAL_STRIDE_4, 487 BRW_WIDTH_4, 488 BRW_HORIZONTAL_STRIDE_1, 489 BRW_SWIZZLE_XYZW, 490 WRITEMASK_XYZW); 491} 492 493/** Construct float[2] register */ 494static inline struct brw_reg 495brw_vec2_reg(enum brw_reg_file file, unsigned nr, unsigned subnr) 496{ 497 return brw_reg(file, 498 nr, 499 subnr, 500 0, 501 0, 502 BRW_REGISTER_TYPE_F, 503 BRW_VERTICAL_STRIDE_2, 504 BRW_WIDTH_2, 505 BRW_HORIZONTAL_STRIDE_1, 506 BRW_SWIZZLE_XYXY, 507 WRITEMASK_XY); 508} 509 510/** Construct float[1] register */ 511static inline struct brw_reg 512brw_vec1_reg(enum brw_reg_file file, unsigned nr, unsigned subnr) 513{ 514 return brw_reg(file, 515 nr, 516 subnr, 517 0, 518 0, 519 BRW_REGISTER_TYPE_F, 520 BRW_VERTICAL_STRIDE_0, 521 BRW_WIDTH_1, 522 BRW_HORIZONTAL_STRIDE_0, 523 BRW_SWIZZLE_XXXX, 524 WRITEMASK_X); 525} 526 527static inline struct brw_reg 528brw_vecn_reg(unsigned width, enum brw_reg_file file, 529 unsigned nr, unsigned subnr) 530{ 531 switch (width) { 532 case 1: 533 return brw_vec1_reg(file, nr, subnr); 534 case 2: 535 return brw_vec2_reg(file, nr, subnr); 536 case 4: 537 return brw_vec4_reg(file, nr, subnr); 538 case 8: 539 return brw_vec8_reg(file, nr, subnr); 540 case 16: 541 return brw_vec16_reg(file, nr, subnr); 542 default: 543 unreachable("Invalid register width"); 544 } 545} 546 547static inline struct brw_reg 548retype(struct brw_reg reg, enum brw_reg_type type) 549{ 550 reg.type = type; 551 return reg; 552} 553 554static inline struct brw_reg 555firsthalf(struct brw_reg reg) 556{ 557 return reg; 558} 559 560static inline struct brw_reg 561sechalf(struct brw_reg reg) 562{ 563 if (reg.vstride) 564 reg.nr++; 565 return reg; 566} 567 568static inline struct brw_reg 569offset(struct brw_reg reg, unsigned delta) 570{ 571 reg.nr += delta; 572 return reg; 573} 574 575 576static inline struct brw_reg 577byte_offset(struct brw_reg reg, unsigned bytes) 578{ 579 unsigned newoffset = reg.nr * REG_SIZE + reg.subnr + bytes; 580 reg.nr = newoffset / REG_SIZE; 581 reg.subnr = newoffset % REG_SIZE; 582 return reg; 583} 584 585static inline struct brw_reg 586suboffset(struct brw_reg reg, unsigned delta) 587{ 588 return byte_offset(reg, delta * type_sz(reg.type)); 589} 590 591/** Construct unsigned word[16] register */ 592static inline struct brw_reg 593brw_uw16_reg(enum brw_reg_file file, unsigned nr, unsigned subnr) 594{ 595 return suboffset(retype(brw_vec16_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); 596} 597 598/** Construct unsigned word[8] register */ 599static inline struct brw_reg 600brw_uw8_reg(enum brw_reg_file file, unsigned nr, unsigned subnr) 601{ 602 return suboffset(retype(brw_vec8_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); 603} 604 605/** Construct unsigned word[1] register */ 606static inline struct brw_reg 607brw_uw1_reg(enum brw_reg_file file, unsigned nr, unsigned subnr) 608{ 609 return suboffset(retype(brw_vec1_reg(file, nr, 0), BRW_REGISTER_TYPE_UW), subnr); 610} 611 612static inline struct brw_reg 613brw_ud1_reg(enum brw_reg_file file, unsigned nr, unsigned subnr) 614{ 615 return retype(brw_vec1_reg(file, nr, subnr), BRW_REGISTER_TYPE_UD); 616} 617 618static inline struct brw_reg 619brw_imm_reg(enum brw_reg_type type) 620{ 621 return brw_reg(BRW_IMMEDIATE_VALUE, 622 0, 623 0, 624 0, 625 0, 626 type, 627 BRW_VERTICAL_STRIDE_0, 628 BRW_WIDTH_1, 629 BRW_HORIZONTAL_STRIDE_0, 630 0, 631 0); 632} 633 634/** Construct float immediate register */ 635static inline struct brw_reg 636brw_imm_df(double df) 637{ 638 struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_DF); 639 imm.df = df; 640 return imm; 641} 642 643static inline struct brw_reg 644brw_imm_u64(uint64_t u64) 645{ 646 struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UQ); 647 imm.u64 = u64; 648 return imm; 649} 650 651static inline struct brw_reg 652brw_imm_f(float f) 653{ 654 struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_F); 655 imm.f = f; 656 return imm; 657} 658 659/** Construct int64_t immediate register */ 660static inline struct brw_reg 661brw_imm_q(int64_t q) 662{ 663 struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_Q); 664 imm.d64 = q; 665 return imm; 666} 667 668/** Construct int64_t immediate register */ 669static inline struct brw_reg 670brw_imm_uq(uint64_t uq) 671{ 672 struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UQ); 673 imm.u64 = uq; 674 return imm; 675} 676 677/** Construct integer immediate register */ 678static inline struct brw_reg 679brw_imm_d(int d) 680{ 681 struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_D); 682 imm.d = d; 683 return imm; 684} 685 686/** Construct uint immediate register */ 687static inline struct brw_reg 688brw_imm_ud(unsigned ud) 689{ 690 struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UD); 691 imm.ud = ud; 692 return imm; 693} 694 695/** Construct ushort immediate register */ 696static inline struct brw_reg 697brw_imm_uw(uint16_t uw) 698{ 699 struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UW); 700 imm.ud = uw | (uw << 16); 701 return imm; 702} 703 704/** Construct short immediate register */ 705static inline struct brw_reg 706brw_imm_w(int16_t w) 707{ 708 struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_W); 709 imm.ud = (uint16_t)w | (uint32_t)(uint16_t)w << 16; 710 return imm; 711} 712 713/* brw_imm_b and brw_imm_ub aren't supported by hardware - the type 714 * numbers alias with _V and _VF below: 715 */ 716 717/** Construct vector of eight signed half-byte values */ 718static inline struct brw_reg 719brw_imm_v(unsigned v) 720{ 721 struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_V); 722 imm.ud = v; 723 return imm; 724} 725 726/** Construct vector of eight unsigned half-byte values */ 727static inline struct brw_reg 728brw_imm_uv(unsigned uv) 729{ 730 struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_UV); 731 imm.ud = uv; 732 return imm; 733} 734 735/** Construct vector of four 8-bit float values */ 736static inline struct brw_reg 737brw_imm_vf(unsigned v) 738{ 739 struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF); 740 imm.ud = v; 741 return imm; 742} 743 744static inline struct brw_reg 745brw_imm_vf4(unsigned v0, unsigned v1, unsigned v2, unsigned v3) 746{ 747 struct brw_reg imm = brw_imm_reg(BRW_REGISTER_TYPE_VF); 748 imm.vstride = BRW_VERTICAL_STRIDE_0; 749 imm.width = BRW_WIDTH_4; 750 imm.hstride = BRW_HORIZONTAL_STRIDE_1; 751 imm.ud = ((v0 << 0) | (v1 << 8) | (v2 << 16) | (v3 << 24)); 752 return imm; 753} 754 755 756static inline struct brw_reg 757brw_address(struct brw_reg reg) 758{ 759 return brw_imm_uw(reg.nr * REG_SIZE + reg.subnr); 760} 761 762/** Construct float[1] general-purpose register */ 763static inline struct brw_reg 764brw_vec1_grf(unsigned nr, unsigned subnr) 765{ 766 return brw_vec1_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); 767} 768 769/** Construct float[2] general-purpose register */ 770static inline struct brw_reg 771brw_vec2_grf(unsigned nr, unsigned subnr) 772{ 773 return brw_vec2_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); 774} 775 776/** Construct float[4] general-purpose register */ 777static inline struct brw_reg 778brw_vec4_grf(unsigned nr, unsigned subnr) 779{ 780 return brw_vec4_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); 781} 782 783/** Construct float[8] general-purpose register */ 784static inline struct brw_reg 785brw_vec8_grf(unsigned nr, unsigned subnr) 786{ 787 return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); 788} 789 790/** Construct float[16] general-purpose register */ 791static inline struct brw_reg 792brw_vec16_grf(unsigned nr, unsigned subnr) 793{ 794 return brw_vec16_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); 795} 796 797static inline struct brw_reg 798brw_vecn_grf(unsigned width, unsigned nr, unsigned subnr) 799{ 800 return brw_vecn_reg(width, BRW_GENERAL_REGISTER_FILE, nr, subnr); 801} 802 803 804static inline struct brw_reg 805brw_uw8_grf(unsigned nr, unsigned subnr) 806{ 807 return brw_uw8_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); 808} 809 810static inline struct brw_reg 811brw_uw16_grf(unsigned nr, unsigned subnr) 812{ 813 return brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, nr, subnr); 814} 815 816 817/** Construct null register (usually used for setting condition codes) */ 818static inline struct brw_reg 819brw_null_reg(void) 820{ 821 return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_NULL, 0); 822} 823 824static inline struct brw_reg 825brw_null_vec(unsigned width) 826{ 827 return brw_vecn_reg(width, BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_NULL, 0); 828} 829 830static inline struct brw_reg 831brw_address_reg(unsigned subnr) 832{ 833 return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_ADDRESS, subnr); 834} 835 836static inline struct brw_reg 837brw_tdr_reg(void) 838{ 839 return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_TDR, 0); 840} 841 842/* If/else instructions break in align16 mode if writemask & swizzle 843 * aren't xyzw. This goes against the convention for other scalar 844 * regs: 845 */ 846static inline struct brw_reg 847brw_ip_reg(void) 848{ 849 return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, 850 BRW_ARF_IP, 851 0, 852 0, 853 0, 854 BRW_REGISTER_TYPE_UD, 855 BRW_VERTICAL_STRIDE_4, /* ? */ 856 BRW_WIDTH_1, 857 BRW_HORIZONTAL_STRIDE_0, 858 BRW_SWIZZLE_XYZW, /* NOTE! */ 859 WRITEMASK_XYZW); /* NOTE! */ 860} 861 862static inline struct brw_reg 863brw_notification_reg(void) 864{ 865 return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE, 866 BRW_ARF_NOTIFICATION_COUNT, 867 0, 868 0, 869 0, 870 BRW_REGISTER_TYPE_UD, 871 BRW_VERTICAL_STRIDE_0, 872 BRW_WIDTH_1, 873 BRW_HORIZONTAL_STRIDE_0, 874 BRW_SWIZZLE_XXXX, 875 WRITEMASK_X); 876} 877 878static inline struct brw_reg 879brw_cr0_reg(unsigned subnr) 880{ 881 return brw_ud1_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_CONTROL, subnr); 882} 883 884static inline struct brw_reg 885brw_sr0_reg(unsigned subnr) 886{ 887 return brw_ud1_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_STATE, subnr); 888} 889 890static inline struct brw_reg 891brw_acc_reg(unsigned width) 892{ 893 return brw_vecn_reg(width, BRW_ARCHITECTURE_REGISTER_FILE, 894 BRW_ARF_ACCUMULATOR, 0); 895} 896 897static inline struct brw_reg 898brw_flag_reg(int reg, int subreg) 899{ 900 return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, 901 BRW_ARF_FLAG + reg, subreg); 902} 903 904static inline struct brw_reg 905brw_flag_subreg(unsigned subreg) 906{ 907 return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, 908 BRW_ARF_FLAG + subreg / 2, subreg % 2); 909} 910 911/** 912 * Return the mask register present in Gfx4-5, or the related register present 913 * in Gfx7.5 and later hardware referred to as "channel enable" register in 914 * the documentation. 915 */ 916static inline struct brw_reg 917brw_mask_reg(unsigned subnr) 918{ 919 return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_MASK, subnr); 920} 921 922static inline struct brw_reg 923brw_vmask_reg() 924{ 925 return brw_sr0_reg(3); 926} 927 928static inline struct brw_reg 929brw_dmask_reg() 930{ 931 return brw_sr0_reg(2); 932} 933 934static inline struct brw_reg 935brw_mask_stack_reg(unsigned subnr) 936{ 937 return suboffset(retype(brw_vec16_reg(BRW_ARCHITECTURE_REGISTER_FILE, 938 BRW_ARF_MASK_STACK, 0), 939 BRW_REGISTER_TYPE_UB), subnr); 940} 941 942static inline struct brw_reg 943brw_mask_stack_depth_reg(unsigned subnr) 944{ 945 return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE, 946 BRW_ARF_MASK_STACK_DEPTH, subnr); 947} 948 949static inline struct brw_reg 950brw_message_reg(unsigned nr) 951{ 952 return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE, nr, 0); 953} 954 955static inline struct brw_reg 956brw_uvec_mrf(unsigned width, unsigned nr, unsigned subnr) 957{ 958 return retype(brw_vecn_reg(width, BRW_MESSAGE_REGISTER_FILE, nr, subnr), 959 BRW_REGISTER_TYPE_UD); 960} 961 962/* This is almost always called with a numeric constant argument, so 963 * make things easy to evaluate at compile time: 964 */ 965static inline unsigned cvt(unsigned val) 966{ 967 switch (val) { 968 case 0: return 0; 969 case 1: return 1; 970 case 2: return 2; 971 case 4: return 3; 972 case 8: return 4; 973 case 16: return 5; 974 case 32: return 6; 975 } 976 return 0; 977} 978 979static inline struct brw_reg 980stride(struct brw_reg reg, unsigned vstride, unsigned width, unsigned hstride) 981{ 982 reg.vstride = cvt(vstride); 983 reg.width = cvt(width) - 1; 984 reg.hstride = cvt(hstride); 985 return reg; 986} 987 988/** 989 * Multiply the vertical and horizontal stride of a register by the given 990 * factor \a s. 991 */ 992static inline struct brw_reg 993spread(struct brw_reg reg, unsigned s) 994{ 995 if (s) { 996 assert(util_is_power_of_two_nonzero(s)); 997 998 if (reg.hstride) 999 reg.hstride += cvt(s) - 1; 1000 1001 if (reg.vstride) 1002 reg.vstride += cvt(s) - 1; 1003 1004 return reg; 1005 } else { 1006 return stride(reg, 0, 1, 0); 1007 } 1008} 1009 1010/** 1011 * Reinterpret each channel of register \p reg as a vector of values of the 1012 * given smaller type and take the i-th subcomponent from each. 1013 */ 1014static inline struct brw_reg 1015subscript(struct brw_reg reg, enum brw_reg_type type, unsigned i) 1016{ 1017 unsigned scale = type_sz(reg.type) / type_sz(type); 1018 assert(scale >= 1 && i < scale); 1019 1020 if (reg.file == IMM) { 1021 unsigned bit_size = type_sz(type) * 8; 1022 reg.u64 >>= i * bit_size; 1023 reg.u64 &= BITFIELD64_MASK(bit_size); 1024 if (bit_size <= 16) 1025 reg.u64 |= reg.u64 << 16; 1026 return retype(reg, type); 1027 } 1028 1029 return suboffset(retype(spread(reg, scale), type), i); 1030} 1031 1032static inline struct brw_reg 1033vec16(struct brw_reg reg) 1034{ 1035 return stride(reg, 16,16,1); 1036} 1037 1038static inline struct brw_reg 1039vec8(struct brw_reg reg) 1040{ 1041 return stride(reg, 8,8,1); 1042} 1043 1044static inline struct brw_reg 1045vec4(struct brw_reg reg) 1046{ 1047 return stride(reg, 4,4,1); 1048} 1049 1050static inline struct brw_reg 1051vec2(struct brw_reg reg) 1052{ 1053 return stride(reg, 2,2,1); 1054} 1055 1056static inline struct brw_reg 1057vec1(struct brw_reg reg) 1058{ 1059 return stride(reg, 0,1,0); 1060} 1061 1062 1063static inline struct brw_reg 1064get_element(struct brw_reg reg, unsigned elt) 1065{ 1066 return vec1(suboffset(reg, elt)); 1067} 1068 1069static inline struct brw_reg 1070get_element_ud(struct brw_reg reg, unsigned elt) 1071{ 1072 return vec1(suboffset(retype(reg, BRW_REGISTER_TYPE_UD), elt)); 1073} 1074 1075static inline struct brw_reg 1076get_element_d(struct brw_reg reg, unsigned elt) 1077{ 1078 return vec1(suboffset(retype(reg, BRW_REGISTER_TYPE_D), elt)); 1079} 1080 1081static inline struct brw_reg 1082brw_swizzle(struct brw_reg reg, unsigned swz) 1083{ 1084 if (reg.file == BRW_IMMEDIATE_VALUE) 1085 reg.ud = brw_swizzle_immediate(reg.type, reg.ud, swz); 1086 else 1087 reg.swizzle = brw_compose_swizzle(swz, reg.swizzle); 1088 1089 return reg; 1090} 1091 1092static inline struct brw_reg 1093brw_writemask(struct brw_reg reg, unsigned mask) 1094{ 1095 assert(reg.file != BRW_IMMEDIATE_VALUE); 1096 reg.writemask &= mask; 1097 return reg; 1098} 1099 1100static inline struct brw_reg 1101brw_set_writemask(struct brw_reg reg, unsigned mask) 1102{ 1103 assert(reg.file != BRW_IMMEDIATE_VALUE); 1104 reg.writemask = mask; 1105 return reg; 1106} 1107 1108static inline unsigned 1109brw_writemask_for_size(unsigned n) 1110{ 1111 return (1 << n) - 1; 1112} 1113 1114static inline unsigned 1115brw_writemask_for_component_packing(unsigned n, unsigned first_component) 1116{ 1117 assert(first_component + n <= 4); 1118 return (((1 << n) - 1) << first_component); 1119} 1120 1121static inline struct brw_reg 1122negate(struct brw_reg reg) 1123{ 1124 reg.negate ^= 1; 1125 return reg; 1126} 1127 1128static inline struct brw_reg 1129brw_abs(struct brw_reg reg) 1130{ 1131 reg.abs = 1; 1132 reg.negate = 0; 1133 return reg; 1134} 1135 1136/************************************************************************/ 1137 1138static inline struct brw_reg 1139brw_vec4_indirect(unsigned subnr, int offset) 1140{ 1141 struct brw_reg reg = brw_vec4_grf(0, 0); 1142 reg.subnr = subnr; 1143 reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER; 1144 reg.indirect_offset = offset; 1145 return reg; 1146} 1147 1148static inline struct brw_reg 1149brw_vec1_indirect(unsigned subnr, int offset) 1150{ 1151 struct brw_reg reg = brw_vec1_grf(0, 0); 1152 reg.subnr = subnr; 1153 reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER; 1154 reg.indirect_offset = offset; 1155 return reg; 1156} 1157 1158static inline struct brw_reg 1159brw_VxH_indirect(unsigned subnr, int offset) 1160{ 1161 struct brw_reg reg = brw_vec1_grf(0, 0); 1162 reg.vstride = BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL; 1163 reg.subnr = subnr; 1164 reg.address_mode = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER; 1165 reg.indirect_offset = offset; 1166 return reg; 1167} 1168 1169static inline struct brw_reg 1170deref_4f(struct brw_indirect ptr, int offset) 1171{ 1172 return brw_vec4_indirect(ptr.addr_subnr, ptr.addr_offset + offset); 1173} 1174 1175static inline struct brw_reg 1176deref_1f(struct brw_indirect ptr, int offset) 1177{ 1178 return brw_vec1_indirect(ptr.addr_subnr, ptr.addr_offset + offset); 1179} 1180 1181static inline struct brw_reg 1182deref_4b(struct brw_indirect ptr, int offset) 1183{ 1184 return retype(deref_4f(ptr, offset), BRW_REGISTER_TYPE_B); 1185} 1186 1187static inline struct brw_reg 1188deref_1uw(struct brw_indirect ptr, int offset) 1189{ 1190 return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UW); 1191} 1192 1193static inline struct brw_reg 1194deref_1d(struct brw_indirect ptr, int offset) 1195{ 1196 return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_D); 1197} 1198 1199static inline struct brw_reg 1200deref_1ud(struct brw_indirect ptr, int offset) 1201{ 1202 return retype(deref_1f(ptr, offset), BRW_REGISTER_TYPE_UD); 1203} 1204 1205static inline struct brw_reg 1206get_addr_reg(struct brw_indirect ptr) 1207{ 1208 return brw_address_reg(ptr.addr_subnr); 1209} 1210 1211static inline struct brw_indirect 1212brw_indirect_offset(struct brw_indirect ptr, int offset) 1213{ 1214 ptr.addr_offset += offset; 1215 return ptr; 1216} 1217 1218static inline struct brw_indirect 1219brw_indirect(unsigned addr_subnr, int offset) 1220{ 1221 struct brw_indirect ptr; 1222 ptr.addr_subnr = addr_subnr; 1223 ptr.addr_offset = offset; 1224 ptr.pad = 0; 1225 return ptr; 1226} 1227 1228static inline bool 1229region_matches(struct brw_reg reg, enum brw_vertical_stride v, 1230 enum brw_width w, enum brw_horizontal_stride h) 1231{ 1232 return reg.vstride == v && 1233 reg.width == w && 1234 reg.hstride == h; 1235} 1236 1237#define has_scalar_region(reg) \ 1238 region_matches(reg, BRW_VERTICAL_STRIDE_0, BRW_WIDTH_1, \ 1239 BRW_HORIZONTAL_STRIDE_0) 1240 1241/** 1242 * Return the size in bytes per data element of register \p reg on the 1243 * corresponding register file. 1244 */ 1245static inline unsigned 1246element_sz(struct brw_reg reg) 1247{ 1248 if (reg.file == BRW_IMMEDIATE_VALUE || has_scalar_region(reg)) { 1249 return type_sz(reg.type); 1250 1251 } else if (reg.width == BRW_WIDTH_1 && 1252 reg.hstride == BRW_HORIZONTAL_STRIDE_0) { 1253 assert(reg.vstride != BRW_VERTICAL_STRIDE_0); 1254 return type_sz(reg.type) << (reg.vstride - 1); 1255 1256 } else { 1257 assert(reg.hstride != BRW_HORIZONTAL_STRIDE_0); 1258 assert(reg.vstride == reg.hstride + reg.width); 1259 return type_sz(reg.type) << (reg.hstride - 1); 1260 } 1261} 1262 1263/* brw_packed_float.c */ 1264int brw_float_to_vf(float f); 1265float brw_vf_to_float(unsigned char vf); 1266 1267#ifdef __cplusplus 1268} 1269#endif 1270 1271#endif 1272