1/* 2 * Copyright © 2015-2019 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24/** @file brw_eu_validate.c 25 * 26 * This file implements a pass that validates shader assembly. 27 * 28 * The restrictions implemented herein are intended to verify that instructions 29 * in shader assembly do not violate restrictions documented in the graphics 30 * programming reference manuals. 31 * 32 * The restrictions are difficult for humans to quickly verify due to their 33 * complexity and abundance. 34 * 35 * It is critical that this code is thoroughly unit tested because false 36 * results will lead developers astray, which is worse than having no validator 37 * at all. Functional changes to this file without corresponding unit tests (in 38 * test_eu_validate.cpp) will be rejected. 
39 */ 40 41#include <stdlib.h> 42#include "brw_eu.h" 43 44/* We're going to do lots of string concatenation, so this should help. */ 45struct string { 46 char *str; 47 size_t len; 48}; 49 50static void 51cat(struct string *dest, const struct string src) 52{ 53 dest->str = realloc(dest->str, dest->len + src.len + 1); 54 memcpy(dest->str + dest->len, src.str, src.len); 55 dest->str[dest->len + src.len] = '\0'; 56 dest->len = dest->len + src.len; 57} 58#define CAT(dest, src) cat(&dest, (struct string){src, strlen(src)}) 59 60static bool 61contains(const struct string haystack, const struct string needle) 62{ 63 return haystack.str && memmem(haystack.str, haystack.len, 64 needle.str, needle.len) != NULL; 65} 66#define CONTAINS(haystack, needle) \ 67 contains(haystack, (struct string){needle, strlen(needle)}) 68 69#define error(str) "\tERROR: " str "\n" 70#define ERROR_INDENT "\t " 71 72#define ERROR(msg) ERROR_IF(true, msg) 73#define ERROR_IF(cond, msg) \ 74 do { \ 75 if ((cond) && !CONTAINS(error_msg, error(msg))) { \ 76 CAT(error_msg, error(msg)); \ 77 } \ 78 } while(0) 79 80#define CHECK(func, args...) \ 81 do { \ 82 struct string __msg = func(isa, inst, ##args); \ 83 if (__msg.str) { \ 84 cat(&error_msg, __msg); \ 85 free(__msg.str); \ 86 } \ 87 } while (0) 88 89#define STRIDE(stride) (stride != 0 ? 
1 << ((stride) - 1) : 0) 90#define WIDTH(width) (1 << (width)) 91 92static bool 93inst_is_send(const struct brw_isa_info *isa, const brw_inst *inst) 94{ 95 switch (brw_inst_opcode(isa, inst)) { 96 case BRW_OPCODE_SEND: 97 case BRW_OPCODE_SENDC: 98 case BRW_OPCODE_SENDS: 99 case BRW_OPCODE_SENDSC: 100 return true; 101 default: 102 return false; 103 } 104} 105 106static bool 107inst_is_split_send(const struct brw_isa_info *isa, const brw_inst *inst) 108{ 109 const struct intel_device_info *devinfo = isa->devinfo; 110 111 if (devinfo->ver >= 12) { 112 return inst_is_send(isa, inst); 113 } else { 114 switch (brw_inst_opcode(isa, inst)) { 115 case BRW_OPCODE_SENDS: 116 case BRW_OPCODE_SENDSC: 117 return true; 118 default: 119 return false; 120 } 121 } 122} 123 124static unsigned 125signed_type(unsigned type) 126{ 127 switch (type) { 128 case BRW_REGISTER_TYPE_UD: return BRW_REGISTER_TYPE_D; 129 case BRW_REGISTER_TYPE_UW: return BRW_REGISTER_TYPE_W; 130 case BRW_REGISTER_TYPE_UB: return BRW_REGISTER_TYPE_B; 131 case BRW_REGISTER_TYPE_UQ: return BRW_REGISTER_TYPE_Q; 132 default: return type; 133 } 134} 135 136static enum brw_reg_type 137inst_dst_type(const struct brw_isa_info *isa, const brw_inst *inst) 138{ 139 const struct intel_device_info *devinfo = isa->devinfo; 140 141 return (devinfo->ver < 12 || !inst_is_send(isa, inst)) ? 
142 brw_inst_dst_type(devinfo, inst) : BRW_REGISTER_TYPE_D; 143} 144 145static bool 146inst_is_raw_move(const struct brw_isa_info *isa, const brw_inst *inst) 147{ 148 const struct intel_device_info *devinfo = isa->devinfo; 149 150 unsigned dst_type = signed_type(inst_dst_type(isa, inst)); 151 unsigned src_type = signed_type(brw_inst_src0_type(devinfo, inst)); 152 153 if (brw_inst_src0_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE) { 154 /* FIXME: not strictly true */ 155 if (brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_VF || 156 brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_UV || 157 brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_V) { 158 return false; 159 } 160 } else if (brw_inst_src0_negate(devinfo, inst) || 161 brw_inst_src0_abs(devinfo, inst)) { 162 return false; 163 } 164 165 return brw_inst_opcode(isa, inst) == BRW_OPCODE_MOV && 166 brw_inst_saturate(devinfo, inst) == 0 && 167 dst_type == src_type; 168} 169 170static bool 171dst_is_null(const struct intel_device_info *devinfo, const brw_inst *inst) 172{ 173 return brw_inst_dst_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE && 174 brw_inst_dst_da_reg_nr(devinfo, inst) == BRW_ARF_NULL; 175} 176 177static bool 178src0_is_null(const struct intel_device_info *devinfo, const brw_inst *inst) 179{ 180 return brw_inst_src0_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT && 181 brw_inst_src0_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE && 182 brw_inst_src0_da_reg_nr(devinfo, inst) == BRW_ARF_NULL; 183} 184 185static bool 186src1_is_null(const struct intel_device_info *devinfo, const brw_inst *inst) 187{ 188 return brw_inst_src1_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE && 189 brw_inst_src1_da_reg_nr(devinfo, inst) == BRW_ARF_NULL; 190} 191 192static bool 193src0_is_acc(const struct intel_device_info *devinfo, const brw_inst *inst) 194{ 195 return brw_inst_src0_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE && 196 
(brw_inst_src0_da_reg_nr(devinfo, inst) & 0xF0) == BRW_ARF_ACCUMULATOR; 197} 198 199static bool 200src1_is_acc(const struct intel_device_info *devinfo, const brw_inst *inst) 201{ 202 return brw_inst_src1_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE && 203 (brw_inst_src1_da_reg_nr(devinfo, inst) & 0xF0) == BRW_ARF_ACCUMULATOR; 204} 205 206static bool 207src0_has_scalar_region(const struct intel_device_info *devinfo, 208 const brw_inst *inst) 209{ 210 return brw_inst_src0_vstride(devinfo, inst) == BRW_VERTICAL_STRIDE_0 && 211 brw_inst_src0_width(devinfo, inst) == BRW_WIDTH_1 && 212 brw_inst_src0_hstride(devinfo, inst) == BRW_HORIZONTAL_STRIDE_0; 213} 214 215static bool 216src1_has_scalar_region(const struct intel_device_info *devinfo, 217 const brw_inst *inst) 218{ 219 return brw_inst_src1_vstride(devinfo, inst) == BRW_VERTICAL_STRIDE_0 && 220 brw_inst_src1_width(devinfo, inst) == BRW_WIDTH_1 && 221 brw_inst_src1_hstride(devinfo, inst) == BRW_HORIZONTAL_STRIDE_0; 222} 223 224static unsigned 225num_sources_from_inst(const struct brw_isa_info *isa, 226 const brw_inst *inst) 227{ 228 const struct intel_device_info *devinfo = isa->devinfo; 229 const struct opcode_desc *desc = 230 brw_opcode_desc(isa, brw_inst_opcode(isa, inst)); 231 unsigned math_function; 232 233 if (brw_inst_opcode(isa, inst) == BRW_OPCODE_MATH) { 234 math_function = brw_inst_math_function(devinfo, inst); 235 } else if (devinfo->ver < 6 && 236 brw_inst_opcode(isa, inst) == BRW_OPCODE_SEND) { 237 if (brw_inst_sfid(devinfo, inst) == BRW_SFID_MATH) { 238 /* src1 must be a descriptor (including the information to determine 239 * that the SEND is doing an extended math operation), but src0 can 240 * actually be null since it serves as the source of the implicit GRF 241 * to MRF move. 242 * 243 * If we stop using that functionality, we'll have to revisit this. 
244 */ 245 return 2; 246 } else { 247 /* Send instructions are allowed to have null sources since they use 248 * the base_mrf field to specify which message register source. 249 */ 250 return 0; 251 } 252 } else { 253 assert(desc->nsrc < 4); 254 return desc->nsrc; 255 } 256 257 switch (math_function) { 258 case BRW_MATH_FUNCTION_INV: 259 case BRW_MATH_FUNCTION_LOG: 260 case BRW_MATH_FUNCTION_EXP: 261 case BRW_MATH_FUNCTION_SQRT: 262 case BRW_MATH_FUNCTION_RSQ: 263 case BRW_MATH_FUNCTION_SIN: 264 case BRW_MATH_FUNCTION_COS: 265 case BRW_MATH_FUNCTION_SINCOS: 266 case GFX8_MATH_FUNCTION_INVM: 267 case GFX8_MATH_FUNCTION_RSQRTM: 268 return 1; 269 case BRW_MATH_FUNCTION_FDIV: 270 case BRW_MATH_FUNCTION_POW: 271 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER: 272 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT: 273 case BRW_MATH_FUNCTION_INT_DIV_REMAINDER: 274 return 2; 275 default: 276 unreachable("not reached"); 277 } 278} 279 280static struct string 281invalid_values(const struct brw_isa_info *isa, const brw_inst *inst) 282{ 283 const struct intel_device_info *devinfo = isa->devinfo; 284 285 unsigned num_sources = num_sources_from_inst(isa, inst); 286 struct string error_msg = { .str = NULL, .len = 0 }; 287 288 switch ((enum brw_execution_size) brw_inst_exec_size(devinfo, inst)) { 289 case BRW_EXECUTE_1: 290 case BRW_EXECUTE_2: 291 case BRW_EXECUTE_4: 292 case BRW_EXECUTE_8: 293 case BRW_EXECUTE_16: 294 case BRW_EXECUTE_32: 295 break; 296 default: 297 ERROR("invalid execution size"); 298 break; 299 } 300 301 if (inst_is_send(isa, inst)) 302 return error_msg; 303 304 if (num_sources == 3) { 305 /* Nothing to test: 306 * No 3-src instructions on Gfx4-5 307 * No reg file bits on Gfx6-10 (align16) 308 * No invalid encodings on Gfx10-12 (align1) 309 */ 310 } else { 311 if (devinfo->ver > 6) { 312 ERROR_IF(brw_inst_dst_reg_file(devinfo, inst) == MRF || 313 (num_sources > 0 && 314 brw_inst_src0_reg_file(devinfo, inst) == MRF) || 315 (num_sources > 1 && 316 
brw_inst_src1_reg_file(devinfo, inst) == MRF), 317 "invalid register file encoding"); 318 } 319 } 320 321 if (error_msg.str) 322 return error_msg; 323 324 if (num_sources == 3) { 325 if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { 326 if (devinfo->ver >= 10) { 327 ERROR_IF(brw_inst_3src_a1_dst_type (devinfo, inst) == INVALID_REG_TYPE || 328 brw_inst_3src_a1_src0_type(devinfo, inst) == INVALID_REG_TYPE || 329 brw_inst_3src_a1_src1_type(devinfo, inst) == INVALID_REG_TYPE || 330 brw_inst_3src_a1_src2_type(devinfo, inst) == INVALID_REG_TYPE, 331 "invalid register type encoding"); 332 } else { 333 ERROR("Align1 mode not allowed on Gen < 10"); 334 } 335 } else { 336 ERROR_IF(brw_inst_3src_a16_dst_type(devinfo, inst) == INVALID_REG_TYPE || 337 brw_inst_3src_a16_src_type(devinfo, inst) == INVALID_REG_TYPE, 338 "invalid register type encoding"); 339 } 340 } else { 341 ERROR_IF(brw_inst_dst_type (devinfo, inst) == INVALID_REG_TYPE || 342 (num_sources > 0 && 343 brw_inst_src0_type(devinfo, inst) == INVALID_REG_TYPE) || 344 (num_sources > 1 && 345 brw_inst_src1_type(devinfo, inst) == INVALID_REG_TYPE), 346 "invalid register type encoding"); 347 } 348 349 return error_msg; 350} 351 352static struct string 353sources_not_null(const struct brw_isa_info *isa, 354 const brw_inst *inst) 355{ 356 const struct intel_device_info *devinfo = isa->devinfo; 357 unsigned num_sources = num_sources_from_inst(isa, inst); 358 struct string error_msg = { .str = NULL, .len = 0 }; 359 360 /* Nothing to test. 3-src instructions can only have GRF sources, and 361 * there's no bit to control the file. 362 */ 363 if (num_sources == 3) 364 return (struct string){}; 365 366 /* Nothing to test. Split sends can only encode a file in sources that are 367 * allowed to be NULL. 
368 */ 369 if (inst_is_split_send(isa, inst)) 370 return (struct string){}; 371 372 if (num_sources >= 1 && brw_inst_opcode(isa, inst) != BRW_OPCODE_SYNC) 373 ERROR_IF(src0_is_null(devinfo, inst), "src0 is null"); 374 375 if (num_sources == 2) 376 ERROR_IF(src1_is_null(devinfo, inst), "src1 is null"); 377 378 return error_msg; 379} 380 381static struct string 382alignment_supported(const struct brw_isa_info *isa, 383 const brw_inst *inst) 384{ 385 const struct intel_device_info *devinfo = isa->devinfo; 386 struct string error_msg = { .str = NULL, .len = 0 }; 387 388 ERROR_IF(devinfo->ver >= 11 && brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16, 389 "Align16 not supported"); 390 391 return error_msg; 392} 393 394static bool 395inst_uses_src_acc(const struct brw_isa_info *isa, 396 const brw_inst *inst) 397{ 398 const struct intel_device_info *devinfo = isa->devinfo; 399 400 /* Check instructions that use implicit accumulator sources */ 401 switch (brw_inst_opcode(isa, inst)) { 402 case BRW_OPCODE_MAC: 403 case BRW_OPCODE_MACH: 404 case BRW_OPCODE_SADA2: 405 return true; 406 default: 407 break; 408 } 409 410 /* FIXME: support 3-src instructions */ 411 unsigned num_sources = num_sources_from_inst(isa, inst); 412 assert(num_sources < 3); 413 414 return src0_is_acc(devinfo, inst) || (num_sources > 1 && src1_is_acc(devinfo, inst)); 415} 416 417static struct string 418send_restrictions(const struct brw_isa_info *isa, 419 const brw_inst *inst) 420{ 421 const struct intel_device_info *devinfo = isa->devinfo; 422 423 struct string error_msg = { .str = NULL, .len = 0 }; 424 425 if (inst_is_split_send(isa, inst)) { 426 ERROR_IF(brw_inst_send_src1_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE && 427 brw_inst_send_src1_reg_nr(devinfo, inst) != BRW_ARF_NULL, 428 "src1 of split send must be a GRF or NULL"); 429 430 ERROR_IF(brw_inst_eot(devinfo, inst) && 431 brw_inst_src0_da_reg_nr(devinfo, inst) < 112, 432 "send with EOT must use g112-g127"); 433 
ERROR_IF(brw_inst_eot(devinfo, inst) && 434 brw_inst_send_src1_reg_file(devinfo, inst) == BRW_GENERAL_REGISTER_FILE && 435 brw_inst_send_src1_reg_nr(devinfo, inst) < 112, 436 "send with EOT must use g112-g127"); 437 438 if (brw_inst_send_src0_reg_file(devinfo, inst) == BRW_GENERAL_REGISTER_FILE && 439 brw_inst_send_src1_reg_file(devinfo, inst) == BRW_GENERAL_REGISTER_FILE) { 440 /* Assume minimums if we don't know */ 441 unsigned mlen = 1; 442 if (!brw_inst_send_sel_reg32_desc(devinfo, inst)) { 443 const uint32_t desc = brw_inst_send_desc(devinfo, inst); 444 mlen = brw_message_desc_mlen(devinfo, desc); 445 } 446 447 unsigned ex_mlen = 1; 448 if (!brw_inst_send_sel_reg32_ex_desc(devinfo, inst)) { 449 const uint32_t ex_desc = brw_inst_sends_ex_desc(devinfo, inst); 450 ex_mlen = brw_message_ex_desc_ex_mlen(devinfo, ex_desc); 451 } 452 const unsigned src0_reg_nr = brw_inst_src0_da_reg_nr(devinfo, inst); 453 const unsigned src1_reg_nr = brw_inst_send_src1_reg_nr(devinfo, inst); 454 ERROR_IF((src0_reg_nr <= src1_reg_nr && 455 src1_reg_nr < src0_reg_nr + mlen) || 456 (src1_reg_nr <= src0_reg_nr && 457 src0_reg_nr < src1_reg_nr + ex_mlen), 458 "split send payloads must not overlap"); 459 } 460 } else if (inst_is_send(isa, inst)) { 461 ERROR_IF(brw_inst_src0_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT, 462 "send must use direct addressing"); 463 464 if (devinfo->ver >= 7) { 465 ERROR_IF(brw_inst_send_src0_reg_file(devinfo, inst) != BRW_GENERAL_REGISTER_FILE, 466 "send from non-GRF"); 467 ERROR_IF(brw_inst_eot(devinfo, inst) && 468 brw_inst_src0_da_reg_nr(devinfo, inst) < 112, 469 "send with EOT must use g112-g127"); 470 } 471 472 if (devinfo->ver >= 8) { 473 ERROR_IF(!dst_is_null(devinfo, inst) && 474 (brw_inst_dst_da_reg_nr(devinfo, inst) + 475 brw_inst_rlen(devinfo, inst) > 127) && 476 (brw_inst_src0_da_reg_nr(devinfo, inst) + 477 brw_inst_mlen(devinfo, inst) > 478 brw_inst_dst_da_reg_nr(devinfo, inst)), 479 "r127 must not be used for return address when there is " 
480 "a src and dest overlap"); 481 } 482 } 483 484 return error_msg; 485} 486 487static bool 488is_unsupported_inst(const struct brw_isa_info *isa, 489 const brw_inst *inst) 490{ 491 return brw_inst_opcode(isa, inst) == BRW_OPCODE_ILLEGAL; 492} 493 494/** 495 * Returns whether a combination of two types would qualify as mixed float 496 * operation mode 497 */ 498static inline bool 499types_are_mixed_float(enum brw_reg_type t0, enum brw_reg_type t1) 500{ 501 return (t0 == BRW_REGISTER_TYPE_F && t1 == BRW_REGISTER_TYPE_HF) || 502 (t1 == BRW_REGISTER_TYPE_F && t0 == BRW_REGISTER_TYPE_HF); 503} 504 505static enum brw_reg_type 506execution_type_for_type(enum brw_reg_type type) 507{ 508 switch (type) { 509 case BRW_REGISTER_TYPE_NF: 510 case BRW_REGISTER_TYPE_DF: 511 case BRW_REGISTER_TYPE_F: 512 case BRW_REGISTER_TYPE_HF: 513 return type; 514 515 case BRW_REGISTER_TYPE_VF: 516 return BRW_REGISTER_TYPE_F; 517 518 case BRW_REGISTER_TYPE_Q: 519 case BRW_REGISTER_TYPE_UQ: 520 return BRW_REGISTER_TYPE_Q; 521 522 case BRW_REGISTER_TYPE_D: 523 case BRW_REGISTER_TYPE_UD: 524 return BRW_REGISTER_TYPE_D; 525 526 case BRW_REGISTER_TYPE_W: 527 case BRW_REGISTER_TYPE_UW: 528 case BRW_REGISTER_TYPE_B: 529 case BRW_REGISTER_TYPE_UB: 530 case BRW_REGISTER_TYPE_V: 531 case BRW_REGISTER_TYPE_UV: 532 return BRW_REGISTER_TYPE_W; 533 } 534 unreachable("not reached"); 535} 536 537/** 538 * Returns the execution type of an instruction \p inst 539 */ 540static enum brw_reg_type 541execution_type(const struct brw_isa_info *isa, const brw_inst *inst) 542{ 543 const struct intel_device_info *devinfo = isa->devinfo; 544 545 unsigned num_sources = num_sources_from_inst(isa, inst); 546 enum brw_reg_type src0_exec_type, src1_exec_type; 547 548 /* Execution data type is independent of destination data type, except in 549 * mixed F/HF instructions. 
550 */ 551 enum brw_reg_type dst_exec_type = inst_dst_type(isa, inst); 552 553 src0_exec_type = execution_type_for_type(brw_inst_src0_type(devinfo, inst)); 554 if (num_sources == 1) { 555 if (src0_exec_type == BRW_REGISTER_TYPE_HF) 556 return dst_exec_type; 557 return src0_exec_type; 558 } 559 560 src1_exec_type = execution_type_for_type(brw_inst_src1_type(devinfo, inst)); 561 if (types_are_mixed_float(src0_exec_type, src1_exec_type) || 562 types_are_mixed_float(src0_exec_type, dst_exec_type) || 563 types_are_mixed_float(src1_exec_type, dst_exec_type)) { 564 return BRW_REGISTER_TYPE_F; 565 } 566 567 if (src0_exec_type == src1_exec_type) 568 return src0_exec_type; 569 570 if (src0_exec_type == BRW_REGISTER_TYPE_NF || 571 src1_exec_type == BRW_REGISTER_TYPE_NF) 572 return BRW_REGISTER_TYPE_NF; 573 574 /* Mixed operand types where one is float is float on Gen < 6 575 * (and not allowed on later platforms) 576 */ 577 if (devinfo->ver < 6 && 578 (src0_exec_type == BRW_REGISTER_TYPE_F || 579 src1_exec_type == BRW_REGISTER_TYPE_F)) 580 return BRW_REGISTER_TYPE_F; 581 582 if (src0_exec_type == BRW_REGISTER_TYPE_Q || 583 src1_exec_type == BRW_REGISTER_TYPE_Q) 584 return BRW_REGISTER_TYPE_Q; 585 586 if (src0_exec_type == BRW_REGISTER_TYPE_D || 587 src1_exec_type == BRW_REGISTER_TYPE_D) 588 return BRW_REGISTER_TYPE_D; 589 590 if (src0_exec_type == BRW_REGISTER_TYPE_W || 591 src1_exec_type == BRW_REGISTER_TYPE_W) 592 return BRW_REGISTER_TYPE_W; 593 594 if (src0_exec_type == BRW_REGISTER_TYPE_DF || 595 src1_exec_type == BRW_REGISTER_TYPE_DF) 596 return BRW_REGISTER_TYPE_DF; 597 598 unreachable("not reached"); 599} 600 601/** 602 * Returns whether a region is packed 603 * 604 * A region is packed if its elements are adjacent in memory, with no 605 * intervening space, no overlap, and no replicated values. 
606 */ 607static bool 608is_packed(unsigned vstride, unsigned width, unsigned hstride) 609{ 610 if (vstride == width) { 611 if (vstride == 1) { 612 return hstride == 0; 613 } else { 614 return hstride == 1; 615 } 616 } 617 618 return false; 619} 620 621/** 622 * Returns whether a region is linear 623 * 624 * A region is linear if its elements do not overlap and are not replicated. 625 * Unlike a packed region, intervening space (i.e. strided values) is allowed. 626 */ 627static bool 628is_linear(unsigned vstride, unsigned width, unsigned hstride) 629{ 630 return vstride == width * hstride || 631 (hstride == 0 && width == 1); 632} 633 634/** 635 * Returns whether an instruction is an explicit or implicit conversion 636 * to/from half-float. 637 */ 638static bool 639is_half_float_conversion(const struct brw_isa_info *isa, 640 const brw_inst *inst) 641{ 642 const struct intel_device_info *devinfo = isa->devinfo; 643 644 enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst); 645 646 unsigned num_sources = num_sources_from_inst(isa, inst); 647 enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst); 648 649 if (dst_type != src0_type && 650 (dst_type == BRW_REGISTER_TYPE_HF || src0_type == BRW_REGISTER_TYPE_HF)) { 651 return true; 652 } else if (num_sources > 1) { 653 enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst); 654 return dst_type != src1_type && 655 (dst_type == BRW_REGISTER_TYPE_HF || 656 src1_type == BRW_REGISTER_TYPE_HF); 657 } 658 659 return false; 660} 661 662/* 663 * Returns whether an instruction is using mixed float operation mode 664 */ 665static bool 666is_mixed_float(const struct brw_isa_info *isa, const brw_inst *inst) 667{ 668 const struct intel_device_info *devinfo = isa->devinfo; 669 670 if (devinfo->ver < 8) 671 return false; 672 673 if (inst_is_send(isa, inst)) 674 return false; 675 676 unsigned opcode = brw_inst_opcode(isa, inst); 677 const struct opcode_desc *desc = brw_opcode_desc(isa, opcode); 678 if (desc->ndst 
== 0) 679 return false; 680 681 /* FIXME: support 3-src instructions */ 682 unsigned num_sources = num_sources_from_inst(isa, inst); 683 assert(num_sources < 3); 684 685 enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst); 686 enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst); 687 688 if (num_sources == 1) 689 return types_are_mixed_float(src0_type, dst_type); 690 691 enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst); 692 693 return types_are_mixed_float(src0_type, src1_type) || 694 types_are_mixed_float(src0_type, dst_type) || 695 types_are_mixed_float(src1_type, dst_type); 696} 697 698/** 699 * Returns whether an instruction is an explicit or implicit conversion 700 * to/from byte. 701 */ 702static bool 703is_byte_conversion(const struct brw_isa_info *isa, 704 const brw_inst *inst) 705{ 706 const struct intel_device_info *devinfo = isa->devinfo; 707 708 enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst); 709 710 unsigned num_sources = num_sources_from_inst(isa, inst); 711 enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst); 712 713 if (dst_type != src0_type && 714 (type_sz(dst_type) == 1 || type_sz(src0_type) == 1)) { 715 return true; 716 } else if (num_sources > 1) { 717 enum brw_reg_type src1_type = brw_inst_src1_type(devinfo, inst); 718 return dst_type != src1_type && 719 (type_sz(dst_type) == 1 || type_sz(src1_type) == 1); 720 } 721 722 return false; 723} 724 725/** 726 * Checks restrictions listed in "General Restrictions Based on Operand Types" 727 * in the "Register Region Restrictions" section. 
728 */ 729static struct string 730general_restrictions_based_on_operand_types(const struct brw_isa_info *isa, 731 const brw_inst *inst) 732{ 733 const struct intel_device_info *devinfo = isa->devinfo; 734 735 const struct opcode_desc *desc = 736 brw_opcode_desc(isa, brw_inst_opcode(isa, inst)); 737 unsigned num_sources = num_sources_from_inst(isa, inst); 738 unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst); 739 struct string error_msg = { .str = NULL, .len = 0 }; 740 741 if (inst_is_send(isa, inst)) 742 return error_msg; 743 744 if (devinfo->ver >= 11) { 745 if (num_sources == 3) { 746 ERROR_IF(brw_reg_type_to_size(brw_inst_3src_a1_src1_type(devinfo, inst)) == 1 || 747 brw_reg_type_to_size(brw_inst_3src_a1_src2_type(devinfo, inst)) == 1, 748 "Byte data type is not supported for src1/2 register regioning. This includes " 749 "byte broadcast as well."); 750 } 751 if (num_sources == 2) { 752 ERROR_IF(brw_reg_type_to_size(brw_inst_src1_type(devinfo, inst)) == 1, 753 "Byte data type is not supported for src1 register regioning. 
This includes " 754 "byte broadcast as well."); 755 } 756 } 757 758 enum brw_reg_type dst_type; 759 760 if (num_sources == 3) { 761 if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) 762 dst_type = brw_inst_3src_a1_dst_type(devinfo, inst); 763 else 764 dst_type = brw_inst_3src_a16_dst_type(devinfo, inst); 765 } else { 766 dst_type = inst_dst_type(isa, inst); 767 } 768 769 ERROR_IF(dst_type == BRW_REGISTER_TYPE_DF && 770 !devinfo->has_64bit_float, 771 "64-bit float destination, but platform does not support it"); 772 773 ERROR_IF((dst_type == BRW_REGISTER_TYPE_Q || 774 dst_type == BRW_REGISTER_TYPE_UQ) && 775 !devinfo->has_64bit_int, 776 "64-bit int destination, but platform does not support it"); 777 778 for (unsigned s = 0; s < num_sources; s++) { 779 enum brw_reg_type src_type; 780 if (num_sources == 3) { 781 if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { 782 switch (s) { 783 case 0: src_type = brw_inst_3src_a1_src0_type(devinfo, inst); break; 784 case 1: src_type = brw_inst_3src_a1_src1_type(devinfo, inst); break; 785 case 2: src_type = brw_inst_3src_a1_src2_type(devinfo, inst); break; 786 default: unreachable("invalid src"); 787 } 788 } else { 789 src_type = brw_inst_3src_a16_src_type(devinfo, inst); 790 } 791 } else { 792 switch (s) { 793 case 0: src_type = brw_inst_src0_type(devinfo, inst); break; 794 case 1: src_type = brw_inst_src1_type(devinfo, inst); break; 795 default: unreachable("invalid src"); 796 } 797 } 798 799 ERROR_IF(src_type == BRW_REGISTER_TYPE_DF && 800 !devinfo->has_64bit_float, 801 "64-bit float source, but platform does not support it"); 802 803 ERROR_IF((src_type == BRW_REGISTER_TYPE_Q || 804 src_type == BRW_REGISTER_TYPE_UQ) && 805 !devinfo->has_64bit_int, 806 "64-bit int source, but platform does not support it"); 807 } 808 809 if (num_sources == 3) 810 return error_msg; 811 812 if (exec_size == 1) 813 return error_msg; 814 815 if (desc->ndst == 0) 816 return error_msg; 817 818 /* The PRMs say: 819 * 820 * Where n is the 
largest element size in bytes for any source or 821 * destination operand type, ExecSize * n must be <= 64. 822 * 823 * But we do not attempt to enforce it, because it is implied by other 824 * rules: 825 * 826 * - that the destination stride must match the execution data type 827 * - sources may not span more than two adjacent GRF registers 828 * - destination may not span more than two adjacent GRF registers 829 * 830 * In fact, checking it would weaken testing of the other rules. 831 */ 832 833 unsigned dst_stride = STRIDE(brw_inst_dst_hstride(devinfo, inst)); 834 bool dst_type_is_byte = 835 inst_dst_type(isa, inst) == BRW_REGISTER_TYPE_B || 836 inst_dst_type(isa, inst) == BRW_REGISTER_TYPE_UB; 837 838 if (dst_type_is_byte) { 839 if (is_packed(exec_size * dst_stride, exec_size, dst_stride)) { 840 if (!inst_is_raw_move(isa, inst)) 841 ERROR("Only raw MOV supports a packed-byte destination"); 842 return error_msg; 843 } 844 } 845 846 unsigned exec_type = execution_type(isa, inst); 847 unsigned exec_type_size = brw_reg_type_to_size(exec_type); 848 unsigned dst_type_size = brw_reg_type_to_size(dst_type); 849 850 /* On IVB/BYT, region parameters and execution size for DF are in terms of 851 * 32-bit elements, so they are doubled. For evaluating the validity of an 852 * instruction, we halve them. 853 */ 854 if (devinfo->verx10 == 70 && 855 exec_type_size == 8 && dst_type_size == 4) 856 dst_type_size = 8; 857 858 if (is_byte_conversion(isa, inst)) { 859 /* From the BDW+ PRM, Volume 2a, Command Reference, Instructions - MOV: 860 * 861 * "There is no direct conversion from B/UB to DF or DF to B/UB. 862 * There is no direct conversion from B/UB to Q/UQ or Q/UQ to B/UB." 863 * 864 * Even if these restrictions are listed for the MOV instruction, we 865 * validate this more generally, since there is the possibility 866 * of implicit conversions from other instructions. 
867 */ 868 enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst); 869 enum brw_reg_type src1_type = num_sources > 1 ? 870 brw_inst_src1_type(devinfo, inst) : 0; 871 872 ERROR_IF(type_sz(dst_type) == 1 && 873 (type_sz(src0_type) == 8 || 874 (num_sources > 1 && type_sz(src1_type) == 8)), 875 "There are no direct conversions between 64-bit types and B/UB"); 876 877 ERROR_IF(type_sz(dst_type) == 8 && 878 (type_sz(src0_type) == 1 || 879 (num_sources > 1 && type_sz(src1_type) == 1)), 880 "There are no direct conversions between 64-bit types and B/UB"); 881 } 882 883 if (is_half_float_conversion(isa, inst)) { 884 /** 885 * A helper to validate used in the validation of the following restriction 886 * from the BDW+ PRM, Volume 2a, Command Reference, Instructions - MOV: 887 * 888 * "There is no direct conversion from HF to DF or DF to HF. 889 * There is no direct conversion from HF to Q/UQ or Q/UQ to HF." 890 * 891 * Even if these restrictions are listed for the MOV instruction, we 892 * validate this more generally, since there is the possibility 893 * of implicit conversions from other instructions, such us implicit 894 * conversion from integer to HF with the ADD instruction in SKL+. 895 */ 896 enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst); 897 enum brw_reg_type src1_type = num_sources > 1 ? 898 brw_inst_src1_type(devinfo, inst) : 0; 899 ERROR_IF(dst_type == BRW_REGISTER_TYPE_HF && 900 (type_sz(src0_type) == 8 || 901 (num_sources > 1 && type_sz(src1_type) == 8)), 902 "There are no direct conversions between 64-bit types and HF"); 903 904 ERROR_IF(type_sz(dst_type) == 8 && 905 (src0_type == BRW_REGISTER_TYPE_HF || 906 (num_sources > 1 && src1_type == BRW_REGISTER_TYPE_HF)), 907 "There are no direct conversions between 64-bit types and HF"); 908 909 /* From the BDW+ PRM: 910 * 911 * "Conversion between Integer and HF (Half Float) must be 912 * DWord-aligned and strided by a DWord on the destination." 
913 * 914 * Also, the above restrictions seems to be expanded on CHV and SKL+ by: 915 * 916 * "There is a relaxed alignment rule for word destinations. When 917 * the destination type is word (UW, W, HF), destination data types 918 * can be aligned to either the lowest word or the second lowest 919 * word of the execution channel. This means the destination data 920 * words can be either all in the even word locations or all in the 921 * odd word locations." 922 * 923 * We do not implement the second rule as is though, since empirical 924 * testing shows inconsistencies: 925 * - It suggests that packed 16-bit is not allowed, which is not true. 926 * - It suggests that conversions from Q/DF to W (which need to be 927 * 64-bit aligned on the destination) are not possible, which is 928 * not true. 929 * 930 * So from this rule we only validate the implication that conversions 931 * from F to HF need to be DWord strided (except in Align1 mixed 932 * float mode where packed fp16 destination is allowed so long as the 933 * destination is oword-aligned). 934 * 935 * Finally, we only validate this for Align1 because Align16 always 936 * requires packed destinations, so these restrictions can't possibly 937 * apply to Align16 mode. 
938 */ 939 if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1) { 940 if ((dst_type == BRW_REGISTER_TYPE_HF && 941 (brw_reg_type_is_integer(src0_type) || 942 (num_sources > 1 && brw_reg_type_is_integer(src1_type)))) || 943 (brw_reg_type_is_integer(dst_type) && 944 (src0_type == BRW_REGISTER_TYPE_HF || 945 (num_sources > 1 && src1_type == BRW_REGISTER_TYPE_HF)))) { 946 ERROR_IF(dst_stride * dst_type_size != 4, 947 "Conversions between integer and half-float must be " 948 "strided by a DWord on the destination"); 949 950 unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst); 951 ERROR_IF(subreg % 4 != 0, 952 "Conversions between integer and half-float must be " 953 "aligned to a DWord on the destination"); 954 } else if ((devinfo->platform == INTEL_PLATFORM_CHV || 955 devinfo->ver >= 9) && 956 dst_type == BRW_REGISTER_TYPE_HF) { 957 unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst); 958 ERROR_IF(dst_stride != 2 && 959 !(is_mixed_float(isa, inst) && 960 dst_stride == 1 && subreg % 16 == 0), 961 "Conversions to HF must have either all words in even " 962 "word locations or all words in odd word locations or " 963 "be mixed-float with Oword-aligned packed destination"); 964 } 965 } 966 } 967 968 /* There are special regioning rules for mixed-float mode in CHV and SKL that 969 * override the general rule for the ratio of sizes of the destination type 970 * and the execution type. We will add validation for those in a later patch. 
971 */ 972 bool validate_dst_size_and_exec_size_ratio = 973 !is_mixed_float(isa, inst) || 974 !(devinfo->platform == INTEL_PLATFORM_CHV || devinfo->ver >= 9); 975 976 if (validate_dst_size_and_exec_size_ratio && 977 exec_type_size > dst_type_size) { 978 if (!(dst_type_is_byte && inst_is_raw_move(isa, inst))) { 979 ERROR_IF(dst_stride * dst_type_size != exec_type_size, 980 "Destination stride must be equal to the ratio of the sizes " 981 "of the execution data type to the destination type"); 982 } 983 984 unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst); 985 986 if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1 && 987 brw_inst_dst_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT) { 988 /* The i965 PRM says: 989 * 990 * Implementation Restriction: The relaxed alignment rule for byte 991 * destination (#10.5) is not supported. 992 */ 993 if (devinfo->verx10 >= 45 && dst_type_is_byte) { 994 ERROR_IF(subreg % exec_type_size != 0 && 995 subreg % exec_type_size != 1, 996 "Destination subreg must be aligned to the size of the " 997 "execution data type (or to the next lowest byte for byte " 998 "destinations)"); 999 } else { 1000 ERROR_IF(subreg % exec_type_size != 0, 1001 "Destination subreg must be aligned to the size of the " 1002 "execution data type"); 1003 } 1004 } 1005 } 1006 1007 return error_msg; 1008} 1009 1010/** 1011 * Checks restrictions listed in "General Restrictions on Regioning Parameters" 1012 * in the "Register Region Restrictions" section. 
1013 */ 1014static struct string 1015general_restrictions_on_region_parameters(const struct brw_isa_info *isa, 1016 const brw_inst *inst) 1017{ 1018 const struct intel_device_info *devinfo = isa->devinfo; 1019 1020 const struct opcode_desc *desc = 1021 brw_opcode_desc(isa, brw_inst_opcode(isa, inst)); 1022 unsigned num_sources = num_sources_from_inst(isa, inst); 1023 unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst); 1024 struct string error_msg = { .str = NULL, .len = 0 }; 1025 1026 if (num_sources == 3) 1027 return (struct string){}; 1028 1029 /* Split sends don't have the bits in the instruction to encode regions so 1030 * there's nothing to check. 1031 */ 1032 if (inst_is_split_send(isa, inst)) 1033 return (struct string){}; 1034 1035 if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16) { 1036 if (desc->ndst != 0 && !dst_is_null(devinfo, inst)) 1037 ERROR_IF(brw_inst_dst_hstride(devinfo, inst) != BRW_HORIZONTAL_STRIDE_1, 1038 "Destination Horizontal Stride must be 1"); 1039 1040 if (num_sources >= 1) { 1041 if (devinfo->verx10 >= 75) { 1042 ERROR_IF(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE && 1043 brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 && 1044 brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_2 && 1045 brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4, 1046 "In Align16 mode, only VertStride of 0, 2, or 4 is allowed"); 1047 } else { 1048 ERROR_IF(brw_inst_src0_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE && 1049 brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 && 1050 brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4, 1051 "In Align16 mode, only VertStride of 0 or 4 is allowed"); 1052 } 1053 } 1054 1055 if (num_sources == 2) { 1056 if (devinfo->verx10 >= 75) { 1057 ERROR_IF(brw_inst_src1_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE && 1058 brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 && 1059 brw_inst_src1_vstride(devinfo, inst) != 
BRW_VERTICAL_STRIDE_2 && 1060 brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4, 1061 "In Align16 mode, only VertStride of 0, 2, or 4 is allowed"); 1062 } else { 1063 ERROR_IF(brw_inst_src1_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE && 1064 brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_0 && 1065 brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4, 1066 "In Align16 mode, only VertStride of 0 or 4 is allowed"); 1067 } 1068 } 1069 1070 return error_msg; 1071 } 1072 1073 for (unsigned i = 0; i < num_sources; i++) { 1074 unsigned vstride, width, hstride, element_size, subreg; 1075 enum brw_reg_type type; 1076 1077#define DO_SRC(n) \ 1078 if (brw_inst_src ## n ## _reg_file(devinfo, inst) == \ 1079 BRW_IMMEDIATE_VALUE) \ 1080 continue; \ 1081 \ 1082 vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst)); \ 1083 width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst)); \ 1084 hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst)); \ 1085 type = brw_inst_src ## n ## _type(devinfo, inst); \ 1086 element_size = brw_reg_type_to_size(type); \ 1087 subreg = brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst) 1088 1089 if (i == 0) { 1090 DO_SRC(0); 1091 } else { 1092 DO_SRC(1); 1093 } 1094#undef DO_SRC 1095 1096 /* On IVB/BYT, region parameters and execution size for DF are in terms of 1097 * 32-bit elements, so they are doubled. For evaluating the validity of an 1098 * instruction, we halve them. 1099 */ 1100 if (devinfo->verx10 == 70 && 1101 element_size == 8) 1102 element_size = 4; 1103 1104 /* ExecSize must be greater than or equal to Width. */ 1105 ERROR_IF(exec_size < width, "ExecSize must be greater than or equal " 1106 "to Width"); 1107 1108 /* If ExecSize = Width and HorzStride ≠ 0, 1109 * VertStride must be set to Width * HorzStride. 
1110 */ 1111 if (exec_size == width && hstride != 0) { 1112 ERROR_IF(vstride != width * hstride, 1113 "If ExecSize = Width and HorzStride ≠ 0, " 1114 "VertStride must be set to Width * HorzStride"); 1115 } 1116 1117 /* If Width = 1, HorzStride must be 0 regardless of the values of 1118 * ExecSize and VertStride. 1119 */ 1120 if (width == 1) { 1121 ERROR_IF(hstride != 0, 1122 "If Width = 1, HorzStride must be 0 regardless " 1123 "of the values of ExecSize and VertStride"); 1124 } 1125 1126 /* If ExecSize = Width = 1, both VertStride and HorzStride must be 0. */ 1127 if (exec_size == 1 && width == 1) { 1128 ERROR_IF(vstride != 0 || hstride != 0, 1129 "If ExecSize = Width = 1, both VertStride " 1130 "and HorzStride must be 0"); 1131 } 1132 1133 /* If VertStride = HorzStride = 0, Width must be 1 regardless of the 1134 * value of ExecSize. 1135 */ 1136 if (vstride == 0 && hstride == 0) { 1137 ERROR_IF(width != 1, 1138 "If VertStride = HorzStride = 0, Width must be " 1139 "1 regardless of the value of ExecSize"); 1140 } 1141 1142 /* VertStride must be used to cross GRF register boundaries. This rule 1143 * implies that elements within a 'Width' cannot cross GRF boundaries. 1144 */ 1145 const uint64_t mask = (1ULL << element_size) - 1; 1146 unsigned rowbase = subreg; 1147 1148 for (int y = 0; y < exec_size / width; y++) { 1149 uint64_t access_mask = 0; 1150 unsigned offset = rowbase; 1151 1152 for (int x = 0; x < width; x++) { 1153 access_mask |= mask << (offset % 64); 1154 offset += hstride * element_size; 1155 } 1156 1157 rowbase += vstride * element_size; 1158 1159 if ((uint32_t)access_mask != 0 && (access_mask >> 32) != 0) { 1160 ERROR("VertStride must be used to cross GRF register boundaries"); 1161 break; 1162 } 1163 } 1164 } 1165 1166 /* Dst.HorzStride must not be 0. 
*/ 1167 if (desc->ndst != 0 && !dst_is_null(devinfo, inst)) { 1168 ERROR_IF(brw_inst_dst_hstride(devinfo, inst) == BRW_HORIZONTAL_STRIDE_0, 1169 "Destination Horizontal Stride must not be 0"); 1170 } 1171 1172 return error_msg; 1173} 1174 1175static struct string 1176special_restrictions_for_mixed_float_mode(const struct brw_isa_info *isa, 1177 const brw_inst *inst) 1178{ 1179 const struct intel_device_info *devinfo = isa->devinfo; 1180 1181 struct string error_msg = { .str = NULL, .len = 0 }; 1182 1183 const unsigned opcode = brw_inst_opcode(isa, inst); 1184 const unsigned num_sources = num_sources_from_inst(isa, inst); 1185 if (num_sources >= 3) 1186 return error_msg; 1187 1188 if (!is_mixed_float(isa, inst)) 1189 return error_msg; 1190 1191 unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst); 1192 bool is_align16 = brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16; 1193 1194 enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst); 1195 enum brw_reg_type src1_type = num_sources > 1 ? 1196 brw_inst_src1_type(devinfo, inst) : 0; 1197 enum brw_reg_type dst_type = brw_inst_dst_type(devinfo, inst); 1198 1199 unsigned dst_stride = STRIDE(brw_inst_dst_hstride(devinfo, inst)); 1200 bool dst_is_packed = is_packed(exec_size * dst_stride, exec_size, dst_stride); 1201 1202 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode 1203 * Float Operations: 1204 * 1205 * "Indirect addressing on source is not supported when source and 1206 * destination data types are mixed float." 1207 */ 1208 ERROR_IF(brw_inst_src0_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT || 1209 (num_sources > 1 && 1210 brw_inst_src1_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT), 1211 "Indirect addressing on source is not supported when source and " 1212 "destination data types are mixed float"); 1213 1214 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode 1215 * Float Operations: 1216 * 1217 * "No SIMD16 in mixed mode when destination is f32. 
Instruction 1218 * execution size must be no more than 8." 1219 */ 1220 ERROR_IF(exec_size > 8 && dst_type == BRW_REGISTER_TYPE_F, 1221 "Mixed float mode with 32-bit float destination is limited " 1222 "to SIMD8"); 1223 1224 if (is_align16) { 1225 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode 1226 * Float Operations: 1227 * 1228 * "In Align16 mode, when half float and float data types are mixed 1229 * between source operands OR between source and destination operands, 1230 * the register content are assumed to be packed." 1231 * 1232 * Since Align16 doesn't have a concept of horizontal stride (or width), 1233 * it means that vertical stride must always be 4, since 0 and 2 would 1234 * lead to replicated data, and any other value is disallowed in Align16. 1235 */ 1236 ERROR_IF(brw_inst_src0_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4, 1237 "Align16 mixed float mode assumes packed data (vstride must be 4"); 1238 1239 ERROR_IF(num_sources >= 2 && 1240 brw_inst_src1_vstride(devinfo, inst) != BRW_VERTICAL_STRIDE_4, 1241 "Align16 mixed float mode assumes packed data (vstride must be 4"); 1242 1243 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode 1244 * Float Operations: 1245 * 1246 * "For Align16 mixed mode, both input and output packed f16 data 1247 * must be oword aligned, no oword crossing in packed f16." 1248 * 1249 * The previous rule requires that Align16 operands are always packed, 1250 * and since there is only one bit for Align16 subnr, which represents 1251 * offsets 0B and 16B, this rule is always enforced and we don't need to 1252 * validate it. 1253 */ 1254 1255 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode 1256 * Float Operations: 1257 * 1258 * "No SIMD16 in mixed mode when destination is packed f16 for both 1259 * Align1 and Align16." 
1260 * 1261 * And: 1262 * 1263 * "In Align16 mode, when half float and float data types are mixed 1264 * between source operands OR between source and destination operands, 1265 * the register content are assumed to be packed." 1266 * 1267 * Which implies that SIMD16 is not available in Align16. This is further 1268 * confirmed by: 1269 * 1270 * "For Align16 mixed mode, both input and output packed f16 data 1271 * must be oword aligned, no oword crossing in packed f16" 1272 * 1273 * Since oword-aligned packed f16 data would cross oword boundaries when 1274 * the execution size is larger than 8. 1275 */ 1276 ERROR_IF(exec_size > 8, "Align16 mixed float mode is limited to SIMD8"); 1277 1278 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode 1279 * Float Operations: 1280 * 1281 * "No accumulator read access for Align16 mixed float." 1282 */ 1283 ERROR_IF(inst_uses_src_acc(isa, inst), 1284 "No accumulator read access for Align16 mixed float"); 1285 } else { 1286 assert(!is_align16); 1287 1288 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode 1289 * Float Operations: 1290 * 1291 * "No SIMD16 in mixed mode when destination is packed f16 for both 1292 * Align1 and Align16." 
1293 */ 1294 ERROR_IF(exec_size > 8 && dst_is_packed && 1295 dst_type == BRW_REGISTER_TYPE_HF, 1296 "Align1 mixed float mode is limited to SIMD8 when destination " 1297 "is packed half-float"); 1298 1299 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode 1300 * Float Operations: 1301 * 1302 * "Math operations for mixed mode: 1303 * - In Align1, f16 inputs need to be strided" 1304 */ 1305 if (opcode == BRW_OPCODE_MATH) { 1306 if (src0_type == BRW_REGISTER_TYPE_HF) { 1307 ERROR_IF(STRIDE(brw_inst_src0_hstride(devinfo, inst)) <= 1, 1308 "Align1 mixed mode math needs strided half-float inputs"); 1309 } 1310 1311 if (num_sources >= 2 && src1_type == BRW_REGISTER_TYPE_HF) { 1312 ERROR_IF(STRIDE(brw_inst_src1_hstride(devinfo, inst)) <= 1, 1313 "Align1 mixed mode math needs strided half-float inputs"); 1314 } 1315 } 1316 1317 if (dst_type == BRW_REGISTER_TYPE_HF && dst_stride == 1) { 1318 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode 1319 * Float Operations: 1320 * 1321 * "In Align1, destination stride can be smaller than execution 1322 * type. When destination is stride of 1, 16 bit packed data is 1323 * updated on the destination. However, output packed f16 data 1324 * must be oword aligned, no oword crossing in packed f16." 1325 * 1326 * The requirement of not crossing oword boundaries for 16-bit oword 1327 * aligned data means that execution size is limited to 8. 
1328 */ 1329 unsigned subreg; 1330 if (brw_inst_dst_address_mode(devinfo, inst) == BRW_ADDRESS_DIRECT) 1331 subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst); 1332 else 1333 subreg = brw_inst_dst_ia_subreg_nr(devinfo, inst); 1334 ERROR_IF(subreg % 16 != 0, 1335 "Align1 mixed mode packed half-float output must be " 1336 "oword aligned"); 1337 ERROR_IF(exec_size > 8, 1338 "Align1 mixed mode packed half-float output must not " 1339 "cross oword boundaries (max exec size is 8)"); 1340 1341 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode 1342 * Float Operations: 1343 * 1344 * "When source is float or half float from accumulator register and 1345 * destination is half float with a stride of 1, the source must 1346 * register aligned. i.e., source must have offset zero." 1347 * 1348 * Align16 mixed float mode doesn't allow accumulator access on sources, 1349 * so we only need to check this for Align1. 1350 */ 1351 if (src0_is_acc(devinfo, inst) && 1352 (src0_type == BRW_REGISTER_TYPE_F || 1353 src0_type == BRW_REGISTER_TYPE_HF)) { 1354 ERROR_IF(brw_inst_src0_da1_subreg_nr(devinfo, inst) != 0, 1355 "Mixed float mode requires register-aligned accumulator " 1356 "source reads when destination is packed half-float"); 1357 1358 } 1359 1360 if (num_sources > 1 && 1361 src1_is_acc(devinfo, inst) && 1362 (src1_type == BRW_REGISTER_TYPE_F || 1363 src1_type == BRW_REGISTER_TYPE_HF)) { 1364 ERROR_IF(brw_inst_src1_da1_subreg_nr(devinfo, inst) != 0, 1365 "Mixed float mode requires register-aligned accumulator " 1366 "source reads when destination is packed half-float"); 1367 } 1368 } 1369 1370 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode 1371 * Float Operations: 1372 * 1373 * "No swizzle is allowed when an accumulator is used as an implicit 1374 * source or an explicit source in an instruction. i.e. when 1375 * destination is half float with an implicit accumulator source, 1376 * destination stride needs to be 2." 
1377 * 1378 * FIXME: it is not quite clear what the first sentence actually means 1379 * or its link to the implication described after it, so we only 1380 * validate the explicit implication, which is clearly described. 1381 */ 1382 if (dst_type == BRW_REGISTER_TYPE_HF && 1383 inst_uses_src_acc(isa, inst)) { 1384 ERROR_IF(dst_stride != 2, 1385 "Mixed float mode with implicit/explicit accumulator " 1386 "source and half-float destination requires a stride " 1387 "of 2 on the destination"); 1388 } 1389 } 1390 1391 return error_msg; 1392} 1393 1394/** 1395 * Creates an \p access_mask for an \p exec_size, \p element_size, and a region 1396 * 1397 * An \p access_mask is a 32-element array of uint64_t, where each uint64_t is 1398 * a bitmask of bytes accessed by the region. 1399 * 1400 * For instance the access mask of the source gX.1<4,2,2>F in an exec_size = 4 1401 * instruction would be 1402 * 1403 * access_mask[0] = 0x00000000000000F0 1404 * access_mask[1] = 0x000000000000F000 1405 * access_mask[2] = 0x0000000000F00000 1406 * access_mask[3] = 0x00000000F0000000 1407 * access_mask[4-31] = 0 1408 * 1409 * because the first execution channel accesses bytes 7-4 and the second 1410 * execution channel accesses bytes 15-12, etc. 
/**
 * Fills \p access_mask for an Align1 region of \p exec_size channels.
 *
 * Each of the 32 entries is a bitmask over the 64 bytes (2 GRFs) of the
 * addressed window, marking the bytes touched by one execution channel.
 *
 * For instance the access mask of the source gX.1<4,2,2>F in an exec_size = 4
 * instruction would be
 *
 *    access_mask[0] = 0x00000000000000F0
 *    access_mask[1] = 0x000000000000F000
 *    access_mask[2] = 0x0000000000F00000
 *    access_mask[3] = 0x00000000F0000000
 *    access_mask[4-31] = 0
 *
 * because the first execution channel accesses bytes 7-4, the second
 * execution channel accesses bytes 15-12, etc.
 */
static void
align1_access_mask(uint64_t access_mask[static 32],
                   unsigned exec_size, unsigned element_size, unsigned subreg,
                   unsigned vstride, unsigned width, unsigned hstride)
{
   const uint64_t element_mask = (1ULL << element_size) - 1;
   const unsigned rows = exec_size / width;
   unsigned row_base = subreg;
   unsigned channel = 0;

   for (unsigned row = 0; row < rows; row++) {
      unsigned byte_offset = row_base;

      for (unsigned col = 0; col < width; col++) {
         access_mask[channel++] = element_mask << (byte_offset % 64);
         byte_offset += hstride * element_size;
      }

      row_base += vstride * element_size;
   }

   /* Either the region was empty (immediate/skipped) or every channel of the
    * instruction was assigned a mask.
    */
   assert(channel == 0 || channel == exec_size);
}

/**
 * Returns how many GRF registers the region described by \p access_mask
 * touches: 0 (nothing), 1 (only the first register, bytes 0-31), or
 * 2 (any access beyond byte 31 spills into the second register).
 */
static int
registers_read(const uint64_t access_mask[static 32])
{
   bool touches_first_reg = false;

   for (unsigned chan = 0; chan < 32; chan++) {
      const uint64_t bytes = access_mask[chan];

      /* Any byte at offset >= 32 lies in the second register. */
      if (bytes >> 32)
         return 2;

      touches_first_reg = touches_first_reg || bytes != 0;
   }

   return touches_first_reg ? 1 : 0;
}
 */
static struct string
region_alignment_rules(const struct brw_isa_info *isa,
                       const brw_inst *inst)
{
   const struct intel_device_info *devinfo = isa->devinfo;
   const struct opcode_desc *desc =
      brw_opcode_desc(isa, brw_inst_opcode(isa, inst));
   unsigned num_sources = num_sources_from_inst(isa, inst);
   unsigned exec_size = 1 << brw_inst_exec_size(devinfo, inst);
   /* Per-channel byte-access bitmasks over a 64-byte (2-GRF) window. */
   uint64_t dst_access_mask[32], src0_access_mask[32], src1_access_mask[32];
   struct string error_msg = { .str = NULL, .len = 0 };

   /* 3-src instructions use a different region encoding; not handled here. */
   if (num_sources == 3)
      return (struct string){};

   /* These rules are stated for Align1 regions only. */
   if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16)
      return (struct string){};

   /* Sends have no regions to validate. */
   if (inst_is_send(isa, inst))
      return (struct string){};

   memset(dst_access_mask, 0, sizeof(dst_access_mask));
   memset(src0_access_mask, 0, sizeof(src0_access_mask));
   memset(src1_access_mask, 0, sizeof(src1_access_mask));

   for (unsigned i = 0; i < num_sources; i++) {
      unsigned vstride, width, hstride, element_size, subreg;
      enum brw_reg_type type;

      /* In Direct Addressing mode, a source cannot span more than 2 adjacent
       * GRF registers.
       */

      /* Skips indirect and immediate sources (the `continue` targets the
       * enclosing source loop), then decodes the region and builds the
       * per-channel access mask for the source.
       */
#define DO_SRC(n)                                                      \
      if (brw_inst_src ## n ## _address_mode(devinfo, inst) !=         \
          BRW_ADDRESS_DIRECT)                                          \
         continue;                                                     \
                                                                       \
      if (brw_inst_src ## n ## _reg_file(devinfo, inst) ==             \
          BRW_IMMEDIATE_VALUE)                                         \
         continue;                                                     \
                                                                       \
      vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst));  \
      width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst));       \
      hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst));  \
      type = brw_inst_src ## n ## _type(devinfo, inst);                \
      element_size = brw_reg_type_to_size(type);                       \
      subreg = brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst);     \
      align1_access_mask(src ## n ## _access_mask,                     \
                         exec_size, element_size, subreg,              \
                         vstride, width, hstride)

      if (i == 0) {
         DO_SRC(0);
      } else {
         DO_SRC(1);
      }
#undef DO_SRC

      /* The byte offset of the last element read must still land inside the
       * 2-GRF (64-byte) window starting at the source register.
       */
      unsigned num_vstride = exec_size / width;
      unsigned num_hstride = width;
      unsigned vstride_elements = (num_vstride - 1) * vstride;
      unsigned hstride_elements = (num_hstride - 1) * hstride;
      unsigned offset = (vstride_elements + hstride_elements) * element_size +
                        subreg;
      ERROR_IF(offset >= 64,
               "A source cannot span more than 2 adjacent GRF registers");
   }

   /* The remaining rules only constrain a real (non-null) destination. */
   if (desc->ndst == 0 || dst_is_null(devinfo, inst))
      return error_msg;

   unsigned stride = STRIDE(brw_inst_dst_hstride(devinfo, inst));
   enum brw_reg_type dst_type = inst_dst_type(isa, inst);
   unsigned element_size = brw_reg_type_to_size(dst_type);
   unsigned subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst);
   unsigned offset = ((exec_size - 1) * stride * element_size) + subreg;
   ERROR_IF(offset >= 64,
            "A destination cannot span more than 2 adjacent GRF registers");

   /* If the region already overflows the window the access masks below would
    * be meaningless, so stop here.
    */
   if (error_msg.str)
      return error_msg;

   /* On IVB/BYT, region parameters and execution size for DF are in terms of
    * 32-bit elements, so they are doubled. For evaluating the validity of an
    * instruction, we halve them.
    */
   if (devinfo->verx10 == 70 &&
       element_size == 8)
      element_size = 4;

   /* A destination has an implied region of <exec_size*stride; exec_size,
    * stride> (scalar <0; 1, 0> when exec_size == 1).
    */
   align1_access_mask(dst_access_mask, exec_size, element_size, subreg,
                      exec_size == 1 ? 0 : exec_size * stride,
                      exec_size == 1 ? 1 : exec_size,
                      exec_size == 1 ? 0 : stride);

   unsigned dst_regs = registers_read(dst_access_mask);
   unsigned src0_regs = registers_read(src0_access_mask);
   unsigned src1_regs = registers_read(src1_access_mask);

   /* The SNB, IVB, HSW, BDW, and CHV PRMs say:
    *
    *    When an instruction has a source region spanning two registers and a
    *    destination region contained in one register, the number of elements
    *    must be the same between two sources and one of the following must be
    *    true:
    *
    *       1. The destination region is entirely contained in the lower OWord
    *          of a register.
    *       2. The destination region is entirely contained in the upper OWord
    *          of a register.
    *       3. The destination elements are evenly split between the two OWords
    *          of a register.
    */
   if (devinfo->ver <= 8) {
      if (dst_regs == 1 && (src0_regs == 2 || src1_regs == 2)) {
         unsigned upper_oword_writes = 0, lower_oword_writes = 0;

         for (unsigned i = 0; i < exec_size; i++) {
            /* Bytes 16-31 form the upper OWord of the register. */
            if (dst_access_mask[i] > 0x0000FFFF) {
               upper_oword_writes++;
            } else {
               assert(dst_access_mask[i] != 0);
               lower_oword_writes++;
            }
         }

         ERROR_IF(lower_oword_writes != 0 &&
                  upper_oword_writes != 0 &&
                  upper_oword_writes != lower_oword_writes,
                  "Writes must be to only one OWord or "
                  "evenly split between OWords");
      }
   }

   /* The IVB and HSW PRMs say:
    *
    *    When an instruction has a source region that spans two registers and
    *    the destination spans two registers, the destination elements must be
    *    evenly split between the two registers [...]
    *
    * The SNB PRM contains similar wording (but written in a much more
    * confusing manner).
    *
    * The BDW PRM says:
    *
    *    When destination spans two registers, the source may be one or two
    *    registers. The destination elements must be evenly split between the
    *    two registers.
    *
    * The SKL PRM says:
    *
    *    When destination of MATH instruction spans two registers, the
    *    destination elements must be evenly split between the two registers.
    *
    * It is not known whether this restriction applies to KBL other Gens after
    * SKL.
    */
   if (devinfo->ver <= 8 ||
       brw_inst_opcode(isa, inst) == BRW_OPCODE_MATH) {

      /* Nothing explicitly states that on Gen < 8 elements must be evenly
       * split between two destination registers in the two exceptional
       * source-region-spans-one-register cases, but since Broadwell requires
       * evenly split writes regardless of source region, we assume that it was
       * an oversight and require it.
       */
      if (dst_regs == 2) {
         unsigned upper_reg_writes = 0, lower_reg_writes = 0;

         for (unsigned i = 0; i < exec_size; i++) {
            /* Bytes 32-63 belong to the second GRF of the window. */
            if (dst_access_mask[i] > 0xFFFFFFFF) {
               upper_reg_writes++;
            } else {
               assert(dst_access_mask[i] != 0);
               lower_reg_writes++;
            }
         }

         ERROR_IF(upper_reg_writes != lower_reg_writes,
                  "Writes must be evenly split between the two "
                  "destination registers");
      }
   }

   /* The IVB and HSW PRMs say:
    *
    *    When an instruction has a source region that spans two registers and
    *    the destination spans two registers, the destination elements must be
    *    evenly split between the two registers and each destination register
    *    must be entirely derived from one source register.
    *
    *    Note: In such cases, the regioning parameters must ensure that the
    *    offset from the two source registers is the same.
    *
    * The SNB PRM contains similar wording (but written in a much more
    * confusing manner).
    *
    * There are effectively three rules stated here:
    *
    *    For an instruction with a source and a destination spanning two
    *    registers,
    *
    *       (1) destination elements must be evenly split between the two
    *           registers
    *       (2) all destination elements in a register must be derived
    *           from one source register
    *       (3) the offset (i.e. the starting location in each of the two
    *           registers spanned by a region) must be the same in the two
    *           registers spanned by a region
    *
    * It is impossible to violate rule (1) without violating (2) or (3), so we
    * do not attempt to validate it.
    */
   if (devinfo->ver <= 7 && dst_regs == 2) {
      for (unsigned i = 0; i < num_sources; i++) {
         /* Skips single-register sources, validates rule (2) by comparing
          * which register each channel hits on dst vs. src, then recovers the
          * source's offset in the second register from the low set bit of the
          * first channel that reads it, for rule (3).
          */
#define DO_SRC(n)                                                        \
      if (src ## n ## _regs <= 1)                                        \
         continue;                                                       \
                                                                         \
      for (unsigned i = 0; i < exec_size; i++) {                         \
         if ((dst_access_mask[i] > 0xFFFFFFFF) !=                        \
             (src ## n ## _access_mask[i] > 0xFFFFFFFF)) {               \
            ERROR("Each destination register must be entirely derived "  \
                  "from one source register");                           \
            break;                                                       \
         }                                                               \
      }                                                                  \
                                                                         \
      unsigned offset_0 =                                                \
         brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst);             \
      unsigned offset_1 = offset_0;                                      \
                                                                         \
      for (unsigned i = 0; i < exec_size; i++) {                         \
         if (src ## n ## _access_mask[i] > 0xFFFFFFFF) {                 \
            offset_1 = __builtin_ctzll(src ## n ## _access_mask[i]) - 32; \
            break;                                                       \
         }                                                               \
      }                                                                  \
                                                                         \
      ERROR_IF(num_sources == 2 && offset_0 != offset_1,                 \
               "The offset from the two source registers "               \
               "must be the same")

         if (i == 0) {
            DO_SRC(0);
         } else {
            DO_SRC(1);
         }
#undef DO_SRC
      }
   }

   /* The IVB and HSW PRMs say:
    *
    *    When destination spans two registers, the source MUST span two
    *    registers. The exception to the above rule:
    *        1. When source is scalar, the source registers are not
    *           incremented.
    *        2. When source is packed integer Word and destination is packed
    *           integer DWord, the source register is not incremented by the
    *           source sub register is incremented.
    *
    * The SNB PRM does not contain this rule, but the internal documentation
    * indicates that it applies to SNB as well. We assume that the rule applies
    * to Gen <= 5 although their PRMs do not state it.
    *
    * While the documentation explicitly says in exception (2) that the
    * destination must be an integer DWord, the hardware allows at least a
    * float destination type as well. We emit such instructions from
    *
    *    fs_visitor::emit_interpolation_setup_gfx6
    *    fs_visitor::emit_fragcoord_interpolation
    *
    * and have for years with no ill effects.
    *
    * Additionally the simulator source code indicates that the real condition
    * is that the size of the destination type is 4 bytes.
    */
   if (devinfo->ver <= 7 && dst_regs == 2) {
      enum brw_reg_type dst_type = inst_dst_type(isa, inst);
      bool dst_is_packed_dword =
         is_packed(exec_size * stride, exec_size, stride) &&
         brw_reg_type_to_size(dst_type) == 4;

      for (unsigned i = 0; i < num_sources; i++) {
         /* Flags a one-register source unless it is scalar (exception 1) or
          * the packed-word -> packed-dword expansion case (exception 2).
          */
#define DO_SRC(n)                                                             \
      unsigned vstride, width, hstride;                                       \
      vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst));         \
      width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst));              \
      hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst));         \
      bool src ## n ## _is_packed_word =                                      \
         is_packed(vstride, width, hstride) &&                                \
         (brw_inst_src ## n ## _type(devinfo, inst) == BRW_REGISTER_TYPE_W || \
          brw_inst_src ## n ## _type(devinfo, inst) == BRW_REGISTER_TYPE_UW); \
                                                                              \
      ERROR_IF(src ## n ## _regs == 1 &&                                      \
               !src ## n ## _has_scalar_region(devinfo, inst) &&              \
               !(dst_is_packed_dword && src ## n ## _is_packed_word),         \
               "When the destination spans two registers, the source must "   \
               "span two registers\n" ERROR_INDENT "(exceptions for scalar "  \
               "source and packed-word to packed-dword expansion)")

         if (i == 0) {
            DO_SRC(0);
         } else {
            DO_SRC(1);
         }
#undef DO_SRC
      }
   }

   return error_msg;
}

static struct string
vector_immediate_restrictions(const struct brw_isa_info *isa,
                              const brw_inst *inst)
{
   const struct intel_device_info *devinfo = isa->devinfo;

   unsigned num_sources = num_sources_from_inst(isa, inst);
   struct string error_msg = { .str = NULL, .len = 0 };

   if (num_sources == 3 || num_sources == 0)
      return (struct string){};

   /* An immediate is always the last source of an instruction. */
   unsigned file = num_sources == 1 ?
1786 brw_inst_src0_reg_file(devinfo, inst) : 1787 brw_inst_src1_reg_file(devinfo, inst); 1788 if (file != BRW_IMMEDIATE_VALUE) 1789 return (struct string){}; 1790 1791 enum brw_reg_type dst_type = inst_dst_type(isa, inst); 1792 unsigned dst_type_size = brw_reg_type_to_size(dst_type); 1793 unsigned dst_subreg = brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1 ? 1794 brw_inst_dst_da1_subreg_nr(devinfo, inst) : 0; 1795 unsigned dst_stride = STRIDE(brw_inst_dst_hstride(devinfo, inst)); 1796 enum brw_reg_type type = num_sources == 1 ? 1797 brw_inst_src0_type(devinfo, inst) : 1798 brw_inst_src1_type(devinfo, inst); 1799 1800 /* The PRMs say: 1801 * 1802 * When an immediate vector is used in an instruction, the destination 1803 * must be 128-bit aligned with destination horizontal stride equivalent 1804 * to a word for an immediate integer vector (v) and equivalent to a 1805 * DWord for an immediate float vector (vf). 1806 * 1807 * The text has not been updated for the addition of the immediate unsigned 1808 * integer vector type (uv) on SNB, but presumably the same restriction 1809 * applies. 
1810 */ 1811 switch (type) { 1812 case BRW_REGISTER_TYPE_V: 1813 case BRW_REGISTER_TYPE_UV: 1814 case BRW_REGISTER_TYPE_VF: 1815 ERROR_IF(dst_subreg % (128 / 8) != 0, 1816 "Destination must be 128-bit aligned in order to use immediate " 1817 "vector types"); 1818 1819 if (type == BRW_REGISTER_TYPE_VF) { 1820 ERROR_IF(dst_type_size * dst_stride != 4, 1821 "Destination must have stride equivalent to dword in order " 1822 "to use the VF type"); 1823 } else { 1824 ERROR_IF(dst_type_size * dst_stride != 2, 1825 "Destination must have stride equivalent to word in order " 1826 "to use the V or UV type"); 1827 } 1828 break; 1829 default: 1830 break; 1831 } 1832 1833 return error_msg; 1834} 1835 1836static struct string 1837special_requirements_for_handling_double_precision_data_types( 1838 const struct brw_isa_info *isa, 1839 const brw_inst *inst) 1840{ 1841 const struct intel_device_info *devinfo = isa->devinfo; 1842 1843 unsigned num_sources = num_sources_from_inst(isa, inst); 1844 struct string error_msg = { .str = NULL, .len = 0 }; 1845 1846 if (num_sources == 3 || num_sources == 0) 1847 return (struct string){}; 1848 1849 /* Split sends don't have types so there's no doubles there. 
*/ 1850 if (inst_is_split_send(isa, inst)) 1851 return (struct string){}; 1852 1853 enum brw_reg_type exec_type = execution_type(isa, inst); 1854 unsigned exec_type_size = brw_reg_type_to_size(exec_type); 1855 1856 enum brw_reg_file dst_file = brw_inst_dst_reg_file(devinfo, inst); 1857 enum brw_reg_type dst_type = inst_dst_type(isa, inst); 1858 unsigned dst_type_size = brw_reg_type_to_size(dst_type); 1859 unsigned dst_hstride = STRIDE(brw_inst_dst_hstride(devinfo, inst)); 1860 unsigned dst_reg = brw_inst_dst_da_reg_nr(devinfo, inst); 1861 unsigned dst_subreg = brw_inst_dst_da1_subreg_nr(devinfo, inst); 1862 unsigned dst_address_mode = brw_inst_dst_address_mode(devinfo, inst); 1863 1864 bool is_integer_dword_multiply = 1865 devinfo->ver >= 8 && 1866 brw_inst_opcode(isa, inst) == BRW_OPCODE_MUL && 1867 (brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_D || 1868 brw_inst_src0_type(devinfo, inst) == BRW_REGISTER_TYPE_UD) && 1869 (brw_inst_src1_type(devinfo, inst) == BRW_REGISTER_TYPE_D || 1870 brw_inst_src1_type(devinfo, inst) == BRW_REGISTER_TYPE_UD); 1871 1872 const bool is_double_precision = 1873 dst_type_size == 8 || exec_type_size == 8 || is_integer_dword_multiply; 1874 1875 for (unsigned i = 0; i < num_sources; i++) { 1876 unsigned vstride, width, hstride, type_size, reg, subreg, address_mode; 1877 bool is_scalar_region; 1878 enum brw_reg_file file; 1879 enum brw_reg_type type; 1880 1881#define DO_SRC(n) \ 1882 if (brw_inst_src ## n ## _reg_file(devinfo, inst) == \ 1883 BRW_IMMEDIATE_VALUE) \ 1884 continue; \ 1885 \ 1886 is_scalar_region = src ## n ## _has_scalar_region(devinfo, inst); \ 1887 vstride = STRIDE(brw_inst_src ## n ## _vstride(devinfo, inst)); \ 1888 width = WIDTH(brw_inst_src ## n ## _width(devinfo, inst)); \ 1889 hstride = STRIDE(brw_inst_src ## n ## _hstride(devinfo, inst)); \ 1890 file = brw_inst_src ## n ## _reg_file(devinfo, inst); \ 1891 type = brw_inst_src ## n ## _type(devinfo, inst); \ 1892 type_size = brw_reg_type_to_size(type); \ 
1893 reg = brw_inst_src ## n ## _da_reg_nr(devinfo, inst); \ 1894 subreg = brw_inst_src ## n ## _da1_subreg_nr(devinfo, inst); \ 1895 address_mode = brw_inst_src ## n ## _address_mode(devinfo, inst) 1896 1897 if (i == 0) { 1898 DO_SRC(0); 1899 } else { 1900 DO_SRC(1); 1901 } 1902#undef DO_SRC 1903 1904 const unsigned src_stride = (hstride ? hstride : vstride) * type_size; 1905 const unsigned dst_stride = dst_hstride * dst_type_size; 1906 1907 /* The PRMs say that for CHV, BXT: 1908 * 1909 * When source or destination datatype is 64b or operation is integer 1910 * DWord multiply, regioning in Align1 must follow these rules: 1911 * 1912 * 1. Source and Destination horizontal stride must be aligned to the 1913 * same qword. 1914 * 2. Regioning must ensure Src.Vstride = Src.Width * Src.Hstride. 1915 * 3. Source and Destination offset must be the same, except the case 1916 * of scalar source. 1917 * 1918 * We assume that the restriction applies to GLK as well. 1919 */ 1920 if (is_double_precision && 1921 brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_1 && 1922 (devinfo->platform == INTEL_PLATFORM_CHV || intel_device_info_is_9lp(devinfo))) { 1923 ERROR_IF(!is_scalar_region && 1924 (src_stride % 8 != 0 || 1925 dst_stride % 8 != 0 || 1926 src_stride != dst_stride), 1927 "Source and destination horizontal stride must equal and a " 1928 "multiple of a qword when the execution type is 64-bit"); 1929 1930 ERROR_IF(vstride != width * hstride, 1931 "Vstride must be Width * Hstride when the execution type is " 1932 "64-bit"); 1933 1934 ERROR_IF(!is_scalar_region && dst_subreg != subreg, 1935 "Source and destination offset must be the same when the " 1936 "execution type is 64-bit"); 1937 } 1938 1939 /* The PRMs say that for CHV, BXT: 1940 * 1941 * When source or destination datatype is 64b or operation is integer 1942 * DWord multiply, indirect addressing must not be used. 1943 * 1944 * We assume that the restriction applies to GLK as well. 
1945 */ 1946 if (is_double_precision && 1947 (devinfo->platform == INTEL_PLATFORM_CHV || intel_device_info_is_9lp(devinfo))) { 1948 ERROR_IF(BRW_ADDRESS_REGISTER_INDIRECT_REGISTER == address_mode || 1949 BRW_ADDRESS_REGISTER_INDIRECT_REGISTER == dst_address_mode, 1950 "Indirect addressing is not allowed when the execution type " 1951 "is 64-bit"); 1952 } 1953 1954 /* The PRMs say that for CHV, BXT: 1955 * 1956 * ARF registers must never be used with 64b datatype or when 1957 * operation is integer DWord multiply. 1958 * 1959 * We assume that the restriction applies to GLK as well. 1960 * 1961 * We assume that the restriction does not apply to the null register. 1962 */ 1963 if (is_double_precision && 1964 (devinfo->platform == INTEL_PLATFORM_CHV || 1965 intel_device_info_is_9lp(devinfo))) { 1966 ERROR_IF(brw_inst_opcode(isa, inst) == BRW_OPCODE_MAC || 1967 brw_inst_acc_wr_control(devinfo, inst) || 1968 (BRW_ARCHITECTURE_REGISTER_FILE == file && 1969 reg != BRW_ARF_NULL) || 1970 (BRW_ARCHITECTURE_REGISTER_FILE == dst_file && 1971 dst_reg != BRW_ARF_NULL), 1972 "Architecture registers cannot be used when the execution " 1973 "type is 64-bit"); 1974 } 1975 1976 /* From the hardware spec section "Register Region Restrictions": 1977 * 1978 * There are two rules: 1979 * 1980 * "In case of all floating point data types used in destination:" and 1981 * 1982 * "In case where source or destination datatype is 64b or operation is 1983 * integer DWord multiply:" 1984 * 1985 * both of which list the same restrictions: 1986 * 1987 * "1. Register Regioning patterns where register data bit location 1988 * of the LSB of the channels are changed between source and 1989 * destination are not supported on Src0 and Src1 except for 1990 * broadcast of a scalar. 1991 * 1992 * 2. Explicit ARF registers except null and accumulator must not be 1993 * used." 
1994 */ 1995 if (devinfo->verx10 >= 125 && 1996 (brw_reg_type_is_floating_point(dst_type) || 1997 is_double_precision)) { 1998 ERROR_IF(!is_scalar_region && 1999 BRW_ADDRESS_REGISTER_INDIRECT_REGISTER != address_mode && 2000 (!is_linear(vstride, width, hstride) || 2001 src_stride != dst_stride || 2002 subreg != dst_subreg), 2003 "Register Regioning patterns where register data bit " 2004 "location of the LSB of the channels are changed between " 2005 "source and destination are not supported except for " 2006 "broadcast of a scalar."); 2007 2008 ERROR_IF((file == BRW_ARCHITECTURE_REGISTER_FILE && 2009 reg != BRW_ARF_NULL && !(reg >= BRW_ARF_ACCUMULATOR && reg < BRW_ARF_FLAG)) || 2010 (dst_file == BRW_ARCHITECTURE_REGISTER_FILE && 2011 dst_reg != BRW_ARF_NULL && dst_reg != BRW_ARF_ACCUMULATOR), 2012 "Explicit ARF registers except null and accumulator must not " 2013 "be used."); 2014 } 2015 2016 /* From the hardware spec section "Register Region Restrictions": 2017 * 2018 * "Vx1 and VxH indirect addressing for Float, Half-Float, Double-Float and 2019 * Quad-Word data must not be used." 2020 */ 2021 if (devinfo->verx10 >= 125 && 2022 (brw_reg_type_is_floating_point(type) || type_sz(type) == 8)) { 2023 ERROR_IF(address_mode == BRW_ADDRESS_REGISTER_INDIRECT_REGISTER && 2024 vstride == BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL, 2025 "Vx1 and VxH indirect addressing for Float, Half-Float, " 2026 "Double-Float and Quad-Word data must not be used"); 2027 } 2028 } 2029 2030 /* The PRMs say that for BDW, SKL: 2031 * 2032 * If Align16 is required for an operation with QW destination and non-QW 2033 * source datatypes, the execution size cannot exceed 2. 2034 * 2035 * We assume that the restriction applies to all Gfx8+ parts. 2036 */ 2037 if (is_double_precision && devinfo->ver >= 8) { 2038 enum brw_reg_type src0_type = brw_inst_src0_type(devinfo, inst); 2039 enum brw_reg_type src1_type = 2040 num_sources > 1 ? 
brw_inst_src1_type(devinfo, inst) : src0_type; 2041 unsigned src0_type_size = brw_reg_type_to_size(src0_type); 2042 unsigned src1_type_size = brw_reg_type_to_size(src1_type); 2043 2044 ERROR_IF(brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16 && 2045 dst_type_size == 8 && 2046 (src0_type_size != 8 || src1_type_size != 8) && 2047 brw_inst_exec_size(devinfo, inst) > BRW_EXECUTE_2, 2048 "In Align16 exec size cannot exceed 2 with a QWord destination " 2049 "and a non-QWord source"); 2050 } 2051 2052 /* The PRMs say that for CHV, BXT: 2053 * 2054 * When source or destination datatype is 64b or operation is integer 2055 * DWord multiply, DepCtrl must not be used. 2056 * 2057 * We assume that the restriction applies to GLK as well. 2058 */ 2059 if (is_double_precision && 2060 (devinfo->platform == INTEL_PLATFORM_CHV || intel_device_info_is_9lp(devinfo))) { 2061 ERROR_IF(brw_inst_no_dd_check(devinfo, inst) || 2062 brw_inst_no_dd_clear(devinfo, inst), 2063 "DepCtrl is not allowed when the execution type is 64-bit"); 2064 } 2065 2066 return error_msg; 2067} 2068 2069static struct string 2070instruction_restrictions(const struct brw_isa_info *isa, 2071 const brw_inst *inst) 2072{ 2073 const struct intel_device_info *devinfo = isa->devinfo; 2074 struct string error_msg = { .str = NULL, .len = 0 }; 2075 2076 /* From Wa_1604601757: 2077 * 2078 * "When multiplying a DW and any lower precision integer, source modifier 2079 * is not supported." 
2080 */ 2081 if (devinfo->ver >= 12 && 2082 brw_inst_opcode(isa, inst) == BRW_OPCODE_MUL) { 2083 enum brw_reg_type exec_type = execution_type(isa, inst); 2084 const bool src0_valid = type_sz(brw_inst_src0_type(devinfo, inst)) == 4 || 2085 brw_inst_src0_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE || 2086 !(brw_inst_src0_negate(devinfo, inst) || 2087 brw_inst_src0_abs(devinfo, inst)); 2088 const bool src1_valid = type_sz(brw_inst_src1_type(devinfo, inst)) == 4 || 2089 brw_inst_src1_reg_file(devinfo, inst) == BRW_IMMEDIATE_VALUE || 2090 !(brw_inst_src1_negate(devinfo, inst) || 2091 brw_inst_src1_abs(devinfo, inst)); 2092 2093 ERROR_IF(!brw_reg_type_is_floating_point(exec_type) && 2094 type_sz(exec_type) == 4 && !(src0_valid && src1_valid), 2095 "When multiplying a DW and any lower precision integer, source " 2096 "modifier is not supported."); 2097 } 2098 2099 if (brw_inst_opcode(isa, inst) == BRW_OPCODE_CMP || 2100 brw_inst_opcode(isa, inst) == BRW_OPCODE_CMPN) { 2101 if (devinfo->ver <= 7) { 2102 /* Page 166 of the Ivy Bridge PRM Volume 4 part 3 (Execution Unit 2103 * ISA) says: 2104 * 2105 * Accumulator cannot be destination, implicit or explicit. The 2106 * destination must be a general register or the null register. 2107 * 2108 * Page 77 of the Haswell PRM Volume 2b contains the same text. The 2109 * 965G PRMs contain similar text. 2110 * 2111 * Page 864 (page 880 of the PDF) of the Broadwell PRM Volume 7 says: 2112 * 2113 * For the cmp and cmpn instructions, remove the accumulator 2114 * restrictions. 2115 */ 2116 ERROR_IF(brw_inst_dst_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE && 2117 brw_inst_dst_da_reg_nr(devinfo, inst) != BRW_ARF_NULL, 2118 "Accumulator cannot be destination, implicit or explicit."); 2119 } 2120 2121 /* Page 166 of the Ivy Bridge PRM Volume 4 part 3 (Execution Unit ISA) 2122 * says: 2123 * 2124 * If the destination is the null register, the {Switch} instruction 2125 * option must be used. 
2126 * 2127 * Page 77 of the Haswell PRM Volume 2b contains the same text. 2128 */ 2129 if (devinfo->ver == 7) { 2130 ERROR_IF(dst_is_null(devinfo, inst) && 2131 brw_inst_thread_control(devinfo, inst) != BRW_THREAD_SWITCH, 2132 "If the destination is the null register, the {Switch} " 2133 "instruction option must be used."); 2134 } 2135 } 2136 2137 if (brw_inst_opcode(isa, inst) == BRW_OPCODE_MATH) { 2138 unsigned math_function = brw_inst_math_function(devinfo, inst); 2139 switch (math_function) { 2140 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER: 2141 case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT: 2142 case BRW_MATH_FUNCTION_INT_DIV_REMAINDER: { 2143 /* Page 442 of the Broadwell PRM Volume 2a "Extended Math Function" says: 2144 * INT DIV function does not support source modifiers. 2145 * Bspec 6647 extends it back to Ivy Bridge. 2146 */ 2147 bool src0_valid = !brw_inst_src0_negate(devinfo, inst) && 2148 !brw_inst_src0_abs(devinfo, inst); 2149 bool src1_valid = !brw_inst_src1_negate(devinfo, inst) && 2150 !brw_inst_src1_abs(devinfo, inst); 2151 ERROR_IF(!src0_valid || !src1_valid, 2152 "INT DIV function does not support source modifiers."); 2153 break; 2154 } 2155 default: 2156 break; 2157 } 2158 } 2159 2160 if (brw_inst_opcode(isa, inst) == BRW_OPCODE_DP4A) { 2161 /* Page 396 (page 412 of the PDF) of the DG1 PRM volume 2a says: 2162 * 2163 * Only one of src0 or src1 operand may be an the (sic) accumulator 2164 * register (acc#). 
2165 */ 2166 ERROR_IF(src0_is_acc(devinfo, inst) && src1_is_acc(devinfo, inst), 2167 "Only one of src0 or src1 operand may be an accumulator " 2168 "register (acc#)."); 2169 2170 } 2171 2172 return error_msg; 2173} 2174 2175static struct string 2176send_descriptor_restrictions(const struct brw_isa_info *isa, 2177 const brw_inst *inst) 2178{ 2179 const struct intel_device_info *devinfo = isa->devinfo; 2180 struct string error_msg = { .str = NULL, .len = 0 }; 2181 2182 if (inst_is_split_send(isa, inst)) { 2183 /* We can only validate immediate descriptors */ 2184 if (brw_inst_send_sel_reg32_desc(devinfo, inst)) 2185 return error_msg; 2186 } else if (inst_is_send(isa, inst)) { 2187 /* We can only validate immediate descriptors */ 2188 if (brw_inst_src1_reg_file(devinfo, inst) != BRW_IMMEDIATE_VALUE) 2189 return error_msg; 2190 } else { 2191 return error_msg; 2192 } 2193 2194 const uint32_t desc = brw_inst_send_desc(devinfo, inst); 2195 2196 switch (brw_inst_sfid(devinfo, inst)) { 2197 case GFX12_SFID_TGM: 2198 case GFX12_SFID_SLM: 2199 case GFX12_SFID_UGM: 2200 ERROR_IF(!devinfo->has_lsc, "Platform does not support LSC"); 2201 2202 ERROR_IF(lsc_opcode_has_transpose(lsc_msg_desc_opcode(devinfo, desc)) && 2203 lsc_msg_desc_transpose(devinfo, desc) && 2204 brw_inst_exec_size(devinfo, inst) != BRW_EXECUTE_1, 2205 "Transposed vectors are restricted to Exec_Mask = 1."); 2206 break; 2207 2208 default: 2209 break; 2210 } 2211 2212 if (brw_inst_sfid(devinfo, inst) == BRW_SFID_URB) { 2213 /* Gfx4 doesn't have a "header present" bit in the SEND message. */ 2214 ERROR_IF(devinfo->ver > 4 && !brw_inst_header_present(devinfo, inst), 2215 "Header must be present for all URB messages."); 2216 2217 switch (brw_inst_urb_opcode(devinfo, inst)) { 2218 case BRW_URB_OPCODE_WRITE_HWORD: 2219 break; 2220 2221 /* case FF_SYNC: */ 2222 case BRW_URB_OPCODE_WRITE_OWORD: 2223 /* Gfx5 / Gfx6 FF_SYNC message and Gfx7+ URB_WRITE_OWORD have the 2224 * same opcode value. 
2225 */ 2226 if (devinfo->ver == 5 || devinfo->ver == 6) { 2227 ERROR_IF(brw_inst_urb_global_offset(devinfo, inst) != 0, 2228 "FF_SYNC global offset must be zero."); 2229 ERROR_IF(brw_inst_urb_swizzle_control(devinfo, inst) != 0, 2230 "FF_SYNC swizzle control must be zero."); 2231 ERROR_IF(brw_inst_urb_used(devinfo, inst) != 0, 2232 "FF_SYNC used must be zero."); 2233 ERROR_IF(brw_inst_urb_complete(devinfo, inst) != 0, 2234 "FF_SYNC complete must be zero."); 2235 2236 /* Volume 4 part 2 of the Sandybridge PRM (page 28) says: 2237 * 2238 * A message response (writeback) length of 1 GRF will be 2239 * indicated on the ‘send’ instruction if the thread requires 2240 * response data and/or synchronization. 2241 */ 2242 ERROR_IF((unsigned)brw_inst_rlen(devinfo, inst) > 1, 2243 "FF_SYNC read length must be 0 or 1."); 2244 } else { 2245 ERROR_IF(devinfo->ver < 7, 2246 "URB OWORD write messages only valid on gfx >= 7"); 2247 } 2248 break; 2249 2250 case BRW_URB_OPCODE_READ_HWORD: 2251 case BRW_URB_OPCODE_READ_OWORD: 2252 ERROR_IF(devinfo->ver < 7, 2253 "URB read messages only valid on gfx >= 7"); 2254 break; 2255 2256 case GFX7_URB_OPCODE_ATOMIC_MOV: 2257 case GFX7_URB_OPCODE_ATOMIC_INC: 2258 ERROR_IF(devinfo->ver < 7, 2259 "URB atomic move and increment messages only valid on gfx >= 7"); 2260 break; 2261 2262 case GFX8_URB_OPCODE_ATOMIC_ADD: 2263 /* The Haswell PRM lists this opcode as valid on page 317. 
*/ 2264 ERROR_IF(devinfo->verx10 < 75, 2265 "URB atomic add message only valid on gfx >= 7.5"); 2266 break; 2267 2268 case GFX8_URB_OPCODE_SIMD8_READ: 2269 ERROR_IF(brw_inst_rlen(devinfo, inst) == 0, 2270 "URB SIMD8 read message must read some data."); 2271 FALLTHROUGH; 2272 2273 case GFX8_URB_OPCODE_SIMD8_WRITE: 2274 ERROR_IF(devinfo->ver < 8, 2275 "URB SIMD8 messages only valid on gfx >= 8"); 2276 break; 2277 2278 case GFX125_URB_OPCODE_FENCE: 2279 ERROR_IF(devinfo->verx10 < 125, 2280 "URB fence message only valid on gfx >= 12.5"); 2281 break; 2282 2283 default: 2284 ERROR_IF(true, "Invalid URB message"); 2285 break; 2286 } 2287 } 2288 2289 return error_msg; 2290} 2291 2292bool 2293brw_validate_instruction(const struct brw_isa_info *isa, 2294 const brw_inst *inst, int offset, 2295 unsigned inst_size, 2296 struct disasm_info *disasm) 2297{ 2298 struct string error_msg = { .str = NULL, .len = 0 }; 2299 2300 if (is_unsupported_inst(isa, inst)) { 2301 ERROR("Instruction not supported on this Gen"); 2302 } else { 2303 CHECK(invalid_values); 2304 2305 if (error_msg.str == NULL) { 2306 CHECK(sources_not_null); 2307 CHECK(send_restrictions); 2308 CHECK(alignment_supported); 2309 CHECK(general_restrictions_based_on_operand_types); 2310 CHECK(general_restrictions_on_region_parameters); 2311 CHECK(special_restrictions_for_mixed_float_mode); 2312 CHECK(region_alignment_rules); 2313 CHECK(vector_immediate_restrictions); 2314 CHECK(special_requirements_for_handling_double_precision_data_types); 2315 CHECK(instruction_restrictions); 2316 CHECK(send_descriptor_restrictions); 2317 } 2318 } 2319 2320 if (error_msg.str && disasm) { 2321 disasm_insert_error(disasm, offset, inst_size, error_msg.str); 2322 } 2323 free(error_msg.str); 2324 2325 return error_msg.len == 0; 2326} 2327 2328bool 2329brw_validate_instructions(const struct brw_isa_info *isa, 2330 const void *assembly, int start_offset, int end_offset, 2331 struct disasm_info *disasm) 2332{ 2333 const struct intel_device_info 
*devinfo = isa->devinfo; 2334 bool valid = true; 2335 2336 for (int src_offset = start_offset; src_offset < end_offset;) { 2337 const brw_inst *inst = assembly + src_offset; 2338 bool is_compact = brw_inst_cmpt_control(devinfo, inst); 2339 unsigned inst_size = is_compact ? sizeof(brw_compact_inst) 2340 : sizeof(brw_inst); 2341 brw_inst uncompacted; 2342 2343 if (is_compact) { 2344 brw_compact_inst *compacted = (void *)inst; 2345 brw_uncompact_instruction(isa, &uncompacted, compacted); 2346 inst = &uncompacted; 2347 } 2348 2349 bool v = brw_validate_instruction(isa, inst, src_offset, 2350 inst_size, disasm); 2351 valid = valid && v; 2352 2353 src_offset += inst_size; 2354 } 2355 2356 return valid; 2357} 2358