/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include <math.h>
#include "nir.h"
#include "nir_builder.h"
#include "util/u_vector.h"

/**
 * Lower flrp instructions.
 *
 * Unlike the lowerings that are possible in nir_opt_algebraic, this pass can
 * examine more global information to determine a possibly more efficient
 * lowering for each flrp.
 */

static void
append_flrp_to_dead_list(struct u_vector *dead_flrp, struct nir_alu_instr *alu)
{
   struct nir_alu_instr **tail = u_vector_add(dead_flrp);
   *tail = alu;
}

/**
 * Replace flrp(a, b, c) with ffma(b, c, ffma(-a, c, a)).
 */
static void
replace_with_strict_ffma(struct nir_builder *bld, struct u_vector *dead_flrp,
                         struct nir_alu_instr *alu)
{
   nir_ssa_def *const a = nir_ssa_for_alu_src(bld, alu, 0);
   nir_ssa_def *const b = nir_ssa_for_alu_src(bld, alu, 1);
   nir_ssa_def *const c = nir_ssa_for_alu_src(bld, alu, 2);

   nir_ssa_def *const neg_a = nir_fneg(bld, a);
   nir_instr_as_alu(neg_a->parent_instr)->exact = alu->exact;

   nir_ssa_def *const inner_ffma = nir_ffma(bld, neg_a, c, a);
   nir_instr_as_alu(inner_ffma->parent_instr)->exact = alu->exact;

   nir_ssa_def *const outer_ffma = nir_ffma(bld, b, c, inner_ffma);
   nir_instr_as_alu(outer_ffma->parent_instr)->exact = alu->exact;

   nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, outer_ffma);

   /* DO NOT REMOVE the original flrp yet. Many of the lowering choices are
    * based on other uses of the sources. Removing the flrp may cause the
    * last flrp in a sequence to make a different, incorrect choice.
    */
   append_flrp_to_dead_list(dead_flrp, alu);
}

/**
 * Replace flrp(a, b, c) with ffma(a, (1 - c), bc)
 */
static void
replace_with_single_ffma(struct nir_builder *bld, struct u_vector *dead_flrp,
                         struct nir_alu_instr *alu)
{
   nir_ssa_def *const a = nir_ssa_for_alu_src(bld, alu, 0);
   nir_ssa_def *const b = nir_ssa_for_alu_src(bld, alu, 1);
   nir_ssa_def *const c = nir_ssa_for_alu_src(bld, alu, 2);

   nir_ssa_def *const neg_c = nir_fneg(bld, c);
   nir_instr_as_alu(neg_c->parent_instr)->exact = alu->exact;

   nir_ssa_def *const one_minus_c =
      nir_fadd(bld, nir_imm_floatN_t(bld, 1.0f, c->bit_size), neg_c);
   nir_instr_as_alu(one_minus_c->parent_instr)->exact = alu->exact;

   nir_ssa_def *const b_times_c = nir_fmul(bld, b, c);
   nir_instr_as_alu(b_times_c->parent_instr)->exact = alu->exact;

   nir_ssa_def *const final_ffma = nir_ffma(bld, a, one_minus_c, b_times_c);
   nir_instr_as_alu(final_ffma->parent_instr)->exact = alu->exact;

   nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, final_ffma);

   /* DO NOT REMOVE the original flrp yet. Many of the lowering choices are
    * based on other uses of the sources. Removing the flrp may cause the
    * last flrp in a sequence to make a different, incorrect choice.
    */
   append_flrp_to_dead_list(dead_flrp, alu);
}

/**
 * Replace flrp(a, b, c) with a(1-c) + bc.
 */
static void
replace_with_strict(struct nir_builder *bld, struct u_vector *dead_flrp,
                    struct nir_alu_instr *alu)
{
   nir_ssa_def *const a = nir_ssa_for_alu_src(bld, alu, 0);
   nir_ssa_def *const b = nir_ssa_for_alu_src(bld, alu, 1);
   nir_ssa_def *const c = nir_ssa_for_alu_src(bld, alu, 2);

   nir_ssa_def *const neg_c = nir_fneg(bld, c);
   nir_instr_as_alu(neg_c->parent_instr)->exact = alu->exact;

   nir_ssa_def *const one_minus_c =
      nir_fadd(bld, nir_imm_floatN_t(bld, 1.0f, c->bit_size), neg_c);
   nir_instr_as_alu(one_minus_c->parent_instr)->exact = alu->exact;

   nir_ssa_def *const first_product = nir_fmul(bld, a, one_minus_c);
   nir_instr_as_alu(first_product->parent_instr)->exact = alu->exact;

   nir_ssa_def *const second_product = nir_fmul(bld, b, c);
   nir_instr_as_alu(second_product->parent_instr)->exact = alu->exact;

   nir_ssa_def *const sum = nir_fadd(bld, first_product, second_product);
   nir_instr_as_alu(sum->parent_instr)->exact = alu->exact;

   nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, sum);

   /* DO NOT REMOVE the original flrp yet. Many of the lowering choices are
    * based on other uses of the sources. Removing the flrp may cause the
    * last flrp in a sequence to make a different, incorrect choice.
    */
   append_flrp_to_dead_list(dead_flrp, alu);
}

/**
 * Replace flrp(a, b, c) with a + c(b-a).
 */
static void
replace_with_fast(struct nir_builder *bld, struct u_vector *dead_flrp,
                  struct nir_alu_instr *alu)
{
   nir_ssa_def *const a = nir_ssa_for_alu_src(bld, alu, 0);
   nir_ssa_def *const b = nir_ssa_for_alu_src(bld, alu, 1);
   nir_ssa_def *const c = nir_ssa_for_alu_src(bld, alu, 2);

   nir_ssa_def *const neg_a = nir_fneg(bld, a);
   nir_instr_as_alu(neg_a->parent_instr)->exact = alu->exact;

   nir_ssa_def *const b_minus_a = nir_fadd(bld, b, neg_a);
   nir_instr_as_alu(b_minus_a->parent_instr)->exact = alu->exact;

   nir_ssa_def *const product = nir_fmul(bld, c, b_minus_a);
   nir_instr_as_alu(product->parent_instr)->exact = alu->exact;

   nir_ssa_def *const sum = nir_fadd(bld, a, product);
   nir_instr_as_alu(sum->parent_instr)->exact = alu->exact;

   nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, sum);

   /* DO NOT REMOVE the original flrp yet. Many of the lowering choices are
    * based on other uses of the sources. Removing the flrp may cause the
    * last flrp in a sequence to make a different, incorrect choice.
    */
   append_flrp_to_dead_list(dead_flrp, alu);
}

/**
 * Replace flrp(a, b, c) with (b*c ± c) + a => b*c + (a ± c)
 *
 * \note: This only works if a = ±1.
 */
static void
replace_with_expanded_ffma_and_add(struct nir_builder *bld,
                                   struct u_vector *dead_flrp,
                                   struct nir_alu_instr *alu, bool subtract_c)
{
   nir_ssa_def *const a = nir_ssa_for_alu_src(bld, alu, 0);
   nir_ssa_def *const b = nir_ssa_for_alu_src(bld, alu, 1);
   nir_ssa_def *const c = nir_ssa_for_alu_src(bld, alu, 2);

   nir_ssa_def *const b_times_c = nir_fmul(bld, b, c);
   nir_instr_as_alu(b_times_c->parent_instr)->exact = alu->exact;

   nir_ssa_def *inner_sum;

   if (subtract_c) {
      nir_ssa_def *const neg_c = nir_fneg(bld, c);
      nir_instr_as_alu(neg_c->parent_instr)->exact = alu->exact;

      inner_sum = nir_fadd(bld, a, neg_c);
   } else {
      inner_sum = nir_fadd(bld, a, c);
   }

   nir_instr_as_alu(inner_sum->parent_instr)->exact = alu->exact;

   nir_ssa_def *const outer_sum = nir_fadd(bld, inner_sum, b_times_c);
   nir_instr_as_alu(outer_sum->parent_instr)->exact = alu->exact;

   nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, outer_sum);

   /* DO NOT REMOVE the original flrp yet. Many of the lowering choices are
    * based on other uses of the sources. Removing the flrp may cause the
    * last flrp in a sequence to make a different, incorrect choice.
    */
   append_flrp_to_dead_list(dead_flrp, alu);
}

/**
 * Determines whether a swizzled source is constant w/ all components the same.
 *
 * The value of the constant is stored in \c result.
 *
 * \return
 * True if all components of the swizzled source are the same constant.
 * Otherwise false is returned.
 */
static bool
all_same_constant(const nir_alu_instr *instr, unsigned src, double *result)
{
   nir_const_value *val = nir_src_as_const_value(instr->src[src].src);

   if (!val)
      return false;

   const uint8_t *const swizzle = instr->src[src].swizzle;
   const unsigned num_components = nir_dest_num_components(instr->dest.dest);

   if (instr->dest.dest.ssa.bit_size == 32) {
      const float first = val[swizzle[0]].f32;

      for (unsigned i = 1; i < num_components; i++) {
         if (val[swizzle[i]].f32 != first)
            return false;
      }

      *result = first;
   } else {
      const double first = val[swizzle[0]].f64;

      for (unsigned i = 1; i < num_components; i++) {
         if (val[swizzle[i]].f64 != first)
            return false;
      }

      *result = first;
   }

   return true;
}

static bool
sources_are_constants_with_similar_magnitudes(const nir_alu_instr *instr)
{
   nir_const_value *val0 = nir_src_as_const_value(instr->src[0].src);
   nir_const_value *val1 = nir_src_as_const_value(instr->src[1].src);

   if (val0 == NULL || val1 == NULL)
      return false;

   const uint8_t *const swizzle0 = instr->src[0].swizzle;
   const uint8_t *const swizzle1 = instr->src[1].swizzle;
   const unsigned num_components = nir_dest_num_components(instr->dest.dest);

   if (instr->dest.dest.ssa.bit_size == 32) {
      for (unsigned i = 0; i < num_components; i++) {
         int exp0;
         int exp1;

         frexpf(val0[swizzle0[i]].f32, &exp0);
         frexpf(val1[swizzle1[i]].f32, &exp1);

         /* If the difference between exponents is >= 24, then A+B will always
          * have the value of whichever of A and B has the largest absolute
          * value. So, [0, 23] is the valid range. The smaller the limit
          * value, the more precision will be maintained at a potential
          * performance cost. Somewhat arbitrarily split the range in half.
          */
         if (abs(exp0 - exp1) > (23 / 2))
            return false;
      }
   } else {
      for (unsigned i = 0; i < num_components; i++) {
         int exp0;
         int exp1;

         frexp(val0[swizzle0[i]].f64, &exp0);
         frexp(val1[swizzle1[i]].f64, &exp1);

         /* If the difference between exponents is >= 53, then A+B will always
          * have the value of whichever of A and B has the largest absolute
          * value. So, [0, 52] is the valid range. The smaller the limit
          * value, the more precision will be maintained at a potential
          * performance cost. Somewhat arbitrarily split the range in half.
          */
         if (abs(exp0 - exp1) > (52 / 2))
            return false;
      }
   }

   return true;
}

/**
 * Counts of similar types of nir_op_flrp instructions
 *
 * If a similar instruction fits into more than one category, it will only be
 * counted once. The assumption is that no other instruction will have all
 * sources the same, or CSE would have removed one of the instructions.
 */
struct similar_flrp_stats {
   unsigned src2;
   unsigned src0_and_src2;
   unsigned src1_and_src2;
};

/**
 * Collect counts of similar FLRP instructions.
 *
 * This function only cares about similar instructions that have src2 in
 * common.
 */
static void
get_similar_flrp_stats(nir_alu_instr *alu, struct similar_flrp_stats *st)
{
   memset(st, 0, sizeof(*st));

   nir_foreach_use(other_use, alu->src[2].src.ssa) {
      /* Is the use also a flrp? */
      nir_instr *const other_instr = other_use->parent_instr;
      if (other_instr->type != nir_instr_type_alu)
         continue;

      /* Eh-hem... don't match the instruction with itself. */
      if (other_instr == &alu->instr)
         continue;

      nir_alu_instr *const other_alu = nir_instr_as_alu(other_instr);
      if (other_alu->op != nir_op_flrp)
         continue;

      /* Does the other flrp use source 2 from the first flrp as its source 2
       * as well?
       */
      if (!nir_alu_srcs_equal(alu, other_alu, 2, 2))
         continue;

      if (nir_alu_srcs_equal(alu, other_alu, 0, 0))
         st->src0_and_src2++;
      else if (nir_alu_srcs_equal(alu, other_alu, 1, 1))
         st->src1_and_src2++;
      else
         st->src2++;
   }
}

static void
convert_flrp_instruction(nir_builder *bld,
                         struct u_vector *dead_flrp,
                         nir_alu_instr *alu,
                         bool always_precise)
{
   bool have_ffma = false;
   unsigned bit_size = nir_dest_bit_size(alu->dest.dest);

   if (bit_size == 16)
      have_ffma = !bld->shader->options->lower_ffma16;
   else if (bit_size == 32)
      have_ffma = !bld->shader->options->lower_ffma32;
   else if (bit_size == 64)
      have_ffma = !bld->shader->options->lower_ffma64;
   else
      unreachable("invalid bit_size");

   bld->cursor = nir_before_instr(&alu->instr);

   /* There are two methods to implement flrp(x, y, t). The strictly correct
    * implementation according to the GLSL spec is:
    *
    *    x(1 - t) + yt
    *
    * This can also be implemented using two chained FMAs:
    *
    *    fma(y, t, fma(-x, t, x))
    *
    * This method, using either formulation, has better precision when the
    * difference between x and y is very large. It guarantees that
    * flrp(x, y, 1) = y. For example, flrp(1e38, 1.0, 1.0) is 1.0. This is
    * correct.
    *
    * The other possible implementation is:
    *
    *    x + t(y - x)
    *
    * This can also be formulated as an FMA:
    *
    *    fma(y - x, t, x)
    *
    * For this implementation, flrp(1e38, 1.0, 1.0) is 0.0. Since 1.0 was
    * expected, that's a pretty significant error.
    *
    * The choice made for lowering depends on a number of factors.
    *
    * - If the flrp is marked precise and FMA is supported:
    *
    *        fma(y, t, fma(-x, t, x))
    *
    *   This is strictly correct (maybe?), and the cost is two FMA
    *   instructions. It at least maintains the flrp(x, y, 1.0) == y
    *   condition.
    *
    * - If the flrp is marked precise and FMA is not supported:
    *
    *        x(1 - t) + yt
    *
    *   This is strictly correct, and the cost is 4 instructions. If FMA is
    *   supported, this may or may not be reduced to 3 instructions (a
    *   subtract, a multiply, and an FMA)... but in that case the other
    *   formulation should have been used.
    */
   if (alu->exact) {
      if (have_ffma)
         replace_with_strict_ffma(bld, dead_flrp, alu);
      else
         replace_with_strict(bld, dead_flrp, alu);

      return;
   }

   /*
    * - If x and y are both immediates and the relative magnitude of the
    *   values is similar (such that y-x does not lose too much precision):
    *
    *        x + t(y - x)
    *
    *   We rely on constant folding to eliminate y-x, and we rely on
    *   nir_opt_algebraic to possibly generate an FMA. The cost is either one
    *   FMA or two instructions.
    */
   if (sources_are_constants_with_similar_magnitudes(alu)) {
      replace_with_fast(bld, dead_flrp, alu);
      return;
   }

   /*
    * - If x = 1:
    *
    *        (yt + -t) + 1
    *
    * - If x = -1:
    *
    *        (yt + t) - 1
    *
    *   In both cases, x is used in place of ±1 for simplicity. Both forms
    *   lend themselves to ffma generation on platforms that support ffma.
    */
   double src0_as_constant;
   if (all_same_constant(alu, 0, &src0_as_constant)) {
      if (src0_as_constant == 1.0) {
         replace_with_expanded_ffma_and_add(bld, dead_flrp, alu,
                                            true /* subtract t */);
         return;
      } else if (src0_as_constant == -1.0) {
         replace_with_expanded_ffma_and_add(bld, dead_flrp, alu,
                                            false /* add t */);
         return;
      }
   }

   /*
    * - If y = ±1:
    *
    *        x(1 - t) + yt
    *
    *   In this case the multiply in yt will be eliminated by
    *   nir_opt_algebraic. If FMA is supported, this results in
    *   fma(x, (1 - t), ±t) for two instructions. If FMA is not supported,
    *   then the cost is 3 instructions. We rely on nir_opt_algebraic to
    *   generate the FMA instructions as well.
    *
    *   Another possible replacement is
    *
    *        -xt + x ± t
    *
    *   Some groupings of this may be better on some platforms in some
    *   circumstances, but it is probably dependent on scheduling. Further
    *   investigation may be required.
    */
   double src1_as_constant;
   if ((all_same_constant(alu, 1, &src1_as_constant) &&
        (src1_as_constant == -1.0 || src1_as_constant == 1.0))) {
      replace_with_strict(bld, dead_flrp, alu);
      return;
   }

   if (have_ffma) {
      if (always_precise) {
         replace_with_strict_ffma(bld, dead_flrp, alu);
         return;
      }

      /*
       * - If FMA is supported and another flrp(x, _, t) exists:
       *
       *        fma(y, t, fma(-x, t, x))
       *
       *   The hope is that the inner FMA calculation will be shared with the
       *   other lowered flrp. This results in two FMA instructions for the
       *   first flrp and one FMA instruction for each additional flrp. It
       *   also means that the live range for x might be complete after the
       *   inner ffma instead of after the last flrp.
       */
      struct similar_flrp_stats st;

      get_similar_flrp_stats(alu, &st);
      if (st.src0_and_src2 > 0) {
         replace_with_strict_ffma(bld, dead_flrp, alu);
         return;
      }

      /*
       * - If FMA is supported and another flrp(_, y, t) exists:
       *
       *        fma(x, (1 - t), yt)
       *
       *   The hope is that the (1 - t) and the yt will be shared with the
       *   other lowered flrp. This results in 3 instructions for the first
       *   flrp and 1 for each additional flrp.
       */
      if (st.src1_and_src2 > 0) {
         replace_with_single_ffma(bld, dead_flrp, alu);
         return;
      }
   } else {
      if (always_precise) {
         replace_with_strict(bld, dead_flrp, alu);
         return;
      }

      /*
       * - If FMA is not supported and another flrp(x, _, t) exists:
       *
       *        x(1 - t) + yt
       *
       *   The hope is that the x(1 - t) will be shared with the other lowered
       *   flrp. This results in 4 instructions for the first flrp and 2 for
       *   each additional flrp.
       *
       * - If FMA is not supported and another flrp(_, y, t) exists:
       *
       *        x(1 - t) + yt
       *
       *   The hope is that the (1 - t) and the yt will be shared with the
       *   other lowered flrp. This results in 4 instructions for the first
       *   flrp and 2 for each additional flrp.
       */
      struct similar_flrp_stats st;

      get_similar_flrp_stats(alu, &st);
      if (st.src0_and_src2 > 0 || st.src1_and_src2 > 0) {
         replace_with_strict(bld, dead_flrp, alu);
         return;
      }
   }

   /*
    * - If t is constant:
    *
    *        x(1 - t) + yt
    *
    *   The cost is three instructions without FMA or two instructions with
    *   FMA. This is the same cost as the imprecise lowering, but it gives
    *   the instruction scheduler a little more freedom.
    *
    *   There is no need to handle t = 0.5 specially. nir_opt_algebraic
    *   already has optimizations to convert 0.5x + 0.5y to 0.5(x + y).
    */
   if (alu->src[2].src.ssa->parent_instr->type == nir_instr_type_load_const) {
      replace_with_strict(bld, dead_flrp, alu);
      return;
   }

   /*
    * - Otherwise
    *
    *        x + t(y - x)
    */
   replace_with_fast(bld, dead_flrp, alu);
}

static void
lower_flrp_impl(nir_function_impl *impl,
                struct u_vector *dead_flrp,
                unsigned lowering_mask,
                bool always_precise)
{
   nir_builder b;
   nir_builder_init(&b, impl);

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type == nir_instr_type_alu) {
            nir_alu_instr *const alu = nir_instr_as_alu(instr);

            if (alu->op == nir_op_flrp &&
                (alu->dest.dest.ssa.bit_size & lowering_mask)) {
               convert_flrp_instruction(&b, dead_flrp, alu, always_precise);
            }
         }
      }
   }

   nir_metadata_preserve(impl, nir_metadata_block_index |
                               nir_metadata_dominance);
}

/**
 * \param lowering_mask - Bitwise-or of the bit sizes that need to be lowered
 *                        (e.g., 16 | 64 if only 16-bit and 64-bit flrp need
 *                        lowering).
 * \param always_precise - Always require precise lowering for flrp. This
 *                         will always lower flrp to (a * (1 - c)) + (b * c).
 */
bool
nir_lower_flrp(nir_shader *shader,
               unsigned lowering_mask,
               bool always_precise)
{
   struct u_vector dead_flrp;

   if (!u_vector_init_pow2(&dead_flrp, 8, sizeof(struct nir_alu_instr *)))
      return false;

   nir_foreach_function(function, shader) {
      if (function->impl) {
         lower_flrp_impl(function->impl, &dead_flrp, lowering_mask,
                         always_precise);
      }
   }

   /* Progress was made if the dead list is not empty. Remove all the
    * instructions from the dead list.
    */
   const bool progress = u_vector_length(&dead_flrp) != 0;

   struct nir_alu_instr **instr;
   u_vector_foreach(instr, &dead_flrp)
      nir_instr_remove(&(*instr)->instr);

   u_vector_finish(&dead_flrp);

   return progress;
}
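
/*
 * Usage sketch (illustrative only, not part of this file): a driver would
 * typically run nir_lower_flrp from its optimization loop, after the main
 * algebraic and CSE passes, with a lowering_mask built from the bit sizes its
 * backend cannot handle natively. The snippet below is a minimal, hypothetical
 * example; the pass ordering and the loop structure are assumptions modeled on
 * common Mesa drivers, not requirements of this pass.
 *
 *    const struct nir_shader_compiler_options *opts = shader->options;
 *    unsigned lowering_mask = (opts->lower_flrp16 ? 16 : 0) |
 *                             (opts->lower_flrp32 ? 32 : 0) |
 *                             (opts->lower_flrp64 ? 64 : 0);
 *
 *    bool progress;
 *    do {
 *       progress = false;
 *       NIR_PASS(progress, shader, nir_opt_algebraic);
 *       NIR_PASS(progress, shader, nir_opt_cse);
 *
 *       // always_precise = false: allow the cheaper x + t(y - x) lowering
 *       // when this pass decides it is safe enough.
 *       if (lowering_mask != 0)
 *          NIR_PASS(progress, shader, nir_lower_flrp, lowering_mask, false);
 *    } while (progress);
 */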