/*
 * Copyright © 2021 Google, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_builder.h"

/*
 * This pass tries to reduce the bit size of phi instructions.  First it
 * tries to move narrowing conversions from the phi's consumers to the
 * phi's sources, which is possible when all uses of the phi are
 * equivalent narrowing instructions.  In other words, it converts:
 *
 *    vec1 32 ssa_124 = load_const (0x00000000)
 *    ...
 *    loop {
 *        ...
 *        vec1 32 ssa_155 = phi block_0: ssa_124, block_4: ssa_53
 *        vec1 16 ssa_8 = i2imp ssa_155
 *        ...
 *        vec1 32 ssa_53 = i2i32 ssa_52
 *    }
 *
 * into:
 *
 *    vec1 32 ssa_124 = load_const (0x00000000)
 *    vec1 16 ssa_156 = i2imp ssa_124
 *    ...
 *    loop {
 *        ...
 *        vec1 16 ssa_8 = phi block_0: ssa_156, block_4: ssa_157
 *        ...
 *        vec1 32 ssa_53 = i2i32 ssa_52
 *        vec1 16 ssa_157 = i2i16 ssa_53
 *    }
 *
 * Failing that, it tries to push widening conversions of the phi's
 * sources to the phi's def.  Since a load_const is frequently one of
 * the phi's sources, this case checks whether the constant can be
 * narrowed without a loss of precision:
 *
 *    vec1 32 ssa_0 = load_const (0x00000000)
 *    ...
 *    loop {
 *        ...
 *        vec1 32 ssa_8 = phi block_0: ssa_0, block_4: ssa_19
 *        ...
 *        vec1 16 ssa_18 = iadd ssa_21, ssa_3
 *        vec1 32 ssa_19 = i2i32 ssa_18
 *    }
 *
 * into:
 *
 *    vec1 32 ssa_0 = load_const (0x00000000)
 *    vec1 16 ssa_22 = i2i16 ssa_0
 *    ...
 *    loop {
 *        ...
 *        vec1 16 ssa_8 = phi block_0: ssa_22, block_4: ssa_18
 *        vec1 32 ssa_23 = i2i32 ssa_8
 *        ...
 *        vec1 16 ssa_18 = iadd ssa_21, ssa_3
 *    }
 *
 * Note that either transformation can convert x2ymp into x2y16, which
 * is normally done later in nir_opt_algebraic_late(), losing the option
 * to fold away sequences like (i2i32 (i2imp (x))); but algebraic opts
 * cannot see through phis anyway.
 */

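/* To illustrate the trade-off mentioned above: because the mp ("mediump")
 * conversions are only precision hints, nir_opt_algebraic has rules along
 * these lines (sketched from memory, not quoted verbatim) that drop a
 * mediump round-trip entirely when no phi sits between the conversions:
 *
 *    vec1 16 ssa_2 = i2imp ssa_1      // ssa_1 is 32-bit
 *    vec1 32 ssa_3 = i2i32 ssa_2      // folds to: mov ssa_1
 *
 * Once i2imp has been rewritten to i2i16 by this pass, that fold is no
 * longer legal, since i2i16 really does truncate to 16 bits.
 */
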
#define INVALID_OP nir_num_opcodes

/**
 * Get the corresponding exact conversion for an x2ymp conversion
 */
static nir_op
concrete_conversion(nir_op op)
{
   switch (op) {
   case nir_op_i2imp: return nir_op_i2i16;
   case nir_op_i2fmp: return nir_op_i2f16;
   case nir_op_u2fmp: return nir_op_u2f16;
   case nir_op_f2fmp: return nir_op_f2f16;
   case nir_op_f2imp: return nir_op_f2i16;
   case nir_op_f2ump: return nir_op_f2u16;
   default:           return op;
   }
}

static nir_op
narrowing_conversion_op(nir_instr *instr, nir_op current_op)
{
   if (instr->type != nir_instr_type_alu)
      return INVALID_OP;

   nir_op op = nir_instr_as_alu(instr)->op;
   switch (op) {
   case nir_op_i2imp:
   case nir_op_i2i16:
   case nir_op_i2fmp:
   case nir_op_i2f16:
   case nir_op_u2fmp:
   case nir_op_u2f16:
   case nir_op_f2fmp:
   case nir_op_f2f16:
   case nir_op_f2imp:
   case nir_op_f2i16:
   case nir_op_f2ump:
   case nir_op_f2u16:
   case nir_op_f2f16_rtne:
   case nir_op_f2f16_rtz:
      break;
   default:
      return INVALID_OP;
   }

   /* If we've already picked a conversion op from a previous phi use,
    * make sure it is compatible with the current use:
    */
   if (current_op != INVALID_OP) {
      if (current_op != op) {
         /* If we have different conversions, but one can be converted
          * to the other, then let's do that:
          */
         if (concrete_conversion(current_op) == concrete_conversion(op)) {
            op = concrete_conversion(op);
         } else {
            return INVALID_OP;
         }
      }
   }

   return op;
}

static nir_op
widening_conversion_op(nir_instr *instr, unsigned *bit_size)
{
   if (instr->type != nir_instr_type_alu)
      return INVALID_OP;

   nir_alu_instr *alu = nir_instr_as_alu(instr);
   switch (alu->op) {
   case nir_op_i2i32:
   case nir_op_i2f32:
   case nir_op_u2f32:
   case nir_op_f2f32:
   case nir_op_f2i32:
   case nir_op_f2u32:
      break;
   default:
      return INVALID_OP;
   }

   *bit_size = nir_src_bit_size(alu->src[0].src);

   /* We also need to check that the conversion's dest was actually
    * wider:
    */
   if (nir_dest_bit_size(alu->dest.dest) <= *bit_size)
      return INVALID_OP;

   return alu->op;
}

static nir_alu_type
op_to_type(nir_op op)
{
   return nir_alu_type_get_base_type(nir_op_infos[op].output_type);
}

/* Try to move narrowing instructions consuming the phi into the phi's
 * sources to reduce the phi's precision:
 */
static bool
try_move_narrowing_dst(nir_builder *b, nir_phi_instr *phi)
{
   nir_op op = INVALID_OP;

   assert(phi->dest.is_ssa);

   /* If the phi has already been narrowed, nothing more to do: */
   if (phi->dest.ssa.bit_size != 32)
      return false;

   /* Are the only uses of the phi conversion instructions, and
    * are they all the same conversion?
    */
   nir_foreach_use (use, &phi->dest.ssa) {
      op = narrowing_conversion_op(use->parent_instr, op);

      /* Not a (compatible) narrowing conversion: */
      if (op == INVALID_OP)
         return false;
   }

   /* An if_use means the phi is used directly in a conditional, i.e.
    * without a conversion:
    */
   if (!list_is_empty(&phi->dest.ssa.if_uses))
      return false;

   /* If the phi has no uses, then there is nothing to do: */
   if (op == INVALID_OP)
      return false;

   /* Construct the replacement phi instruction: */
   nir_phi_instr *new_phi = nir_phi_instr_create(b->shader);
   nir_ssa_dest_init(&new_phi->instr, &new_phi->dest,
                     phi->dest.ssa.num_components,
                     nir_alu_type_get_type_size(nir_op_infos[op].output_type),
                     NULL);

   /* Push the conversion into the new phi sources: */
   nir_foreach_phi_src (src, phi) {
      assert(src->src.is_ssa);

      /* Insert the new conversion instr in the block of the original phi src: */
      b->cursor = nir_after_instr_and_phis(src->src.ssa->parent_instr);
      nir_ssa_def *old_src = src->src.ssa;
      nir_ssa_def *new_src = nir_build_alu(b, op, old_src, NULL, NULL, NULL);

      /* And add the corresponding phi_src to the new_phi: */
      nir_phi_instr_add_src(new_phi, src->pred, nir_src_for_ssa(new_src));
   }

   /* Rewrite the original uses of the phi to use the new phi directly,
    * skipping the conversion out of the original phi:
    */
   nir_foreach_use (use, &phi->dest.ssa) {
      /* We've previously established that all the uses were alu
       * conversion ops.  Turn them into movs instead.
       */
      nir_alu_instr *alu = nir_instr_as_alu(use->parent_instr);
      alu->op = nir_op_mov;
   }
   nir_ssa_def_rewrite_uses(&phi->dest.ssa, &new_phi->dest.ssa);

   /* And finally insert the new phi after all sources are in place: */
   b->cursor = nir_after_instr(&phi->instr);
   nir_builder_instr_insert(b, &new_phi->instr);

   return true;
}

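/* A worked example of the narrowing check implemented below, with
 * illustrative values (not taken from any particular shader): for
 * nir_type_int, 0x00007fff survives the i32 -> i16 -> i32 round-trip
 * but 0x00012345 does not; for nir_type_float, 1.0f is exactly
 * representable as a half-float while 1.0001f rounds away, so only the
 * former constants can be narrowed.
 */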
static bool
can_convert_load_const(nir_load_const_instr *lc, nir_op op)
{
   nir_alu_type type = op_to_type(op);

   /* Note that we only handle phis with bit_size == 32: */
   assert(lc->def.bit_size == 32);

   for (unsigned i = 0; i < lc->def.num_components; i++) {
      switch (type) {
      case nir_type_int:
         if (lc->value[i].i32 != (int32_t)(int16_t)lc->value[i].i32)
            return false;
         break;
      case nir_type_uint:
         if (lc->value[i].u32 != (uint32_t)(uint16_t)lc->value[i].u32)
            return false;
         break;
      case nir_type_float:
         if (lc->value[i].f32 != _mesa_half_to_float(
                                    _mesa_float_to_half(lc->value[i].f32)))
            return false;
         break;
      default:
         unreachable("bad type");
         return false;
      }
   }

   return true;
}

/* Check all the phi sources to see if they are the same widening op, in
 * which case we can push the widening op to the other side of the phi:
 */
static nir_op
find_widening_op(nir_phi_instr *phi, unsigned *bit_size)
{
   nir_op op = INVALID_OP;

   bool has_load_const = false;
   *bit_size = 0;

   nir_foreach_phi_src (src, phi) {
      assert(src->src.is_ssa);

      nir_instr *instr = src->src.ssa->parent_instr;
      if (instr->type == nir_instr_type_load_const) {
         has_load_const = true;
         continue;
      }

      unsigned src_bit_size;
      nir_op src_op = widening_conversion_op(instr, &src_bit_size);

      /* Not a widening conversion: */
      if (src_op == INVALID_OP)
         return INVALID_OP;

      /* If it is a widening conversion, it needs to be the same op as
       * the other phi sources:
       */
      if ((op != INVALID_OP) && (op != src_op))
         return INVALID_OP;

      if (*bit_size && (*bit_size != src_bit_size))
         return INVALID_OP;

      op = src_op;
      *bit_size = src_bit_size;
   }

   if ((op == INVALID_OP) || !has_load_const)
      return op;

   /* If we could otherwise move the widening sources, but a load_const
    * is one of the phi sources (and so has no widening conversion),
    * check whether a narrowing->widening sequence can be inserted for
    * it without loss of precision, which makes the rest of the
    * transformation possible:
    */
   nir_foreach_phi_src (src, phi) {
      assert(src->src.is_ssa);

      nir_instr *instr = src->src.ssa->parent_instr;
      if (instr->type != nir_instr_type_load_const)
         continue;

      if (!can_convert_load_const(nir_instr_as_load_const(instr), op))
         return INVALID_OP;
   }

   return op;
}

/* Try to move widening conversions of the phi's sources to the phi's
 * def, to reduce the phi's precision:
 */
static bool
try_move_widening_src(nir_builder *b, nir_phi_instr *phi)
{
   assert(phi->dest.is_ssa);

   /* If the phi has already been narrowed, nothing more to do: */
   if (phi->dest.ssa.bit_size != 32)
      return false;

   unsigned bit_size;
   nir_op op = find_widening_op(phi, &bit_size);

   if (op == INVALID_OP)
      return false;

   /* Construct the replacement phi instruction: */
   nir_phi_instr *new_phi = nir_phi_instr_create(b->shader);
   nir_ssa_dest_init(&new_phi->instr, &new_phi->dest,
                     phi->dest.ssa.num_components,
                     bit_size, NULL);

   /* Remove the widening conversions from the phi sources: */
   nir_foreach_phi_src (src, phi) {
      assert(src->src.is_ssa);

      nir_instr *instr = src->src.ssa->parent_instr;
      nir_ssa_def *new_src;

      b->cursor = nir_after_instr(instr);

      if (instr->type == nir_instr_type_load_const) {
         /* If the src is a load_const, we've already verified that it
          * is safe to insert a narrowing conversion to make the rest
          * of this transformation legal:
          */
         nir_load_const_instr *lc = nir_instr_as_load_const(instr);

         if (op_to_type(op) == nir_type_float) {
            new_src = nir_f2f16(b, &lc->def);
         } else {
            new_src = nir_i2i16(b, &lc->def);
         }
      } else {
         /* At this point we know the source's source is a conversion: */
         nir_alu_instr *alu = nir_instr_as_alu(instr);

         /* The conversion we are stripping off could have had a swizzle,
          * so replace it with a mov if necessary:
          */
         unsigned num_comp = nir_dest_num_components(alu->dest.dest);
         new_src = nir_mov_alu(b, alu->src[0], num_comp);
      }

      /* Add the corresponding phi_src to the new_phi: */
      nir_phi_instr_add_src(new_phi, src->pred, nir_src_for_ssa(new_src));
   }

   /* And insert the new phi after all sources are in place: */
   b->cursor = nir_after_instr(&phi->instr);
   nir_builder_instr_insert(b, &new_phi->instr);

   /* And finally add back the widening conversion after the phi,
    * and rewrite the original phi's uses:
    */
   b->cursor = nir_after_instr_and_phis(&new_phi->instr);
   nir_ssa_def *def = nir_build_alu(b, op, &new_phi->dest.ssa, NULL, NULL, NULL);

   nir_ssa_def_rewrite_uses(&phi->dest.ssa, def);

   return true;
}

static bool
lower_phi(nir_builder *b, nir_phi_instr *phi)
{
   bool progress = try_move_narrowing_dst(b, phi);
   if (!progress)
      progress = try_move_widening_src(b, phi);
   return progress;
}

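/* A sketch of how a driver might schedule this pass in its NIR
 * optimization loop (illustrative, not part of this file's API;
 * NIR_PASS() comes from nir.h):
 *
 *    bool progress;
 *    do {
 *       progress = false;
 *       NIR_PASS(progress, shader, nir_copy_prop);
 *       NIR_PASS(progress, shader, nir_opt_phi_precision);
 *       ...
 *    } while (progress);
 *
 * Running copy propagation afterwards also cleans up the movs that
 * try_move_narrowing_dst() leaves behind in place of the conversions.
 */
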
bool
nir_opt_phi_precision(nir_shader *shader)
{
   bool progress = false;

   /* If no 8b or 16b bit_sizes are used, there is no point in running
    * this pass:
    */
   unsigned bit_sizes_used = shader->info.bit_sizes_float |
                             shader->info.bit_sizes_int;

   if (!bit_sizes_used) {
      nir_shader_gather_info(shader, nir_shader_get_entrypoint(shader));
      bit_sizes_used = shader->info.bit_sizes_float |
                       shader->info.bit_sizes_int;
   }

   if (!(bit_sizes_used & (8 | 16)))
      return false;

   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      nir_builder b;
      nir_builder_init(&b, function->impl);

      nir_foreach_block (block, function->impl) {
         nir_foreach_instr_safe (instr, block) {
            /* Phis are always grouped at the top of the block, so we
             * can stop at the first non-phi instruction:
             */
            if (instr->type != nir_instr_type_phi)
               break;

            progress |= lower_phi(&b, nir_instr_as_phi(instr));
         }
      }

      if (progress) {
         nir_metadata_preserve(function->impl,
                               nir_metadata_block_index |
                               nir_metadata_dominance);
      } else {
         nir_metadata_preserve(function->impl, nir_metadata_all);
      }
   }

   return progress;
}