1/*
2 * Copyright © 2021 Google, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include "nir.h"
25#include "nir_builder.h"
26
27/*
28 * This pass tries to reduce the bitsize of phi instructions by either
29 * moving narrowing conversions from the phi's consumers to the phi's
30 * sources, if all the uses of the phi are equivalent narrowing
31 * instructions.  In other words, convert:
32 *
33 *    vec1 32 ssa_124 = load_const (0x00000000)
34 *    ...
35 *    loop {
36 *        ...
37 *        vec1 32 ssa_155 = phi block_0: ssa_124, block_4: ssa_53
38 *        vec1 16 ssa_8 = i2imp ssa_155
39 *        ...
40 *        vec1 32 ssa_53 = i2i32 ssa_52
41 *    }
42 *
43 * into:
44 *
45 *    vec1 32 ssa_124 = load_const (0x00000000)
46 *    vec1 16 ssa_156 = i2imp ssa_124
47 *    ...
48 *    loop {
49 *        ...
50 *        vec1 16 ssa_8 = phi block_0: ssa_156, block_4: ssa_157
51 *        ...
52 *        vec1 32 ssa_53 = i2i32 ssa_52
53 *        vec1 16 ssa_157 = i2i16 ssa_53
54 *    }
55 *
 * Or failing that, tries to push widening conversions of phi srcs to
 * the phi def.  In this case, since load_const is frequently one of
 * the phi sources, this pass checks whether it can be narrowed without
 * a loss of precision:
60 *
61 *    vec1 32 ssa_0 = load_const (0x00000000)
62 *    ...
63 *    loop {
64 *        ...
65 *        vec1 32 ssa_8 = phi block_0: ssa_0, block_4: ssa_19
66 *        ...
67 *        vec1 16 ssa_18 = iadd ssa_21, ssa_3
68 *        vec1 32 ssa_19 = i2i32 ssa_18
69 *    }
70 *
71 * into:
72 *
73 *    vec1 32 ssa_0 = load_const (0x00000000)
74 *    vec1 16 ssa_22 = i2i16 ssa_0
75 *    ...
76 *    loop {
77 *        ...
78 *        vec1 16 ssa_8 = phi block_0: ssa_22, block_4: ssa_18
79 *        vec1 32 ssa_23 = i2i32 ssa_8
80 *        ...
81 *        vec1 16 ssa_18 = iadd ssa_21, ssa_3
82 *    }
83 *
 * Note that either transformation can convert x2ymp into x2y16, which
 * is normally done later in nir_opt_algebraic_late(), losing the option
 * to fold away sequences like (i2i32 (i2imp (x))), but algebraic opts
 * cannot see through phis.
88 */
89
90#define INVALID_OP nir_num_opcodes
91
92/**
93 * Get the corresponding exact conversion for a x2ymp conversion
94 */
95static nir_op
96concrete_conversion(nir_op op)
97{
98   switch (op) {
99   case nir_op_i2imp: return nir_op_i2i16;
100   case nir_op_i2fmp: return nir_op_i2f16;
101   case nir_op_u2fmp: return nir_op_u2f16;
102   case nir_op_f2fmp: return nir_op_f2f16;
103   case nir_op_f2imp: return nir_op_f2i16;
104   case nir_op_f2ump: return nir_op_f2u16;
105   default:           return op;
106   }
107}
108
109static nir_op
110narrowing_conversion_op(nir_instr *instr, nir_op current_op)
111{
112   if (instr->type != nir_instr_type_alu)
113      return INVALID_OP;
114
115   nir_op op = nir_instr_as_alu(instr)->op;
116   switch (op) {
117   case nir_op_i2imp:
118   case nir_op_i2i16:
119   case nir_op_i2fmp:
120   case nir_op_i2f16:
121   case nir_op_u2fmp:
122   case nir_op_u2f16:
123   case nir_op_f2fmp:
124   case nir_op_f2f16:
125   case nir_op_f2imp:
126   case nir_op_f2i16:
127   case nir_op_f2ump:
128   case nir_op_f2u16:
129   case nir_op_f2f16_rtne:
130   case nir_op_f2f16_rtz:
131      break;
132   default:
133      return INVALID_OP;
134   }
135
136   /* If we've already picked a conversion op from a previous phi use,
137    * make sure it is compatible with the current use
138    */
139   if (current_op != INVALID_OP) {
140      if (current_op != op) {
141         /* If we have different conversions, but one can be converted
142          * to the other, then let's do that:
143          */
144         if (concrete_conversion(current_op) == concrete_conversion(op)) {
145            op = concrete_conversion(op);
146         } else {
147            return INVALID_OP;
148         }
149      }
150   }
151
152   return op;
153}
154
155static nir_op
156widening_conversion_op(nir_instr *instr, unsigned *bit_size)
157{
158   if (instr->type != nir_instr_type_alu)
159      return INVALID_OP;
160
161   nir_alu_instr *alu = nir_instr_as_alu(instr);
162   switch (alu->op) {
163   case nir_op_i2i32:
164   case nir_op_i2f32:
165   case nir_op_u2f32:
166   case nir_op_f2f32:
167   case nir_op_f2i32:
168   case nir_op_f2u32:
169      break;
170   default:
171      return INVALID_OP;
172   }
173
174   *bit_size = nir_src_bit_size(alu->src[0].src);
175
176   /* We also need to check that the conversion's dest was actually
177    * wider:
178    */
179   if (nir_dest_bit_size(alu->dest.dest) <= *bit_size)
180      return INVALID_OP;
181
182   return alu->op;
183}
184
185static nir_alu_type
186op_to_type(nir_op op)
187{
188   return nir_alu_type_get_base_type(nir_op_infos[op].output_type);
189}
190
/* Try to move narrowing instructions consuming the phi into the phi's
 * sources to reduce the phi's precision:
 *
 * Returns true (progress) if the phi was replaced by a narrower phi
 * whose sources carry the conversion instead of its uses.
 */
static bool
try_move_narrowing_dst(nir_builder *b, nir_phi_instr *phi)
{
   nir_op op = INVALID_OP;

   assert(phi->dest.is_ssa);

   /* If the phi has already been narrowed, nothing more to do: */
   if (phi->dest.ssa.bit_size != 32)
      return false;

   /* Are the only uses of the phi conversion instructions, and
    * are they all the same conversion?
    */
   nir_foreach_use (use, &phi->dest.ssa) {
      op = narrowing_conversion_op(use->parent_instr, op);

      /* Not a (compatible) narrowing conversion: */
      if (op == INVALID_OP)
         return false;
   }

   /* an if_uses means the phi is used directly in a conditional, ie.
    * without a conversion
    */
   if (!list_is_empty(&phi->dest.ssa.if_uses))
      return false;

   /* If the phi has no uses, then nothing to do: */
   if (op == INVALID_OP)
      return false;

   /* construct replacement phi instruction with the narrowed bit size
    * taken from the conversion op's output type:
    */
   nir_phi_instr *new_phi = nir_phi_instr_create(b->shader);
   nir_ssa_dest_init(&new_phi->instr, &new_phi->dest,
                     phi->dest.ssa.num_components,
                     nir_alu_type_get_type_size(nir_op_infos[op].output_type),
                     NULL);

   /* Push the conversion into the new phi sources: */
   nir_foreach_phi_src (src, phi) {
      assert(src->src.is_ssa);

      /* insert new conversion instr in block of original phi src: */
      b->cursor = nir_after_instr_and_phis(src->src.ssa->parent_instr);
      nir_ssa_def *old_src = src->src.ssa;
      nir_ssa_def *new_src = nir_build_alu(b, op, old_src, NULL, NULL, NULL);

      /* and add corresponding phi_src to the new_phi: */
      nir_phi_instr_add_src(new_phi, src->pred, nir_src_for_ssa(new_src));
   }

   /* And finally rewrite the original uses of the original phi uses to
    * directly use the new phi, skipping the conversion out of the orig
    * phi
    */
   nir_foreach_use (use, &phi->dest.ssa) {
      /* We've previously established that all the uses were alu
       * conversion ops.  Turn them into movs instead.
       */
      nir_alu_instr *alu = nir_instr_as_alu(use->parent_instr);
      alu->op = nir_op_mov;
   }
   /* The old conversions (now movs) get the new phi as their source;
    * presumably later copy-propagation cleans them up:
    */
   nir_ssa_def_rewrite_uses(&phi->dest.ssa, &new_phi->dest.ssa);

   /* And finally insert the new phi after all sources are in place: */
   b->cursor = nir_after_instr(&phi->instr);
   nir_builder_instr_insert(b, &new_phi->instr);

   return true;
}
265
266static bool
267can_convert_load_const(nir_load_const_instr *lc, nir_op op)
268{
269   nir_alu_type type = op_to_type(op);
270
271   /* Note that we only handle phi's with bit_size == 32: */
272   assert(lc->def.bit_size == 32);
273
274   for (unsigned i = 0; i < lc->def.num_components; i++) {
275      switch (type) {
276      case nir_type_int:
277         if (lc->value[i].i32 != (int32_t)(int16_t)lc->value[i].i32)
278            return false;
279         break;
280      case nir_type_uint:
281         if (lc->value[i].u32 != (uint32_t)(uint16_t)lc->value[i].u32)
282            return false;
283         break;
284      case nir_type_float:
285         if (lc->value[i].f32 != _mesa_half_to_float(
286               _mesa_float_to_half(lc->value[i].f32)))
287            return false;
288         break;
289      default:
290         unreachable("bad type");
291         return false;
292      }
293   }
294
295   return true;
296}
297
/* Check all the phi sources to see if they are the same widening op, in
 * which case we can push the widening op to the other side of the phi.
 *
 * Returns the common widening opcode (or INVALID_OP), and stores the
 * common source bit size in *bit_size.
 */
static nir_op
find_widening_op(nir_phi_instr *phi, unsigned *bit_size)
{
   nir_op op = INVALID_OP;

   /* load_const sources have no conversion op; remember we saw one so
    * it can be validated separately below:
    */
   bool has_load_const = false;
   *bit_size = 0;

   nir_foreach_phi_src (src, phi) {
      assert(src->src.is_ssa);

      nir_instr *instr = src->src.ssa->parent_instr;
      if (instr->type == nir_instr_type_load_const) {
         has_load_const = true;
         continue;
      }

      unsigned src_bit_size;
      nir_op src_op = widening_conversion_op(instr, &src_bit_size);

      /* Not a widening conversion: */
      if (src_op == INVALID_OP)
         return INVALID_OP;

      /* If it is a widening conversion, it needs to be the same op as
       * other phi sources:
       */
      if ((op != INVALID_OP) && (op != src_op))
         return INVALID_OP;

      /* All the conversions must also widen from the same bit size: */
      if (*bit_size && (*bit_size != src_bit_size))
         return INVALID_OP;

      op = src_op;
      *bit_size = src_bit_size;
   }

   /* No widening conversions found, or no load_const to validate: */
   if ((op == INVALID_OP) || !has_load_const)
      return op;

   /* If we could otherwise move widening sources, but load_const is
    * one of the phi sources (and does not have a widening conversion,
    * but could have a narrowing->widening sequence inserted without
    * loss of precision), then we could insert a narrowing->widening
    * sequence to make the rest of the transformation possible:
    */
   nir_foreach_phi_src (src, phi) {
      assert(src->src.is_ssa);

      nir_instr *instr = src->src.ssa->parent_instr;
      if (instr->type != nir_instr_type_load_const)
         continue;

      if (!can_convert_load_const(nir_instr_as_load_const(instr), op))
         return INVALID_OP;
   }

   return op;
}
360
/* Try to move widening conversions into the phi to the phi's output
 * to reduce the phi's precision:
 *
 * Returns true (progress) if the phi was replaced by a narrower phi
 * followed by a single widening conversion of its result.
 */
static bool
try_move_widening_src(nir_builder *b, nir_phi_instr *phi)
{
   assert(phi->dest.is_ssa);

   /* If the phi has already been narrowed, nothing more to do: */
   if (phi->dest.ssa.bit_size != 32)
      return false;

   unsigned bit_size;
   nir_op op = find_widening_op(phi, &bit_size);

   if (op == INVALID_OP)
      return false;

   /* construct replacement phi instruction at the narrower source
    * bit size:
    */
   nir_phi_instr *new_phi = nir_phi_instr_create(b->shader);
   nir_ssa_dest_init(&new_phi->instr, &new_phi->dest,
                     phi->dest.ssa.num_components,
                     bit_size, NULL);

   /* Remove the widening conversions from the phi sources: */
   nir_foreach_phi_src (src, phi) {
      assert(src->src.is_ssa);

      nir_instr *instr = src->src.ssa->parent_instr;
      nir_ssa_def *new_src;

      b->cursor = nir_after_instr(instr);

      if (instr->type == nir_instr_type_load_const) {
         /* if the src is a load_const, we've already verified that it
          * is safe to insert a narrowing conversion to make the rest
          * of this transformation legal:
          */
         nir_load_const_instr *lc = nir_instr_as_load_const(instr);

         if (op_to_type(op) == nir_type_float) {
            new_src = nir_f2f16(b, &lc->def);
         } else {
            new_src = nir_i2i16(b, &lc->def);
         }
      } else {
         /* at this point we know the sources source is a conversion: */
         nir_alu_instr *alu = nir_instr_as_alu(instr);

         /* The conversion we are stripping off could have had a swizzle,
          * so replace it with a mov if necessary:
          */
         unsigned num_comp = nir_dest_num_components(alu->dest.dest);
         new_src = nir_mov_alu(b, alu->src[0], num_comp);
      }

      /* add corresponding phi_src to the new_phi: */
      nir_phi_instr_add_src(new_phi, src->pred, nir_src_for_ssa(new_src));
   }

   /* And insert the new phi after all sources are in place: */
   b->cursor = nir_after_instr(&phi->instr);
   nir_builder_instr_insert(b, &new_phi->instr);

   /* And finally add back the widening conversion after the phi,
    * and re-write the original phi's uses
    */
   b->cursor = nir_after_instr_and_phis(&new_phi->instr);
   nir_ssa_def *def = nir_build_alu(b, op, &new_phi->dest.ssa, NULL, NULL, NULL);

   nir_ssa_def_rewrite_uses(&phi->dest.ssa, def);

   return true;
}
435
436static bool
437lower_phi(nir_builder *b, nir_phi_instr *phi)
438{
439   bool progress = try_move_narrowing_dst(b, phi);
440   if (!progress)
441      progress = try_move_widening_src(b, phi);
442   return progress;
443}
444
445bool
446nir_opt_phi_precision(nir_shader *shader)
447{
448   bool progress = false;
449
450   /* If 8b or 16b bit_sizes are not used, no point to run this pass: */
451   unsigned bit_sizes_used = shader->info.bit_sizes_float |
452                             shader->info.bit_sizes_int;
453
454   if (!bit_sizes_used) {
455      nir_shader_gather_info(shader, nir_shader_get_entrypoint(shader));
456      bit_sizes_used = shader->info.bit_sizes_float |
457                       shader->info.bit_sizes_int;
458   }
459
460   if (!(bit_sizes_used & (8 | 16)))
461      return false;
462
463   nir_foreach_function(function, shader) {
464      if (!function->impl)
465         continue;
466
467      nir_builder b;
468      nir_builder_init(&b, function->impl);
469
470      nir_foreach_block (block, function->impl) {
471         nir_foreach_instr_safe (instr, block) {
472            if (instr->type != nir_instr_type_phi)
473               break;
474
475            progress |= lower_phi(&b, nir_instr_as_phi(instr));
476         }
477      }
478
479      if (progress) {
480         nir_metadata_preserve(function->impl,
481                               nir_metadata_block_index |
482                               nir_metadata_dominance);
483      } else {
484         nir_metadata_preserve(function->impl, nir_metadata_all);
485      }
486   }
487
488   return progress;
489}
490
491