1/*
2 * Copyright © 2015 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24/*
25 * This lowering pass supports (as configured via nir_lower_tex_options)
26 * various texture related conversions:
27 *   + texture projector lowering: converts the coordinate division for
28 *     texture projection to be done in ALU instructions instead of
29 *     asking the texture operation to do so.
30 *   + lowering RECT: converts the un-normalized RECT texture coordinates
31 *     to normalized coordinates with txs plus ALU instructions
32 *   + saturate s/t/r coords: to emulate certain texture clamp/wrap modes,
33 *     inserts instructions to clamp specified coordinates to [0.0, 1.0].
34 *     Note that this automatically triggers texture projector lowering if
35 *     needed, since clamping must happen after projector lowering.
36 *   + YUV-to-RGB conversion: to allow sampling YUV values as RGB values
37 *     according to a specific YUV color space and range.
38 */
39
40#include "nir.h"
41#include "nir_builder.h"
42#include "nir_builtin_builder.h"
43#include "nir_format_convert.h"
44
/* Three rows of four nir_const_values, used below to hold the YCbCr->RGB
 * color-space-conversion coefficients.  Each v[i] holds the coefficients
 * applied to one of Y/U/V (see convert_yuv_to_rgb()); the rows are padded
 * to 4 entries so they can be loaded directly as vec4 immediates.
 */
typedef struct nir_const_value_3_4 {
   nir_const_value v[3][4];
} nir_const_value_3_4;
48
/* YCbCr->RGB conversion coefficients, consumed by convert_yuv_to_rgb():
 * v[0] scales Y, v[1] scales U (Cb) and v[2] scales V (Cr).  One table per
 * supported combination of color space (BT.601 / BT.709 / BT.2020) and
 * quantization range (limited "studio" range vs. full range).
 */
static const nir_const_value_3_4 bt601_limited_range_csc_coeffs = { {
   { { .f32 = 1.16438356f }, { .f32 =  1.16438356f }, { .f32 = 1.16438356f } },
   { { .f32 = 0.0f        }, { .f32 = -0.39176229f }, { .f32 = 2.01723214f } },
   { { .f32 = 1.59602678f }, { .f32 = -0.81296764f }, { .f32 = 0.0f        } },
} };
static const nir_const_value_3_4 bt601_full_range_csc_coeffs = { {
   { { .f32 = 1.0f        }, { .f32 =  1.0f        }, { .f32 = 1.0f        } },
   { { .f32 = 0.0f        }, { .f32 = -0.34413629f }, { .f32 = 1.772f      } },
   { { .f32 = 1.402f      }, { .f32 = -0.71413629f }, { .f32 = 0.0f        } },
} };
static const nir_const_value_3_4 bt709_limited_range_csc_coeffs = { {
   { { .f32 = 1.16438356f }, { .f32 =  1.16438356f }, { .f32 = 1.16438356f } },
   { { .f32 = 0.0f        }, { .f32 = -0.21324861f }, { .f32 = 2.11240179f } },
   { { .f32 = 1.79274107f }, { .f32 = -0.53290933f }, { .f32 = 0.0f        } },
} };
static const nir_const_value_3_4 bt709_full_range_csc_coeffs = { {
   { { .f32 = 1.0f        }, { .f32 =  1.0f        }, { .f32 = 1.0f        } },
   { { .f32 = 0.0f        }, { .f32 = -0.18732427f }, { .f32 = 1.8556f     } },
   { { .f32 = 1.5748f     }, { .f32 = -0.46812427f }, { .f32 = 0.0f        } },
} };
static const nir_const_value_3_4 bt2020_limited_range_csc_coeffs = { {
   { { .f32 = 1.16438356f }, { .f32 =  1.16438356f }, { .f32 = 1.16438356f } },
   { { .f32 = 0.0f        }, { .f32 = -0.18732610f }, { .f32 = 2.14177232f } },
   { { .f32 = 1.67878795f }, { .f32 = -0.65046843f }, { .f32 = 0.0f        } },
} };
static const nir_const_value_3_4 bt2020_full_range_csc_coeffs = { {
   { { .f32 = 1.0f        }, { .f32 =  1.0f        }, { .f32 = 1.0f        } },
   { { .f32 = 0.0f        }, { .f32 = -0.16455313f }, { .f32 = 1.88140000f } },
   { { .f32 = 1.4747f     }, { .f32 = -0.57139187f }, { .f32 = 0.0f        } },
} };
79
/* Constant RGB offsets added after the matrix multiply in
 * convert_yuv_to_rgb() (the vector's fourth component is the pass-through
 * alpha).  One table per color space / range combination above.
 */
static const float bt601_limited_range_csc_offsets[3] = {
   -0.874202218f, 0.531667823f, -1.085630789f
};
static const float bt601_full_range_csc_offsets[3] = {
   -0.701000000f, 0.529136286f, -0.886000000f
};
static const float bt709_limited_range_csc_offsets[3] = {
   -0.972945075f, 0.301482665f, -1.133402218f
};
static const float bt709_full_range_csc_offsets[3] = {
   -0.787400000f, 0.327724273f, -0.927800000f
};
static const float bt2020_limited_range_csc_offsets[3] = {
   -0.915745075f, 0.347480639f, -1.148145075f
};
static const float bt2020_full_range_csc_offsets[3] = {
   -0.737350000f, 0.367972500f, -0.940700000f
};
98
99static bool
100project_src(nir_builder *b, nir_tex_instr *tex)
101{
102   /* Find the projector in the srcs list, if present. */
103   int proj_index = nir_tex_instr_src_index(tex, nir_tex_src_projector);
104   if (proj_index < 0)
105      return false;
106
107   b->cursor = nir_before_instr(&tex->instr);
108
109   nir_ssa_def *inv_proj =
110      nir_frcp(b, nir_ssa_for_src(b, tex->src[proj_index].src, 1));
111
112   /* Walk through the sources projecting the arguments. */
113   for (unsigned i = 0; i < tex->num_srcs; i++) {
114      switch (tex->src[i].src_type) {
115      case nir_tex_src_coord:
116      case nir_tex_src_comparator:
117         break;
118      default:
119         continue;
120      }
121      nir_ssa_def *unprojected =
122         nir_ssa_for_src(b, tex->src[i].src, nir_tex_instr_src_size(tex, i));
123      nir_ssa_def *projected = nir_fmul(b, unprojected, inv_proj);
124
125      /* Array indices don't get projected, so make an new vector with the
126       * coordinate's array index untouched.
127       */
128      if (tex->is_array && tex->src[i].src_type == nir_tex_src_coord) {
129         switch (tex->coord_components) {
130         case 4:
131            projected = nir_vec4(b,
132                                 nir_channel(b, projected, 0),
133                                 nir_channel(b, projected, 1),
134                                 nir_channel(b, projected, 2),
135                                 nir_channel(b, unprojected, 3));
136            break;
137         case 3:
138            projected = nir_vec3(b,
139                                 nir_channel(b, projected, 0),
140                                 nir_channel(b, projected, 1),
141                                 nir_channel(b, unprojected, 2));
142            break;
143         case 2:
144            projected = nir_vec2(b,
145                                 nir_channel(b, projected, 0),
146                                 nir_channel(b, unprojected, 1));
147            break;
148         default:
149            unreachable("bad texture coord count for array");
150            break;
151         }
152      }
153
154      nir_instr_rewrite_src(&tex->instr,
155                            &tex->src[i].src,
156                            nir_src_for_ssa(projected));
157   }
158
159   nir_tex_instr_remove_src(tex, proj_index);
160   return true;
161}
162
/* Lowers a nir_tex_src_offset source by folding it into the coordinate:
 * for float coordinates the texel offset is scaled into normalized-coord
 * space (except for RECT samplers, whose coordinates are un-normalized),
 * for integer coordinates it is simply added.  The array index component,
 * if any, is left untouched.  Returns true if an offset was lowered.
 */
static bool
lower_offset(nir_builder *b, nir_tex_instr *tex)
{
   int offset_index = nir_tex_instr_src_index(tex, nir_tex_src_offset);
   if (offset_index < 0)
      return false;

   int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
   assert(coord_index >= 0);

   assert(tex->src[offset_index].src.is_ssa);
   assert(tex->src[coord_index].src.is_ssa);
   nir_ssa_def *offset = tex->src[offset_index].src.ssa;
   nir_ssa_def *coord = tex->src[coord_index].src.ssa;

   b->cursor = nir_before_instr(&tex->instr);

   nir_ssa_def *offset_coord;
   if (nir_tex_instr_src_type(tex, coord_index) == nir_type_float) {
      if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
         /* RECT coordinates are in texels already; add directly. */
         offset_coord = nir_fadd(b, coord, nir_i2f32(b, offset));
      } else {
         /* Convert the texel offset to a normalized-coordinate offset by
          * dividing by the texture size.
          */
         nir_ssa_def *txs = nir_i2f32(b, nir_get_texture_size(b, tex));
         nir_ssa_def *scale = nir_frcp(b, txs);

         offset_coord = nir_fadd(b, coord,
                                 nir_fmul(b,
                                          nir_i2f32(b, offset),
                                          scale));
      }
   } else {
      /* Integer (txf-style) coordinates: plain integer add. */
      offset_coord = nir_iadd(b, coord, offset);
   }

   if (tex->is_array) {
      /* The offset is not applied to the array index */
      if (tex->coord_components == 2) {
         offset_coord = nir_vec2(b, nir_channel(b, offset_coord, 0),
                                    nir_channel(b, coord, 1));
      } else if (tex->coord_components == 3) {
         offset_coord = nir_vec3(b, nir_channel(b, offset_coord, 0),
                                    nir_channel(b, offset_coord, 1),
                                    nir_channel(b, coord, 2));
      } else {
         unreachable("Invalid number of components");
      }
   }

   nir_instr_rewrite_src(&tex->instr, &tex->src[coord_index].src,
                         nir_src_for_ssa(offset_coord));

   nir_tex_instr_remove_src(tex, offset_index);

   return true;
}
218
219static void
220lower_rect(nir_builder *b, nir_tex_instr *tex)
221{
222   /* Set the sampler_dim to 2D here so that get_texture_size picks up the
223    * right dimensionality.
224    */
225   tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
226
227   nir_ssa_def *txs = nir_i2f32(b, nir_get_texture_size(b, tex));
228   nir_ssa_def *scale = nir_frcp(b, txs);
229   int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
230
231   if (coord_index != -1) {
232      nir_ssa_def *coords =
233         nir_ssa_for_src(b, tex->src[coord_index].src, tex->coord_components);
234      nir_instr_rewrite_src(&tex->instr,
235                            &tex->src[coord_index].src,
236                            nir_src_for_ssa(nir_fmul(b, coords, scale)));
237   }
238}
239
240static void
241lower_rect_tex_scale(nir_builder *b, nir_tex_instr *tex)
242{
243   b->cursor = nir_before_instr(&tex->instr);
244
245   nir_ssa_def *idx = nir_imm_int(b, tex->texture_index);
246   nir_ssa_def *scale = nir_build_load_texture_rect_scaling(b, 32, idx);
247   int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
248
249   if (coord_index != -1) {
250      nir_ssa_def *coords =
251         nir_ssa_for_src(b, tex->src[coord_index].src, tex->coord_components);
252      nir_instr_rewrite_src(&tex->instr,
253                            &tex->src[coord_index].src,
254                            nir_src_for_ssa(nir_fmul(b, coords, scale)));
255   }
256}
257
/* Converts a tex/txb instruction into an explicit-LOD txl using the given
 * base LOD, folding any bias source into it and clamping against any
 * min_lod source.  The instruction must not already carry lod/ddx/ddy
 * sources.
 */
static void
lower_lod(nir_builder *b, nir_tex_instr *tex, nir_ssa_def *lod)
{
   assert(tex->op == nir_texop_tex || tex->op == nir_texop_txb);
   assert(nir_tex_instr_src_index(tex, nir_tex_src_lod) < 0);
   assert(nir_tex_instr_src_index(tex, nir_tex_src_ddx) < 0);
   assert(nir_tex_instr_src_index(tex, nir_tex_src_ddy) < 0);

   int bias_idx = nir_tex_instr_src_index(tex, nir_tex_src_bias);
   if (bias_idx >= 0) {
      /* If we have a bias, add it in */
      lod = nir_fadd(b, lod, nir_ssa_for_src(b, tex->src[bias_idx].src, 1));
      nir_tex_instr_remove_src(tex, bias_idx);
   }

   int min_lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_min_lod);
   if (min_lod_idx >= 0) {
      /* If we have a minimum LOD, clamp LOD accordingly */
      lod = nir_fmax(b, lod, nir_ssa_for_src(b, tex->src[min_lod_idx].src, 1));
      nir_tex_instr_remove_src(tex, min_lod_idx);
   }

   nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_src_for_ssa(lod));
   tex->op = nir_texop_txl;
}
283
284static void
285lower_implicit_lod(nir_builder *b, nir_tex_instr *tex)
286{
287   b->cursor = nir_before_instr(&tex->instr);
288   lower_lod(b, tex, nir_get_texture_lod(b, tex));
289}
290
291static void
292lower_zero_lod(nir_builder *b, nir_tex_instr *tex)
293{
294   b->cursor = nir_before_instr(&tex->instr);
295
296   if (tex->op == nir_texop_lod) {
297      nir_ssa_def_rewrite_uses(&tex->dest.ssa, nir_imm_int(b, 0));
298      nir_instr_remove(&tex->instr);
299      return;
300   }
301
302   lower_lod(b, tex, nir_imm_int(b, 0));
303}
304
/* Emits a clone of the given 2D float tex instruction with an extra
 * nir_tex_src_plane source selecting the requested plane, and returns its
 * vec4 result (scaled by options->scale_factors[] if set for this texture).
 */
static nir_ssa_def *
sample_plane(nir_builder *b, nir_tex_instr *tex, int plane,
             const nir_lower_tex_options *options)
{
   assert(tex->dest.is_ssa);
   assert(nir_tex_instr_dest_size(tex) == 4);
   assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float);
   assert(tex->op == nir_texop_tex);
   assert(tex->coord_components == 2);

   /* Copy all of the original instruction's sources, then append the
    * plane selector as one extra source.
    */
   nir_tex_instr *plane_tex =
      nir_tex_instr_create(b->shader, tex->num_srcs + 1);
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      nir_src_copy(&plane_tex->src[i].src, &tex->src[i].src);
      plane_tex->src[i].src_type = tex->src[i].src_type;
   }
   plane_tex->src[tex->num_srcs].src = nir_src_for_ssa(nir_imm_int(b, plane));
   plane_tex->src[tex->num_srcs].src_type = nir_tex_src_plane;
   plane_tex->op = nir_texop_tex;
   plane_tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
   plane_tex->dest_type = nir_type_float | nir_dest_bit_size(tex->dest);
   plane_tex->coord_components = 2;

   plane_tex->texture_index = tex->texture_index;
   plane_tex->sampler_index = tex->sampler_index;

   nir_ssa_dest_init(&plane_tex->instr, &plane_tex->dest, 4,
         nir_dest_bit_size(tex->dest), NULL);

   nir_builder_instr_insert(b, &plane_tex->instr);

   /* If scaling_factor is set, return a scaled value. */
   if (options->scale_factors[tex->texture_index])
      return nir_fmul_imm(b, &plane_tex->dest.ssa,
                          options->scale_factors[tex->texture_index]);

   return &plane_tex->dest.ssa;
}
343
/* Emits the YCbCr->RGB conversion result = M * (y, u, v) + offset (with
 * alpha passed through in the fourth component) and rewrites all uses of
 * the texture result with it.  The matrix/offset pair is selected from the
 * tables above via the per-texture bt709_external / bt2020_external /
 * yuv_full_range_external bitmasks, defaulting to BT.601 limited range.
 */
static void
convert_yuv_to_rgb(nir_builder *b, nir_tex_instr *tex,
                   nir_ssa_def *y, nir_ssa_def *u, nir_ssa_def *v,
                   nir_ssa_def *a,
                   const nir_lower_tex_options *options,
                   unsigned texture_index)
{

   const float *offset_vals;
   const nir_const_value_3_4 *m;
   /* A texture may be flagged as BT.709 or BT.2020, never both. */
   assert((options->bt709_external & options->bt2020_external) == 0);
   if (options->yuv_full_range_external & (1u << texture_index)) {
      if (options->bt709_external & (1u << texture_index)) {
         m = &bt709_full_range_csc_coeffs;
         offset_vals = bt709_full_range_csc_offsets;
      } else if (options->bt2020_external & (1u << texture_index)) {
         m = &bt2020_full_range_csc_coeffs;
         offset_vals = bt2020_full_range_csc_offsets;
      } else {
         m = &bt601_full_range_csc_coeffs;
         offset_vals = bt601_full_range_csc_offsets;
      }
   } else {
      if (options->bt709_external & (1u << texture_index)) {
         m = &bt709_limited_range_csc_coeffs;
         offset_vals = bt709_limited_range_csc_offsets;
      } else if (options->bt2020_external & (1u << texture_index)) {
         m = &bt2020_limited_range_csc_coeffs;
         offset_vals = bt2020_limited_range_csc_offsets;
      } else {
         m = &bt601_limited_range_csc_coeffs;
         offset_vals = bt601_limited_range_csc_offsets;
      }
   }

   unsigned bit_size = nir_dest_bit_size(tex->dest);

   /* vec4(rgb offsets, alpha): alpha rides along unconverted. */
   nir_ssa_def *offset =
      nir_vec4(b,
               nir_imm_floatN_t(b, offset_vals[0], a->bit_size),
               nir_imm_floatN_t(b, offset_vals[1], a->bit_size),
               nir_imm_floatN_t(b, offset_vals[2], a->bit_size),
               a);

   /* Match the destination bit size (e.g. fp16 destinations). */
   offset = nir_f2fN(b, offset, bit_size);

   nir_ssa_def *m0 = nir_f2fN(b, nir_build_imm(b, 4, 32, m->v[0]), bit_size);
   nir_ssa_def *m1 = nir_f2fN(b, nir_build_imm(b, 4, 32, m->v[1]), bit_size);
   nir_ssa_def *m2 = nir_f2fN(b, nir_build_imm(b, 4, 32, m->v[2]), bit_size);

   /* y*m0 + u*m1 + v*m2 + offset, as a chain of fused multiply-adds. */
   nir_ssa_def *result =
      nir_ffma(b, y, m0, nir_ffma(b, u, m1, nir_ffma(b, v, m2, offset)));

   nir_ssa_def_rewrite_uses(&tex->dest.ssa, result);
}
399
400static void
401lower_y_uv_external(nir_builder *b, nir_tex_instr *tex,
402                    const nir_lower_tex_options *options,
403                    unsigned texture_index)
404{
405   b->cursor = nir_after_instr(&tex->instr);
406
407   nir_ssa_def *y = sample_plane(b, tex, 0, options);
408   nir_ssa_def *uv = sample_plane(b, tex, 1, options);
409
410   convert_yuv_to_rgb(b, tex,
411                      nir_channel(b, y, 0),
412                      nir_channel(b, uv, 0),
413                      nir_channel(b, uv, 1),
414                      nir_imm_float(b, 1.0f),
415                      options,
416                      texture_index);
417}
418
419static void
420lower_y_u_v_external(nir_builder *b, nir_tex_instr *tex,
421                     const nir_lower_tex_options *options,
422                     unsigned texture_index)
423{
424   b->cursor = nir_after_instr(&tex->instr);
425
426   nir_ssa_def *y = sample_plane(b, tex, 0, options);
427   nir_ssa_def *u = sample_plane(b, tex, 1, options);
428   nir_ssa_def *v = sample_plane(b, tex, 2, options);
429
430   convert_yuv_to_rgb(b, tex,
431                      nir_channel(b, y, 0),
432                      nir_channel(b, u, 0),
433                      nir_channel(b, v, 0),
434                      nir_imm_float(b, 1.0f),
435                      options,
436                      texture_index);
437}
438
439static void
440lower_yx_xuxv_external(nir_builder *b, nir_tex_instr *tex,
441                       const nir_lower_tex_options *options,
442                       unsigned texture_index)
443{
444   b->cursor = nir_after_instr(&tex->instr);
445
446   nir_ssa_def *y = sample_plane(b, tex, 0, options);
447   nir_ssa_def *xuxv = sample_plane(b, tex, 1, options);
448
449   convert_yuv_to_rgb(b, tex,
450                      nir_channel(b, y, 0),
451                      nir_channel(b, xuxv, 1),
452                      nir_channel(b, xuxv, 3),
453                      nir_imm_float(b, 1.0f),
454                      options,
455                      texture_index);
456}
457
458static void
459lower_xy_uxvx_external(nir_builder *b, nir_tex_instr *tex,
460                       const nir_lower_tex_options *options,
461                       unsigned texture_index)
462{
463  b->cursor = nir_after_instr(&tex->instr);
464
465  nir_ssa_def *y = sample_plane(b, tex, 0, options);
466  nir_ssa_def *uxvx = sample_plane(b, tex, 1, options);
467
468  convert_yuv_to_rgb(b, tex,
469                     nir_channel(b, y, 1),
470                     nir_channel(b, uxvx, 0),
471                     nir_channel(b, uxvx, 2),
472                     nir_imm_float(b, 1.0f),
473                     options,
474                     texture_index);
475}
476
477static void
478lower_ayuv_external(nir_builder *b, nir_tex_instr *tex,
479                    const nir_lower_tex_options *options,
480                    unsigned texture_index)
481{
482  b->cursor = nir_after_instr(&tex->instr);
483
484  nir_ssa_def *ayuv = sample_plane(b, tex, 0, options);
485
486  convert_yuv_to_rgb(b, tex,
487                     nir_channel(b, ayuv, 2),
488                     nir_channel(b, ayuv, 1),
489                     nir_channel(b, ayuv, 0),
490                     nir_channel(b, ayuv, 3),
491                     options,
492                     texture_index);
493}
494
495static void
496lower_y41x_external(nir_builder *b, nir_tex_instr *tex,
497                    const nir_lower_tex_options *options,
498                    unsigned texture_index)
499{
500  b->cursor = nir_after_instr(&tex->instr);
501
502  nir_ssa_def *y41x = sample_plane(b, tex, 0, options);
503
504  convert_yuv_to_rgb(b, tex,
505                     nir_channel(b, y41x, 1),
506                     nir_channel(b, y41x, 0),
507                     nir_channel(b, y41x, 2),
508                     nir_channel(b, y41x, 3),
509                     options,
510                     texture_index);
511}
512
513static void
514lower_xyuv_external(nir_builder *b, nir_tex_instr *tex,
515                    const nir_lower_tex_options *options,
516                    unsigned texture_index)
517{
518  b->cursor = nir_after_instr(&tex->instr);
519
520  nir_ssa_def *xyuv = sample_plane(b, tex, 0, options);
521
522  convert_yuv_to_rgb(b, tex,
523                     nir_channel(b, xyuv, 2),
524                     nir_channel(b, xyuv, 1),
525                     nir_channel(b, xyuv, 0),
526                     nir_imm_float(b, 1.0f),
527                     options,
528                     texture_index);
529}
530
531static void
532lower_yuv_external(nir_builder *b, nir_tex_instr *tex,
533                   const nir_lower_tex_options *options,
534                   unsigned texture_index)
535{
536  b->cursor = nir_after_instr(&tex->instr);
537
538  nir_ssa_def *yuv = sample_plane(b, tex, 0, options);
539
540  convert_yuv_to_rgb(b, tex,
541                     nir_channel(b, yuv, 0),
542                     nir_channel(b, yuv, 1),
543                     nir_channel(b, yuv, 2),
544                     nir_imm_float(b, 1.0f),
545                     options,
546                     texture_index);
547}
548
549static void
550lower_yu_yv_external(nir_builder *b, nir_tex_instr *tex,
551                     const nir_lower_tex_options *options,
552                     unsigned texture_index)
553{
554  b->cursor = nir_after_instr(&tex->instr);
555
556  nir_ssa_def *yuv = sample_plane(b, tex, 0, options);
557
558  convert_yuv_to_rgb(b, tex,
559                     nir_channel(b, yuv, 1),
560                     nir_channel(b, yuv, 2),
561                     nir_channel(b, yuv, 0),
562                     nir_imm_float(b, 1.0f),
563                     options,
564                     texture_index);
565}
566
/*
 * Converts a nir_texop_txd instruction to nir_texop_txl with the given lod
 * computed from the gradients.  The ddx/ddy sources are removed and, if a
 * min_lod source is present, the lod is clamped against it before it is
 * attached.
 */
static void
replace_gradient_with_lod(nir_builder *b, nir_ssa_def *lod, nir_tex_instr *tex)
{
   assert(tex->op == nir_texop_txd);

   nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_ddx));
   nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_ddy));

   int min_lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_min_lod);
   if (min_lod_idx >= 0) {
      /* If we have a minimum LOD, clamp LOD accordingly */
      lod = nir_fmax(b, lod, nir_ssa_for_src(b, tex->src[min_lod_idx].src, 1));
      nir_tex_instr_remove_src(tex, min_lod_idx);
   }

   nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_src_for_ssa(lod));
   tex->op = nir_texop_txl;
}
589
/* Lowers txd on a cube map to txl by computing the LOD explicitly from the
 * provided gradients; see the derivation in the long comment below.
 */
static void
lower_gradient_cube_map(nir_builder *b, nir_tex_instr *tex)
{
   assert(tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE);
   assert(tex->op == nir_texop_txd);
   assert(tex->dest.is_ssa);

   /* Use textureSize() to get the width and height of LOD 0 */
   nir_ssa_def *size = nir_i2f32(b, nir_get_texture_size(b, tex));

   /* Cubemap texture lookups first generate a texture coordinate normalized
    * to [-1, 1] on the appropriate face. The appropriate face is determined
    * by which component has largest magnitude and its sign. The texture
    * coordinate is the quotient of the remaining texture coordinates against
    * the absolute value of the component of largest magnitude. This
    * division requires that the computing of the derivative of the texel
    * coordinate must use the quotient rule. The high level GLSL code is as
    * follows:
    *
    * Step 1: selection
    *
    * vec3 abs_p, Q, dQdx, dQdy;
    * abs_p = abs(ir->coordinate);
    * if (abs_p.x >= max(abs_p.y, abs_p.z)) {
    *    Q = ir->coordinate.yzx;
    *    dQdx = ir->lod_info.grad.dPdx.yzx;
    *    dQdy = ir->lod_info.grad.dPdy.yzx;
    * }
    * if (abs_p.y >= max(abs_p.x, abs_p.z)) {
    *    Q = ir->coordinate.xzy;
    *    dQdx = ir->lod_info.grad.dPdx.xzy;
    *    dQdy = ir->lod_info.grad.dPdy.xzy;
    * }
    * if (abs_p.z >= max(abs_p.x, abs_p.y)) {
    *    Q = ir->coordinate;
    *    dQdx = ir->lod_info.grad.dPdx;
    *    dQdy = ir->lod_info.grad.dPdy;
    * }
    *
    * Step 2: use quotient rule to compute derivative. The normalized to
    * [-1, 1] texel coordinate is given by Q.xy / (sign(Q.z) * Q.z). We are
    * only concerned with the magnitudes of the derivatives whose values are
    * not affected by the sign. We drop the sign from the computation.
    *
    * vec2 dx, dy;
    * float recip;
    *
    * recip = 1.0 / Q.z;
    * dx = recip * ( dQdx.xy - Q.xy * (dQdx.z * recip) );
    * dy = recip * ( dQdy.xy - Q.xy * (dQdy.z * recip) );
    *
    * Step 3: compute LOD. At this point we have the derivatives of the
    * texture coordinates normalized to [-1,1]. We take the LOD to be
    *  result = log2(max(sqrt(dot(dx, dx)), sqrt(dot(dy, dy))) * 0.5 * L)
    *         = -1.0 + log2(max(sqrt(dot(dx, dx)), sqrt(dot(dy, dy))) * L)
    *         = -1.0 + log2(sqrt(max(dot(dx, dx), dot(dy,dy))) * L)
    *         = -1.0 + log2(sqrt(L * L * max(dot(dx, dx), dot(dy,dy))))
    *         = -1.0 + 0.5 * log2(L * L * max(dot(dx, dx), dot(dy,dy)))
    * where L is the dimension of the cubemap. The code is:
    *
    * float M, result;
    * M = max(dot(dx, dx), dot(dy, dy));
    * L = textureSize(sampler, 0).x;
    * result = -1.0 + 0.5 * log2(L * L * M);
    */

   /* coordinate */
   nir_ssa_def *p =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_coord)].src.ssa;

   /* unmodified dPdx, dPdy values */
   nir_ssa_def *dPdx =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddx)].src.ssa;
   nir_ssa_def *dPdy =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddy)].src.ssa;

   nir_ssa_def *abs_p = nir_fabs(b, p);
   nir_ssa_def *abs_p_x = nir_channel(b, abs_p, 0);
   nir_ssa_def *abs_p_y = nir_channel(b, abs_p, 1);
   nir_ssa_def *abs_p_z = nir_channel(b, abs_p, 2);

   /* 1. compute selector */
   nir_ssa_def *Q, *dQdx, *dQdy;

   /* Face selection, implemented branch-free with nested bcsels: pick the
    * swizzle corresponding to the component of largest magnitude.
    */
   nir_ssa_def *cond_z = nir_fge(b, abs_p_z, nir_fmax(b, abs_p_x, abs_p_y));
   nir_ssa_def *cond_y = nir_fge(b, abs_p_y, nir_fmax(b, abs_p_x, abs_p_z));

   unsigned yzx[3] = { 1, 2, 0 };
   unsigned xzy[3] = { 0, 2, 1 };

   Q = nir_bcsel(b, cond_z,
                 p,
                 nir_bcsel(b, cond_y,
                           nir_swizzle(b, p, xzy, 3),
                           nir_swizzle(b, p, yzx, 3)));

   dQdx = nir_bcsel(b, cond_z,
                    dPdx,
                    nir_bcsel(b, cond_y,
                              nir_swizzle(b, dPdx, xzy, 3),
                              nir_swizzle(b, dPdx, yzx, 3)));

   dQdy = nir_bcsel(b, cond_z,
                    dPdy,
                    nir_bcsel(b, cond_y,
                              nir_swizzle(b, dPdy, xzy, 3),
                              nir_swizzle(b, dPdy, yzx, 3)));

   /* 2. quotient rule */

   /* tmp = Q.xy * recip;
    * dx = recip * ( dQdx.xy - (tmp * dQdx.z) );
    * dy = recip * ( dQdy.xy - (tmp * dQdy.z) );
    */
   nir_ssa_def *rcp_Q_z = nir_frcp(b, nir_channel(b, Q, 2));

   nir_ssa_def *Q_xy = nir_channels(b, Q, 0x3);
   nir_ssa_def *tmp = nir_fmul(b, Q_xy, rcp_Q_z);

   nir_ssa_def *dQdx_xy = nir_channels(b, dQdx, 0x3);
   nir_ssa_def *dQdx_z = nir_channel(b, dQdx, 2);
   nir_ssa_def *dx =
      nir_fmul(b, rcp_Q_z, nir_fsub(b, dQdx_xy, nir_fmul(b, tmp, dQdx_z)));

   nir_ssa_def *dQdy_xy = nir_channels(b, dQdy, 0x3);
   nir_ssa_def *dQdy_z = nir_channel(b, dQdy, 2);
   nir_ssa_def *dy =
      nir_fmul(b, rcp_Q_z, nir_fsub(b, dQdy_xy, nir_fmul(b, tmp, dQdy_z)));

   /* M = max(dot(dx, dx), dot(dy, dy)); */
   nir_ssa_def *M = nir_fmax(b, nir_fdot(b, dx, dx), nir_fdot(b, dy, dy));

   /* size has textureSize() of LOD 0 */
   nir_ssa_def *L = nir_channel(b, size, 0);

   /* lod = -1.0 + 0.5 * log2(L * L * M); */
   nir_ssa_def *lod =
      nir_fadd(b,
               nir_imm_float(b, -1.0f),
               nir_fmul(b,
                        nir_imm_float(b, 0.5f),
                        nir_flog2(b, nir_fmul(b, L, nir_fmul(b, L, M)))));

   /* 3. Replace the gradient instruction with an equivalent lod instruction */
   replace_gradient_with_lod(b, lod, tex);
}
736
/* Lowers txd on non-cube samplers to txl, computing the LOD as
 * log2(rho) where rho is the larger magnitude of the two texel-space
 * gradient vectors.
 */
static void
lower_gradient(nir_builder *b, nir_tex_instr *tex)
{
   /* Cubes are more complicated and have their own function */
   if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
      lower_gradient_cube_map(b, tex);
      return;
   }

   assert(tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE);
   assert(tex->op == nir_texop_txd);
   assert(tex->dest.is_ssa);

   /* Use textureSize() to get the width and height of LOD 0 */
   unsigned component_mask;
   switch (tex->sampler_dim) {
   case GLSL_SAMPLER_DIM_3D:
      component_mask = 7;  /* x/y/z */
      break;
   case GLSL_SAMPLER_DIM_1D:
      component_mask = 1;  /* x only */
      break;
   default:
      component_mask = 3;  /* x/y */
      break;
   }

   nir_ssa_def *size =
      nir_channels(b, nir_i2f32(b, nir_get_texture_size(b, tex)),
                      component_mask);

   /* Scale the gradients by width and height.  Effectively, the incoming
    * gradients are s'(x,y), t'(x,y), and r'(x,y) from equation 3.19 in the
    * GL 3.0 spec; we want u'(x,y), which is w_t * s'(x,y).
    */
   nir_ssa_def *ddx =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddx)].src.ssa;
   nir_ssa_def *ddy =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddy)].src.ssa;

   nir_ssa_def *dPdx = nir_fmul(b, ddx, size);
   nir_ssa_def *dPdy = nir_fmul(b, ddy, size);

   /* rho = max(|dPdx|, |dPdy|), using plain fabs in the scalar (1D) case. */
   nir_ssa_def *rho;
   if (dPdx->num_components == 1) {
      rho = nir_fmax(b, nir_fabs(b, dPdx), nir_fabs(b, dPdy));
   } else {
      rho = nir_fmax(b,
                     nir_fsqrt(b, nir_fdot(b, dPdx, dPdx)),
                     nir_fsqrt(b, nir_fdot(b, dPdy, dPdy)));
   }

   /* lod = log2(rho).  We're ignoring GL state biases for now. */
   nir_ssa_def *lod = nir_flog2(b, rho);

   /* Replace the gradient instruction with an equivalent lod instruction */
   replace_gradient_with_lod(b, lod, tex);
}
795
/* tex(s, coord) = txd(s, coord, dfdx(coord), dfdy(coord))
 *
 * Replaces an implicit-derivative tex with an explicit-derivative txd by
 * computing the derivatives with fddx/fddy.  Returns the new instruction;
 * the original is removed and its uses rewritten.
 */
static nir_tex_instr *
lower_tex_to_txd(nir_builder *b, nir_tex_instr *tex)
{
   b->cursor = nir_after_instr(&tex->instr);
   /* Two extra sources: ddx and ddy. */
   nir_tex_instr *txd = nir_tex_instr_create(b->shader, tex->num_srcs + 2);

   txd->op = nir_texop_txd;
   txd->sampler_dim = tex->sampler_dim;
   txd->dest_type = tex->dest_type;
   txd->coord_components = tex->coord_components;
   txd->texture_index = tex->texture_index;
   txd->sampler_index = tex->sampler_index;
   txd->is_array = tex->is_array;
   txd->is_shadow = tex->is_shadow;
   txd->is_new_style_shadow = tex->is_new_style_shadow;

   /* reuse existing srcs */
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      nir_src_copy(&txd->src[i].src, &tex->src[i].src);
      txd->src[i].src_type = tex->src[i].src_type;
   }
   int coord = nir_tex_instr_src_index(tex, nir_tex_src_coord);
   assert(coord >= 0);
   /* Compute the screen-space derivatives of the coordinate. */
   nir_ssa_def *dfdx = nir_fddx(b, tex->src[coord].src.ssa);
   nir_ssa_def *dfdy = nir_fddy(b, tex->src[coord].src.ssa);
   txd->src[tex->num_srcs].src = nir_src_for_ssa(dfdx);
   txd->src[tex->num_srcs].src_type = nir_tex_src_ddx;
   txd->src[tex->num_srcs + 1].src = nir_src_for_ssa(dfdy);
   txd->src[tex->num_srcs + 1].src_type = nir_tex_src_ddy;

   nir_ssa_dest_init(&txd->instr, &txd->dest, nir_dest_num_components(tex->dest),
                     nir_dest_bit_size(tex->dest), NULL);
   nir_builder_instr_insert(b, &txd->instr);
   nir_ssa_def_rewrite_uses(&tex->dest.ssa, &txd->dest.ssa);
   nir_instr_remove(&tex->instr);
   return txd;
}
834
835/* txb(s, coord, bias) = txl(s, coord, lod(s, coord).y + bias) */
836static nir_tex_instr *
837lower_txb_to_txl(nir_builder *b, nir_tex_instr *tex)
838{
839   b->cursor = nir_after_instr(&tex->instr);
840   nir_tex_instr *txl = nir_tex_instr_create(b->shader, tex->num_srcs);
841
842   txl->op = nir_texop_txl;
843   txl->sampler_dim = tex->sampler_dim;
844   txl->dest_type = tex->dest_type;
845   txl->coord_components = tex->coord_components;
846   txl->texture_index = tex->texture_index;
847   txl->sampler_index = tex->sampler_index;
848   txl->is_array = tex->is_array;
849   txl->is_shadow = tex->is_shadow;
850   txl->is_new_style_shadow = tex->is_new_style_shadow;
851
852   /* reuse all but bias src */
853   for (int i = 0; i < 2; i++) {
854      if (tex->src[i].src_type != nir_tex_src_bias) {
855         nir_src_copy(&txl->src[i].src, &tex->src[i].src);
856         txl->src[i].src_type = tex->src[i].src_type;
857      }
858   }
859   nir_ssa_def *lod = nir_get_texture_lod(b, txl);
860
861   int bias_idx = nir_tex_instr_src_index(tex, nir_tex_src_bias);
862   assert(bias_idx >= 0);
863   lod = nir_fadd(b, nir_channel(b, lod, 1), nir_ssa_for_src(b, tex->src[bias_idx].src, 1));
864   txl->src[tex->num_srcs - 1].src = nir_src_for_ssa(lod);
865   txl->src[tex->num_srcs - 1].src_type = nir_tex_src_lod;
866
867   nir_ssa_dest_init(&txl->instr, &txl->dest, nir_dest_num_components(tex->dest),
868                     nir_dest_bit_size(tex->dest), NULL);
869   nir_builder_instr_insert(b, &txl->instr);
870   nir_ssa_def_rewrite_uses(&tex->dest.ssa, &txl->dest.ssa);
871   nir_instr_remove(&tex->instr);
872   return txl;
873}
874
875static nir_tex_instr *
876saturate_src(nir_builder *b, nir_tex_instr *tex, unsigned sat_mask)
877{
878   if (tex->op == nir_texop_tex)
879      tex = lower_tex_to_txd(b, tex);
880   else if (tex->op == nir_texop_txb)
881      tex = lower_txb_to_txl(b, tex);
882
883   b->cursor = nir_before_instr(&tex->instr);
884   int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
885
886   if (coord_index != -1) {
887      nir_ssa_def *src =
888         nir_ssa_for_src(b, tex->src[coord_index].src, tex->coord_components);
889
890      /* split src into components: */
891      nir_ssa_def *comp[4];
892
893      assume(tex->coord_components >= 1);
894
895      for (unsigned j = 0; j < tex->coord_components; j++)
896         comp[j] = nir_channel(b, src, j);
897
898      /* clamp requested components, array index does not get clamped: */
899      unsigned ncomp = tex->coord_components;
900      if (tex->is_array)
901         ncomp--;
902
903      for (unsigned j = 0; j < ncomp; j++) {
904         if ((1 << j) & sat_mask) {
905            if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
906               /* non-normalized texture coords, so clamp to texture
907                * size rather than [0.0, 1.0]
908                */
909               nir_ssa_def *txs = nir_i2f32(b, nir_get_texture_size(b, tex));
910               comp[j] = nir_fmax(b, comp[j], nir_imm_float(b, 0.0));
911               comp[j] = nir_fmin(b, comp[j], nir_channel(b, txs, j));
912            } else {
913               comp[j] = nir_fsat(b, comp[j]);
914            }
915         }
916      }
917
918      /* and move the result back into a single vecN: */
919      src = nir_vec(b, comp, tex->coord_components);
920
921      nir_instr_rewrite_src(&tex->instr,
922                            &tex->src[coord_index].src,
923                            nir_src_for_ssa(src));
924   }
925   return tex;
926}
927
928static nir_ssa_def *
929get_zero_or_one(nir_builder *b, nir_alu_type type, uint8_t swizzle_val)
930{
931   nir_const_value v[4];
932
933   memset(&v, 0, sizeof(v));
934
935   if (swizzle_val == 4) {
936      v[0].u32 = v[1].u32 = v[2].u32 = v[3].u32 = 0;
937   } else {
938      assert(swizzle_val == 5);
939      if (type == nir_type_float32)
940         v[0].f32 = v[1].f32 = v[2].f32 = v[3].f32 = 1.0;
941      else
942         v[0].u32 = v[1].u32 = v[2].u32 = v[3].u32 = 1;
943   }
944
945   return nir_build_imm(b, 4, 32, v);
946}
947
948static void
949swizzle_tg4_broadcom(nir_builder *b, nir_tex_instr *tex)
950{
951   assert(tex->dest.is_ssa);
952
953   b->cursor = nir_after_instr(&tex->instr);
954
955   assert(nir_tex_instr_dest_size(tex) == 4);
956   unsigned swiz[4] = { 2, 3, 1, 0 };
957   nir_ssa_def *swizzled = nir_swizzle(b, &tex->dest.ssa, swiz, 4);
958
959   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, swizzled,
960                                  swizzled->parent_instr);
961}
962
963static void
964swizzle_result(nir_builder *b, nir_tex_instr *tex, const uint8_t swizzle[4])
965{
966   assert(tex->dest.is_ssa);
967
968   b->cursor = nir_after_instr(&tex->instr);
969
970   nir_ssa_def *swizzled;
971   if (tex->op == nir_texop_tg4) {
972      if (swizzle[tex->component] < 4) {
973         /* This one's easy */
974         tex->component = swizzle[tex->component];
975         return;
976      } else {
977         swizzled = get_zero_or_one(b, tex->dest_type, swizzle[tex->component]);
978      }
979   } else {
980      assert(nir_tex_instr_dest_size(tex) == 4);
981      if (swizzle[0] < 4 && swizzle[1] < 4 &&
982          swizzle[2] < 4 && swizzle[3] < 4) {
983         unsigned swiz[4] = { swizzle[0], swizzle[1], swizzle[2], swizzle[3] };
984         /* We have no 0s or 1s, just emit a swizzling MOV */
985         swizzled = nir_swizzle(b, &tex->dest.ssa, swiz, 4);
986      } else {
987         nir_ssa_scalar srcs[4];
988         for (unsigned i = 0; i < 4; i++) {
989            if (swizzle[i] < 4) {
990               srcs[i] = nir_get_ssa_scalar(&tex->dest.ssa, swizzle[i]);
991            } else {
992               srcs[i] = nir_get_ssa_scalar(get_zero_or_one(b, tex->dest_type, swizzle[i]), 0);
993            }
994         }
995         swizzled = nir_vec_scalars(b, srcs, 4);
996      }
997   }
998
999   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, swizzled,
1000                                  swizzled->parent_instr);
1001}
1002
1003static void
1004linearize_srgb_result(nir_builder *b, nir_tex_instr *tex)
1005{
1006   assert(tex->dest.is_ssa);
1007   assert(nir_tex_instr_dest_size(tex) == 4);
1008   assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float);
1009
1010   b->cursor = nir_after_instr(&tex->instr);
1011
1012   nir_ssa_def *rgb =
1013      nir_format_srgb_to_linear(b, nir_channels(b, &tex->dest.ssa, 0x7));
1014
1015   /* alpha is untouched: */
1016   nir_ssa_def *result = nir_vec4(b,
1017                                  nir_channel(b, rgb, 0),
1018                                  nir_channel(b, rgb, 1),
1019                                  nir_channel(b, rgb, 2),
1020                                  nir_channel(b, &tex->dest.ssa, 3));
1021
1022   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, result,
1023                                  result->parent_instr);
1024}
1025
1026/**
1027 * Lowers texture instructions from giving a vec4 result to a vec2 of f16,
1028 * i16, or u16, or a single unorm4x8 value.
1029 *
1030 * Note that we don't change the destination num_components, because
1031 * nir_tex_instr_dest_size() will still return 4.  The driver is just expected
1032 * to not store the other channels, given that nothing at the NIR level will
1033 * read them.
1034 */
1035static void
1036lower_tex_packing(nir_builder *b, nir_tex_instr *tex,
1037                  const nir_lower_tex_options *options)
1038{
1039   nir_ssa_def *color = &tex->dest.ssa;
1040
1041   b->cursor = nir_after_instr(&tex->instr);
1042
1043   switch (options->lower_tex_packing[tex->sampler_index]) {
1044   case nir_lower_tex_packing_none:
1045      return;
1046
1047   case nir_lower_tex_packing_16: {
1048      static const unsigned bits[4] = {16, 16, 16, 16};
1049
1050      switch (nir_alu_type_get_base_type(tex->dest_type)) {
1051      case nir_type_float:
1052         switch (nir_tex_instr_dest_size(tex)) {
1053         case 1:
1054            assert(tex->is_shadow && tex->is_new_style_shadow);
1055            color = nir_unpack_half_2x16_split_x(b, nir_channel(b, color, 0));
1056            break;
1057         case 2: {
1058            nir_ssa_def *rg = nir_channel(b, color, 0);
1059            color = nir_vec2(b,
1060                             nir_unpack_half_2x16_split_x(b, rg),
1061                             nir_unpack_half_2x16_split_y(b, rg));
1062            break;
1063         }
1064         case 4: {
1065            nir_ssa_def *rg = nir_channel(b, color, 0);
1066            nir_ssa_def *ba = nir_channel(b, color, 1);
1067            color = nir_vec4(b,
1068                             nir_unpack_half_2x16_split_x(b, rg),
1069                             nir_unpack_half_2x16_split_y(b, rg),
1070                             nir_unpack_half_2x16_split_x(b, ba),
1071                             nir_unpack_half_2x16_split_y(b, ba));
1072            break;
1073         }
1074         default:
1075            unreachable("wrong dest_size");
1076         }
1077         break;
1078
1079      case nir_type_int:
1080         color = nir_format_unpack_sint(b, color, bits, 4);
1081         break;
1082
1083      case nir_type_uint:
1084         color = nir_format_unpack_uint(b, color, bits, 4);
1085         break;
1086
1087      default:
1088         unreachable("unknown base type");
1089      }
1090      break;
1091   }
1092
1093   case nir_lower_tex_packing_8:
1094      assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float);
1095      color = nir_unpack_unorm_4x8(b, nir_channel(b, color, 0));
1096      break;
1097   }
1098
1099   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, color,
1100                                  color->parent_instr);
1101}
1102
1103static bool
1104lower_array_layer_round_even(nir_builder *b, nir_tex_instr *tex)
1105{
1106   int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
1107   if (coord_index < 0 || nir_tex_instr_src_type(tex, coord_index) != nir_type_float)
1108      return false;
1109
1110   assert(tex->src[coord_index].src.is_ssa);
1111   nir_ssa_def *coord = tex->src[coord_index].src.ssa;
1112
1113   b->cursor = nir_before_instr(&tex->instr);
1114
1115   unsigned layer = tex->coord_components - 1;
1116   nir_ssa_def *rounded_layer = nir_fround_even(b, nir_channel(b, coord, layer));
1117   nir_ssa_def *new_coord = nir_vector_insert_imm(b, coord, rounded_layer, layer);
1118
1119   nir_instr_rewrite_src_ssa(&tex->instr, &tex->src[coord_index].src, new_coord);
1120
1121   return true;
1122}
1123
1124static bool
1125sampler_index_lt(nir_tex_instr *tex, unsigned max)
1126{
1127   assert(nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref) == -1);
1128
1129   unsigned sampler_index = tex->sampler_index;
1130
1131   int sampler_offset_idx =
1132      nir_tex_instr_src_index(tex, nir_tex_src_sampler_offset);
1133   if (sampler_offset_idx >= 0) {
1134      if (!nir_src_is_const(tex->src[sampler_offset_idx].src))
1135         return false;
1136
1137      sampler_index += nir_src_as_uint(tex->src[sampler_offset_idx].src);
1138   }
1139
1140   return sampler_index < max;
1141}
1142
1143static bool
1144lower_tg4_offsets(nir_builder *b, nir_tex_instr *tex)
1145{
1146   assert(tex->op == nir_texop_tg4);
1147   assert(nir_tex_instr_has_explicit_tg4_offsets(tex));
1148   assert(nir_tex_instr_src_index(tex, nir_tex_src_offset) == -1);
1149
1150   b->cursor = nir_after_instr(&tex->instr);
1151
1152   nir_ssa_scalar dest[5] = { 0 };
1153   nir_ssa_def *residency = NULL;
1154   for (unsigned i = 0; i < 4; ++i) {
1155      nir_tex_instr *tex_copy = nir_tex_instr_create(b->shader, tex->num_srcs + 1);
1156      tex_copy->op = tex->op;
1157      tex_copy->coord_components = tex->coord_components;
1158      tex_copy->sampler_dim = tex->sampler_dim;
1159      tex_copy->is_array = tex->is_array;
1160      tex_copy->is_shadow = tex->is_shadow;
1161      tex_copy->is_new_style_shadow = tex->is_new_style_shadow;
1162      tex_copy->is_sparse = tex->is_sparse;
1163      tex_copy->component = tex->component;
1164      tex_copy->dest_type = tex->dest_type;
1165
1166      for (unsigned j = 0; j < tex->num_srcs; ++j) {
1167         nir_src_copy(&tex_copy->src[j].src, &tex->src[j].src);
1168         tex_copy->src[j].src_type = tex->src[j].src_type;
1169      }
1170
1171      nir_tex_src src;
1172      src.src = nir_src_for_ssa(nir_imm_ivec2(b, tex->tg4_offsets[i][0],
1173                                                 tex->tg4_offsets[i][1]));
1174      src.src_type = nir_tex_src_offset;
1175      tex_copy->src[tex_copy->num_srcs - 1] = src;
1176
1177      nir_ssa_dest_init(&tex_copy->instr, &tex_copy->dest,
1178                        nir_tex_instr_dest_size(tex), 32, NULL);
1179
1180      nir_builder_instr_insert(b, &tex_copy->instr);
1181
1182      dest[i] = nir_get_ssa_scalar(&tex_copy->dest.ssa, 3);
1183      if (tex->is_sparse) {
1184         nir_ssa_def *code = nir_channel(b, &tex_copy->dest.ssa, 4);
1185         if (residency)
1186            residency = nir_sparse_residency_code_and(b, residency, code);
1187         else
1188            residency = code;
1189      }
1190   }
1191   dest[4] = nir_get_ssa_scalar(residency, 0);
1192
1193   nir_ssa_def *res = nir_vec_scalars(b, dest, tex->dest.ssa.num_components);
1194   nir_ssa_def_rewrite_uses(&tex->dest.ssa, res);
1195   nir_instr_remove(&tex->instr);
1196
1197   return true;
1198}
1199
/* Lower a txs with a (possibly dynamic) non-zero LOD into a 0-LOD txs
 * plus ALU code that minifies the reported size.  Returns whether the
 * instruction was changed.
 */
static bool
nir_lower_txs_lod(nir_builder *b, nir_tex_instr *tex)
{
   /* Nothing to do if there is no LOD source or it is a known zero. */
   int lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_lod);
   if (lod_idx < 0 ||
       (nir_src_is_const(tex->src[lod_idx].src) &&
        nir_src_as_int(tex->src[lod_idx].src) == 0))
      return false;

   unsigned dest_size = nir_tex_instr_dest_size(tex);

   /* Grab the original LOD value before it gets replaced below. */
   b->cursor = nir_before_instr(&tex->instr);
   nir_ssa_def *lod = nir_ssa_for_src(b, tex->src[lod_idx].src, 1);

   /* Replace the non-0-LOD in the initial TXS operation by a 0-LOD. */
   nir_instr_rewrite_src(&tex->instr, &tex->src[lod_idx].src,
                         nir_src_for_ssa(nir_imm_int(b, 0)));

   /* TXS(LOD) = max(TXS(0) >> LOD, 1)
    * But we do min(TXS(0), TXS(LOD)) to catch the case of a null surface,
    * which should return 0, not 1.
    */
   b->cursor = nir_after_instr(&tex->instr);
   nir_ssa_def *minified = nir_imin(b, &tex->dest.ssa,
                                    nir_imax(b, nir_ushr(b, &tex->dest.ssa, lod),
                                             nir_imm_int(b, 1)));

   /* Make sure the component encoding the array size (if any) is not
    * minified.
    */
   if (tex->is_array) {
      nir_ssa_def *comp[3];

      assert(dest_size <= ARRAY_SIZE(comp));
      for (unsigned i = 0; i < dest_size - 1; i++)
         comp[i] = nir_channel(b, minified, i);

      /* Last component: take the unminified layer count from the 0-LOD txs. */
      comp[dest_size - 1] = nir_channel(b, &tex->dest.ssa, dest_size - 1);
      minified = nir_vec(b, comp, dest_size);
   }

   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, minified,
                                  minified->parent_instr);
   return true;
}
1245
1246static void
1247nir_lower_txs_cube_array(nir_builder *b, nir_tex_instr *tex)
1248{
1249   assert(tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE && tex->is_array);
1250   tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
1251
1252   b->cursor = nir_after_instr(&tex->instr);
1253
1254   assert(tex->dest.is_ssa);
1255   assert(tex->dest.ssa.num_components == 3);
1256   nir_ssa_def *size = &tex->dest.ssa;
1257   size = nir_vec3(b, nir_channel(b, size, 0),
1258                      nir_channel(b, size, 1),
1259                      nir_idiv(b, nir_channel(b, size, 2),
1260                                  nir_imm_int(b, 6)));
1261
1262   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, size, size->parent_instr);
1263}
1264
/* Lower a multisample txf into an AMD FMASK fetch followed by a fragment
 * fetch: the FMASK value is read first and used to remap the requested
 * sample index before the actual fetch.
 */
static void
nir_lower_ms_txf_to_fragment_fetch(nir_builder *b, nir_tex_instr *tex)
{
   /* Fold any offset source into the coordinate first. */
   lower_offset(b, tex);

   b->cursor = nir_before_instr(&tex->instr);

   /* Create FMASK fetch. */
   assert(tex->texture_index == 0);
   /* One fewer source: the ms_index source is not carried over. */
   nir_tex_instr *fmask_fetch = nir_tex_instr_create(b->shader, tex->num_srcs - 1);
   fmask_fetch->op = nir_texop_fragment_mask_fetch_amd;
   fmask_fetch->coord_components = tex->coord_components;
   fmask_fetch->sampler_dim = tex->sampler_dim;
   fmask_fetch->is_array = tex->is_array;
   fmask_fetch->texture_non_uniform = tex->texture_non_uniform;
   fmask_fetch->dest_type = nir_type_uint32;
   nir_ssa_dest_init(&fmask_fetch->instr, &fmask_fetch->dest, 1, 32, NULL);

   /* Reuse every source except the sample index. */
   fmask_fetch->num_srcs = 0;
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      if (tex->src[i].src_type == nir_tex_src_ms_index)
         continue;
      nir_tex_src *src = &fmask_fetch->src[fmask_fetch->num_srcs++];
      src->src = nir_src_for_ssa(tex->src[i].src.ssa);
      src->src_type = tex->src[i].src_type;
   }

   nir_builder_instr_insert(b, &fmask_fetch->instr);

   /* Obtain new sample index.  The fetched value is treated as holding a
    * 4-bit field per sample index (sample i -> bits [4*i, 4*i+3]).
    */
   int ms_index = nir_tex_instr_src_index(tex, nir_tex_src_ms_index);
   assert(ms_index >= 0);
   nir_src sample = tex->src[ms_index].src;
   nir_ssa_def *new_sample = NULL;
   if (nir_src_is_const(sample) && (nir_src_as_uint(sample) == 0 || nir_src_as_uint(sample) == 7)) {
      /* For the boundary constants 0 and 7 a plain shift or mask is
       * cheaper than a bitfield extract. */
      if (nir_src_as_uint(sample) == 7)
         new_sample = nir_ushr(b, &fmask_fetch->dest.ssa, nir_imm_int(b, 28));
      else
         new_sample = nir_iand_imm(b, &fmask_fetch->dest.ssa, 0xf);
   } else {
      new_sample = nir_ubitfield_extract(b, &fmask_fetch->dest.ssa,
                                         nir_imul_imm(b, sample.ssa, 4), nir_imm_int(b, 4));
   }

   /* Update instruction: retype as fragment fetch and swap in the
    * remapped sample index. */
   tex->op = nir_texop_fragment_fetch_amd;
   nir_instr_rewrite_src_ssa(&tex->instr, &tex->src[ms_index].src, new_sample);
}
1313
/* Lower nir_texop_samples_identical to an AMD FMASK fetch: the result is
 * true iff the fetched fragment mask value is 0.
 */
static void
nir_lower_samples_identical_to_fragment_fetch(nir_builder *b, nir_tex_instr *tex)
{
   b->cursor = nir_after_instr(&tex->instr);

   /* Clone the instruction so the FMASK fetch keeps all of the original's
    * sources, then retype it to a fragment-mask fetch with a scalar u32
    * destination. */
   nir_tex_instr *fmask_fetch = nir_instr_as_tex(nir_instr_clone(b->shader, &tex->instr));
   fmask_fetch->op = nir_texop_fragment_mask_fetch_amd;
   fmask_fetch->dest_type = nir_type_uint32;
   nir_ssa_dest_init(&fmask_fetch->instr, &fmask_fetch->dest, 1, 32, NULL);
   nir_builder_instr_insert(b, &fmask_fetch->instr);

   nir_ssa_def_rewrite_uses(&tex->dest.ssa, nir_ieq_imm(b, &fmask_fetch->dest.ssa, 0));
   nir_instr_remove_v(&tex->instr);
}
1328
1329static void
1330nir_lower_lod_zero_width(nir_builder *b, nir_tex_instr *tex)
1331{
1332   int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
1333   assert(coord_index >= 0);
1334
1335   b->cursor = nir_after_instr(&tex->instr);
1336
1337   nir_ssa_def *is_zero = nir_imm_bool(b, true);
1338   for (unsigned i = 0; i < tex->coord_components; i++) {
1339      nir_ssa_def *coord = nir_channel(b, tex->src[coord_index].src.ssa, i);
1340
1341      /* Compute the sum of the absolute values of derivatives. */
1342      nir_ssa_def *dfdx = nir_fddx(b, coord);
1343      nir_ssa_def *dfdy = nir_fddy(b, coord);
1344      nir_ssa_def *fwidth = nir_fadd(b, nir_fabs(b, dfdx), nir_fabs(b, dfdy));
1345
1346      /* Check if the sum is 0. */
1347      is_zero = nir_iand(b, is_zero, nir_feq(b, fwidth, nir_imm_float(b, 0.0)));
1348   }
1349
1350   /* Replace the raw LOD by -FLT_MAX if the sum is 0 for all coordinates. */
1351   nir_ssa_def *adjusted_lod =
1352      nir_bcsel(b, is_zero, nir_imm_float(b, -FLT_MAX),
1353                   nir_channel(b, &tex->dest.ssa, 1));
1354
1355   nir_ssa_def *def =
1356      nir_vec2(b, nir_channel(b, &tex->dest.ssa, 0), adjusted_lod);
1357
1358   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, def, def->parent_instr);
1359}
1360
1361static bool
1362nir_lower_tex_block(nir_block *block, nir_builder *b,
1363                    const nir_lower_tex_options *options,
1364                    const struct nir_shader_compiler_options *compiler_options)
1365{
1366   bool progress = false;
1367
1368   nir_foreach_instr_safe(instr, block) {
1369      if (instr->type != nir_instr_type_tex)
1370         continue;
1371
1372      nir_tex_instr *tex = nir_instr_as_tex(instr);
1373      bool lower_txp = !!(options->lower_txp & (1 << tex->sampler_dim));
1374
1375      /* mask of src coords to saturate (clamp): */
1376      unsigned sat_mask = 0;
1377
1378      if ((1 << tex->sampler_index) & options->saturate_r)
1379         sat_mask |= (1 << 2);    /* .z */
1380      if ((1 << tex->sampler_index) & options->saturate_t)
1381         sat_mask |= (1 << 1);    /* .y */
1382      if ((1 << tex->sampler_index) & options->saturate_s)
1383         sat_mask |= (1 << 0);    /* .x */
1384
1385      /* If we are clamping any coords, we must lower projector first
1386       * as clamping happens *after* projection:
1387       */
1388      if (lower_txp || sat_mask ||
1389          (options->lower_txp_array && tex->is_array)) {
1390         progress |= project_src(b, tex);
1391      }
1392
1393      if ((tex->op == nir_texop_txf && options->lower_txf_offset) ||
1394          (sat_mask && nir_tex_instr_src_index(tex, nir_tex_src_coord) >= 0) ||
1395          (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT &&
1396           options->lower_rect_offset) ||
1397          (options->lower_offset_filter &&
1398           options->lower_offset_filter(instr, options->callback_data))) {
1399         progress = lower_offset(b, tex) || progress;
1400      }
1401
1402      if ((tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) && options->lower_rect &&
1403          tex->op != nir_texop_txf) {
1404         if (nir_tex_instr_is_query(tex))
1405            tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
1406         else if (compiler_options->has_txs)
1407            lower_rect(b, tex);
1408         else
1409            lower_rect_tex_scale(b, tex);
1410
1411         progress = true;
1412      }
1413
1414      unsigned texture_index = tex->texture_index;
1415      uint32_t texture_mask = 1u << texture_index;
1416      int tex_index = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
1417      if (tex_index >= 0) {
1418         nir_deref_instr *deref = nir_src_as_deref(tex->src[tex_index].src);
1419         nir_variable *var = nir_deref_instr_get_variable(deref);
1420         texture_index = var ? var->data.binding : 0;
1421         texture_mask = var && texture_index < 32 ? (1u << texture_index) : 0u;
1422      }
1423
1424      if (texture_mask & options->lower_y_uv_external) {
1425         lower_y_uv_external(b, tex, options, texture_index);
1426         progress = true;
1427      }
1428
1429      if (texture_mask & options->lower_y_u_v_external) {
1430         lower_y_u_v_external(b, tex, options, texture_index);
1431         progress = true;
1432      }
1433
1434      if (texture_mask & options->lower_yx_xuxv_external) {
1435         lower_yx_xuxv_external(b, tex, options, texture_index);
1436         progress = true;
1437      }
1438
1439      if (texture_mask & options->lower_xy_uxvx_external) {
1440         lower_xy_uxvx_external(b, tex, options, texture_index);
1441         progress = true;
1442      }
1443
1444      if (texture_mask & options->lower_ayuv_external) {
1445         lower_ayuv_external(b, tex, options, texture_index);
1446         progress = true;
1447      }
1448
1449      if (texture_mask & options->lower_xyuv_external) {
1450         lower_xyuv_external(b, tex, options, texture_index);
1451         progress = true;
1452      }
1453
1454      if (texture_mask & options->lower_yuv_external) {
1455         lower_yuv_external(b, tex, options, texture_index);
1456         progress = true;
1457      }
1458
1459      if ((1 << tex->texture_index) & options->lower_yu_yv_external) {
1460         lower_yu_yv_external(b, tex, options, texture_index);
1461         progress = true;
1462      }
1463
1464      if ((1 << tex->texture_index) & options->lower_y41x_external) {
1465         lower_y41x_external(b, tex, options, texture_index);
1466         progress = true;
1467      }
1468
1469      if (sat_mask) {
1470         tex = saturate_src(b, tex, sat_mask);
1471         progress = true;
1472      }
1473
1474      if (tex->op == nir_texop_tg4 && options->lower_tg4_broadcom_swizzle) {
1475         swizzle_tg4_broadcom(b, tex);
1476         progress = true;
1477      }
1478
1479      if ((texture_mask & options->swizzle_result) &&
1480          !nir_tex_instr_is_query(tex) &&
1481          !(tex->is_shadow && tex->is_new_style_shadow)) {
1482         swizzle_result(b, tex, options->swizzles[tex->texture_index]);
1483         progress = true;
1484      }
1485
1486      /* should be after swizzle so we know which channels are rgb: */
1487      if ((texture_mask & options->lower_srgb) &&
1488          !nir_tex_instr_is_query(tex) && !tex->is_shadow) {
1489         linearize_srgb_result(b, tex);
1490         progress = true;
1491      }
1492
1493      const bool has_min_lod =
1494         nir_tex_instr_src_index(tex, nir_tex_src_min_lod) >= 0;
1495      const bool has_offset =
1496         nir_tex_instr_src_index(tex, nir_tex_src_offset) >= 0;
1497
1498      if (tex->op == nir_texop_txb && tex->is_shadow && has_min_lod &&
1499          options->lower_txb_shadow_clamp) {
1500         lower_implicit_lod(b, tex);
1501         progress = true;
1502      }
1503
1504      if (options->lower_tex_packing[tex->sampler_index] !=
1505          nir_lower_tex_packing_none &&
1506          tex->op != nir_texop_txs &&
1507          tex->op != nir_texop_query_levels &&
1508          tex->op != nir_texop_texture_samples) {
1509         lower_tex_packing(b, tex, options);
1510         progress = true;
1511      }
1512
1513      if (options->lower_array_layer_round_even && tex->is_array &&
1514          tex->op != nir_texop_lod) {
1515         progress |= lower_array_layer_round_even(b, tex);
1516      }
1517
1518      if (tex->op == nir_texop_txd &&
1519          (options->lower_txd ||
1520           (options->lower_txd_shadow && tex->is_shadow) ||
1521           (options->lower_txd_shadow_clamp && tex->is_shadow && has_min_lod) ||
1522           (options->lower_txd_offset_clamp && has_offset && has_min_lod) ||
1523           (options->lower_txd_clamp_bindless_sampler && has_min_lod &&
1524            nir_tex_instr_src_index(tex, nir_tex_src_sampler_handle) != -1) ||
1525           (options->lower_txd_clamp_if_sampler_index_not_lt_16 &&
1526            has_min_lod && !sampler_index_lt(tex, 16)) ||
1527           (options->lower_txd_cube_map &&
1528            tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) ||
1529           (options->lower_txd_3d &&
1530            tex->sampler_dim == GLSL_SAMPLER_DIM_3D) ||
1531           (options->lower_txd_array && tex->is_array))) {
1532         lower_gradient(b, tex);
1533         progress = true;
1534         continue;
1535      }
1536
1537      /* TXF, TXS and TXL require a LOD but not everything we implement using those
1538       * three opcodes provides one.  Provide a default LOD of 0.
1539       */
1540      if ((nir_tex_instr_src_index(tex, nir_tex_src_lod) == -1) &&
1541          (tex->op == nir_texop_txf || tex->op == nir_texop_txs ||
1542           tex->op == nir_texop_txl || tex->op == nir_texop_query_levels)) {
1543         b->cursor = nir_before_instr(&tex->instr);
1544         nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_src_for_ssa(nir_imm_int(b, 0)));
1545         progress = true;
1546         continue;
1547      }
1548
1549      /* Only fragment and compute (in some cases) support implicit
1550       * derivatives.  Lower those opcodes which use implicit derivatives to
1551       * use an explicit LOD of 0.
1552       * But don't touch RECT samplers because they don't have mips.
1553       */
1554      if (options->lower_invalid_implicit_lod &&
1555          nir_tex_instr_has_implicit_derivative(tex) &&
1556          tex->sampler_dim != GLSL_SAMPLER_DIM_RECT &&
1557          !nir_shader_supports_implicit_lod(b->shader)) {
1558         lower_zero_lod(b, tex);
1559         progress = true;
1560      }
1561
1562      if (options->lower_txs_lod && tex->op == nir_texop_txs) {
1563         progress |= nir_lower_txs_lod(b, tex);
1564         continue;
1565      }
1566
1567      if (options->lower_txs_cube_array && tex->op == nir_texop_txs &&
1568          tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE && tex->is_array) {
1569         nir_lower_txs_cube_array(b, tex);
1570         progress = true;
1571         continue;
1572      }
1573
1574      /* has to happen after all the other lowerings as the original tg4 gets
1575       * replaced by 4 tg4 instructions.
1576       */
1577      if (tex->op == nir_texop_tg4 &&
1578          nir_tex_instr_has_explicit_tg4_offsets(tex) &&
1579          options->lower_tg4_offsets) {
1580         progress |= lower_tg4_offsets(b, tex);
1581         continue;
1582      }
1583
1584      if (options->lower_to_fragment_fetch_amd && tex->op == nir_texop_txf_ms) {
1585         nir_lower_ms_txf_to_fragment_fetch(b, tex);
1586         progress = true;
1587         continue;
1588      }
1589
1590      if (options->lower_to_fragment_fetch_amd && tex->op == nir_texop_samples_identical) {
1591         nir_lower_samples_identical_to_fragment_fetch(b, tex);
1592         progress = true;
1593         continue;
1594      }
1595
1596      if (options->lower_lod_zero_width && tex->op == nir_texop_lod) {
1597         nir_lower_lod_zero_width(b, tex);
1598         progress = true;
1599         continue;
1600      }
1601   }
1602
1603   return progress;
1604}
1605
1606static bool
1607nir_lower_tex_impl(nir_function_impl *impl,
1608                   const nir_lower_tex_options *options,
1609                   const struct nir_shader_compiler_options *compiler_options)
1610{
1611   bool progress = false;
1612   nir_builder builder;
1613   nir_builder_init(&builder, impl);
1614
1615   nir_foreach_block(block, impl) {
1616      progress |= nir_lower_tex_block(block, &builder, options, compiler_options);
1617   }
1618
1619   nir_metadata_preserve(impl, nir_metadata_block_index |
1620                               nir_metadata_dominance);
1621   return progress;
1622}
1623
1624bool
1625nir_lower_tex(nir_shader *shader, const nir_lower_tex_options *options)
1626{
1627   bool progress = false;
1628
1629   nir_foreach_function(function, shader) {
1630      if (function->impl)
1631         progress |= nir_lower_tex_impl(function->impl, options, shader->options);
1632   }
1633
1634   return progress;
1635}
1636