/* -*- mesa-c++  -*-
 *
 * Copyright (c) 2022 Collabora LTD
 *
 * Author: Gert Wollny <gert.wollny@collabora.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "sfn_nir_lower_tex.h"

#include "nir.h"
#include "nir_builder.h"
#include "nir_builtin_builder.h"

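/* For samplers with normalized coordinates, shift the coordinates by
 * -0.5 texel (i.e. -0.5 * 1/size) to compensate for the nearest filtering
 * the hardware incorrectly applies to integer-format Gather4 (see the
 * comment above r600_nir_lower_int_tg4 below). For a cube that was lowered
 * to a 2D array only the x/y face coordinates are shifted; the layer
 * component is kept as-is.
 */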
static bool lower_coord_shift_normalized(nir_builder *b, nir_tex_instr *tex)
{
   b->cursor = nir_before_instr(&tex->instr);

   nir_ssa_def *size = nir_i2f32(b, nir_get_texture_size(b, tex));
   nir_ssa_def *scale = nir_frcp(b, size);

   int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
   nir_ssa_def *corr = nullptr;
   if (unlikely(tex->array_is_lowered_cube)) {
      auto corr2 = nir_fadd(b, nir_channels(b, tex->src[coord_index].src.ssa, 3),
                            nir_fmul(b, nir_imm_float(b, -0.5f), scale));
      corr = nir_vec3(b, nir_channel(b, corr2, 0), nir_channel(b, corr2, 1),
                      nir_channel(b, tex->src[coord_index].src.ssa, 2));
   } else {
      corr = nir_fadd(b,
                      nir_fmul(b, nir_imm_float(b, -0.5f), scale),
                      tex->src[coord_index].src.ssa);
   }

   nir_instr_rewrite_src(&tex->instr, &tex->src[coord_index].src,
                         nir_src_for_ssa(corr));
   return true;
}

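/* Same as lower_coord_shift_normalized, but for RECT samplers, which use
 * unnormalized coordinates: the shift is a constant -0.5 texel.
 */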
static bool lower_coord_shift_unnormalized(nir_builder *b, nir_tex_instr *tex)
{
   b->cursor = nir_before_instr(&tex->instr);
   int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
   nir_ssa_def *corr = nullptr;
   if (unlikely(tex->array_is_lowered_cube)) {
      auto corr2 = nir_fadd(b, nir_channels(b, tex->src[coord_index].src.ssa, 3),
                            nir_imm_float(b, -0.5f));
      corr = nir_vec3(b, nir_channel(b, corr2, 0), nir_channel(b, corr2, 1),
                      nir_channel(b, tex->src[coord_index].src.ssa, 2));
   } else {
      corr = nir_fadd(b, tex->src[coord_index].src.ssa,
                      nir_imm_float(b, -0.5f));
   }
   nir_instr_rewrite_src(&tex->instr, &tex->src[coord_index].src,
                         nir_src_for_ssa(corr));
   return true;
}

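/* Walk all tex instructions and shift the coordinates of tg4 lookups with a
 * non-float (i.e. integer) destination type. Plain cube maps are skipped
 * here; once they have been lowered to 2D arrays, the array_is_lowered_cube
 * path in the shift helpers above applies.
 */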
static bool
r600_nir_lower_int_tg4_impl(nir_function_impl *impl)
{
   nir_builder b;
   nir_builder_init(&b, impl);

   bool progress = false;
   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type == nir_instr_type_tex) {
            nir_tex_instr *tex = nir_instr_as_tex(instr);
            if (tex->op == nir_texop_tg4 &&
                tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE) {
               if (nir_alu_type_get_base_type(tex->dest_type) != nir_type_float) {
                  if (tex->sampler_dim != GLSL_SAMPLER_DIM_RECT)
                     lower_coord_shift_normalized(&b, tex);
                  else
                     lower_coord_shift_unnormalized(&b, tex);
                  progress = true;
               }
            }
         }
      }
   }
   return progress;
}

/*
 * This lowering pass works around a bug in r600 when doing TG4 from
 * integer-valued samplers.
 *
 * Gather4 should follow the same rules as bilinear filtering, but the
 * hardware incorrectly forces nearest filtering if the texture format is
 * integer. Since Gather4 always returns the four texels that bilinear
 * filtering would sample, the only visible effect is that the final
 * coordinates are off by 0.5 of the texel size, which this pass corrects.
 */

bool r600_nir_lower_int_tg4(nir_shader *shader)
{
   bool progress = false;
   bool need_lowering = false;

   nir_foreach_uniform_variable(var, shader) {
      if (var->type->is_sampler()) {
         if (glsl_base_type_is_integer(var->type->sampled_type)) {
            need_lowering = true;
         }
      }
   }

   if (need_lowering) {
      nir_foreach_function(function, shader) {
         if (function->impl && r600_nir_lower_int_tg4_impl(function->impl))
            progress = true;
      }
   }

   return progress;
}

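/* Lower an explicit-LOD or LOD-biased shadow lookup on an array or cube
 * texture to txd with explicit derivatives. The requested LOD lambda is
 * reconstructed (the explicit lod, or the queried lod plus bias, clamped
 * against min_lod), and gradients of magnitude exp2(lambda) / size are
 * supplied so that the hardware derives the same LOD from them.
 */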
static
bool lower_txl_txf_array_or_cube(nir_builder *b, nir_tex_instr *tex)
{
   assert(tex->op == nir_texop_txb || tex->op == nir_texop_txl);
   assert(nir_tex_instr_src_index(tex, nir_tex_src_ddx) < 0);
   assert(nir_tex_instr_src_index(tex, nir_tex_src_ddy) < 0);

   b->cursor = nir_before_instr(&tex->instr);

   int lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_lod);
   int bias_idx = nir_tex_instr_src_index(tex, nir_tex_src_bias);
   int min_lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_min_lod);
   assert(lod_idx >= 0 || bias_idx >= 0);

   nir_ssa_def *size = nir_i2f32(b, nir_get_texture_size(b, tex));
   nir_ssa_def *lod = (lod_idx >= 0) ?
                         nir_ssa_for_src(b, tex->src[lod_idx].src, 1) :
                         nir_get_texture_lod(b, tex);

   if (bias_idx >= 0)
      lod = nir_fadd(b, lod, nir_ssa_for_src(b, tex->src[bias_idx].src, 1));

   if (min_lod_idx >= 0)
      lod = nir_fmax(b, lod, nir_ssa_for_src(b, tex->src[min_lod_idx].src, 1));

   /* TODO: should a max LOD clamp also be applied here? */

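   /* Turn lambda back into a gradient: the LOD the hardware derives from
    * explicit derivatives is log2(max_component(|ddx|, |ddy|) * size), so a
    * gradient of exp2(lambda) / size reproduces the requested lambda.
    */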
   nir_ssa_def *lambda_exp = nir_fexp2(b, lod);
   nir_ssa_def *scale = NULL;

   if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
      unsigned int swizzle[NIR_MAX_VEC_COMPONENTS] = {0, 0, 0, 0};
      scale = nir_frcp(b, nir_channels(b, size, 1));
      scale = nir_swizzle(b, scale, swizzle, 3);
   } else if (tex->is_array) {
      int cmp_mask = (1 << (size->num_components - 1)) - 1;
      scale = nir_frcp(b, nir_channels(b, size,
                                       (nir_component_mask_t)cmp_mask));
   }

   nir_ssa_def *grad = nir_fmul(b, lambda_exp, scale);

   if (lod_idx >= 0)
      nir_tex_instr_remove_src(tex, lod_idx);
   if (bias_idx >= 0)
      nir_tex_instr_remove_src(tex, bias_idx);
   if (min_lod_idx >= 0)
      nir_tex_instr_remove_src(tex, min_lod_idx);
   nir_tex_instr_add_src(tex, nir_tex_src_ddx, nir_src_for_ssa(grad));
   nir_tex_instr_add_src(tex, nir_tex_src_ddy, nir_src_for_ssa(grad));

   tex->op = nir_texop_txd;
   return true;
}


static bool
r600_nir_lower_txl_txf_array_or_cube_impl(nir_function_impl *impl)
{
   nir_builder b;
   nir_builder_init(&b, impl);

   bool progress = false;
   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type == nir_instr_type_tex) {
            nir_tex_instr *tex = nir_instr_as_tex(instr);

            if (tex->is_shadow &&
                (tex->op == nir_texop_txl || tex->op == nir_texop_txb) &&
                (tex->is_array || tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE))
               progress |= lower_txl_txf_array_or_cube(&b, tex);
         }
      }
   }
   return progress;
}

bool
r600_nir_lower_txl_txf_array_or_cube(nir_shader *shader)
{
   bool progress = false;
   nir_foreach_function(function, shader) {
      if (function->impl && r600_nir_lower_txl_txf_array_or_cube_impl(function->impl))
         progress = true;
   }
   return progress;
}

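/* Filter callback for nir_shader_lower_instructions: match all texture ops
 * on cube samplers whose coordinates (and, for txd, derivatives) the
 * 2D-array lowering below knows how to rewrite.
 */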
static bool
r600_nir_lower_cube_to_2darray_filter(const nir_instr *instr, const void *_options)
{
   if (instr->type != nir_instr_type_tex)
      return false;

   auto tex = nir_instr_as_tex(instr);
   if (tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE)
      return false;

   switch (tex->op) {
   case nir_texop_tex:
   case nir_texop_txb:
   case nir_texop_txf:
   case nir_texop_txl:
   case nir_texop_lod:
   case nir_texop_tg4:
   case nir_texop_txd:
      return true;
   default:
      return false;
   }
}

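/* Lower a cube (array) texture instruction to a 2D-array lookup. The
 * r600-specific CUBE instruction (nir_cube_r600) produces the per-face s/t
 * coordinates, the major-axis component, and the face index; the final face
 * coordinates are s/|ma| + 1.5, and for cube arrays the layer becomes
 * 8 * array_slice + face.
 */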
static nir_ssa_def *
r600_nir_lower_cube_to_2darray_impl(nir_builder *b, nir_instr *instr, void *_options)
{
   b->cursor = nir_before_instr(instr);

   auto tex = nir_instr_as_tex(instr);
   int coord_idx = nir_tex_instr_src_index(tex, nir_tex_src_coord);
   assert(coord_idx >= 0);

   auto cubed = nir_cube_r600(b, nir_channels(b, tex->src[coord_idx].src.ssa, 0x7));
   auto xy = nir_fmad(b,
                      nir_vec2(b, nir_channel(b, cubed, 1), nir_channel(b, cubed, 0)),
                      nir_frcp(b, nir_fabs(b, nir_channel(b, cubed, 2))),
                      nir_imm_float(b, 1.5));

   nir_ssa_def *z = nir_channel(b, cubed, 3);
   if (tex->is_array) {
      auto slice = nir_fround_even(b, nir_channel(b, tex->src[coord_idx].src.ssa, 3));
      z = nir_fmad(b, nir_fmax(b, slice, nir_imm_float(b, 0.0)), nir_imm_float(b, 8.0),
                   z);
   }

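   /* txd carries explicit derivatives; scale them by 0.5 so that they match
    * the coordinate space of the lowered 2D-array lookup.
    */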
   if (tex->op == nir_texop_txd) {
      int ddx_idx = nir_tex_instr_src_index(tex, nir_tex_src_ddx);
      auto zero_dot_5 = nir_imm_float(b, 0.5);
      nir_instr_rewrite_src(&tex->instr, &tex->src[ddx_idx].src,
                            nir_src_for_ssa(nir_fmul(b, nir_ssa_for_src(b, tex->src[ddx_idx].src, 3), zero_dot_5)));

      int ddy_idx = nir_tex_instr_src_index(tex, nir_tex_src_ddy);
      nir_instr_rewrite_src(&tex->instr, &tex->src[ddy_idx].src,
                            nir_src_for_ssa(nir_fmul(b, nir_ssa_for_src(b, tex->src[ddy_idx].src, 3), zero_dot_5)));
   }

   auto new_coord = nir_vec3(b, nir_channel(b, xy, 0), nir_channel(b, xy, 1), z);
   nir_instr_rewrite_src(&tex->instr, &tex->src[coord_idx].src,
                         nir_src_for_ssa(new_coord));
   tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
   tex->is_array = true;
   tex->array_is_lowered_cube = true;

   tex->coord_components = 3;

   return NIR_LOWER_INSTR_PROGRESS;
}

bool
r600_nir_lower_cube_to_2darray(nir_shader *shader)
{
   return nir_shader_lower_instructions(shader,
                                        r600_nir_lower_cube_to_2darray_filter,
                                        r600_nir_lower_cube_to_2darray_impl, nullptr);
}