/* -*- mesa-c++ -*-
 *
 * Copyright (c) 2022 Collabora LTD
 *
 * Author: Gert Wollny <gert.wollny@collabora.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "sfn_nir_lower_tex.h"

#include "nir.h"
#include "nir_builder.h"
#include "nir_builtin_builder.h"

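/* Shift the texture coordinate by half a texel so that the texel footprint
 * selected by the (erroneously applied) nearest filtering matches the
 * footprint bilinear filtering would have used. With normalized coordinates
 * one texel is 1/size wide, hence the scale by frcp(size). If the
 * instruction samples a cube map that was already lowered to a 2D array,
 * only x and y are shifted; z carries the array slice and is left untouched.
 */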
static bool lower_coord_shift_normalized(nir_builder *b, nir_tex_instr *tex)
{
   b->cursor = nir_before_instr(&tex->instr);

   nir_ssa_def *size = nir_i2f32(b, nir_get_texture_size(b, tex));
   nir_ssa_def *scale = nir_frcp(b, size);

   int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
   nir_ssa_def *corr = nullptr;
   if (unlikely(tex->array_is_lowered_cube)) {
      auto corr2 = nir_fadd(b, nir_channels(b, tex->src[coord_index].src.ssa, 3),
                            nir_fmul(b, nir_imm_float(b, -0.5f), scale));
      corr = nir_vec3(b, nir_channel(b, corr2, 0), nir_channel(b, corr2, 1),
                      nir_channel(b, tex->src[coord_index].src.ssa, 2));
   } else {
      corr = nir_fadd(b,
                      nir_fmul(b, nir_imm_float(b, -0.5f), scale),
                      tex->src[coord_index].src.ssa);
   }

   nir_instr_rewrite_src(&tex->instr, &tex->src[coord_index].src,
                         nir_src_for_ssa(corr));
   return true;
}

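/* Same half-texel correction, but GLSL_SAMPLER_DIM_RECT samplers use
 * unnormalized coordinates, so the shift is a constant -0.5 and no texture
 * size query is needed.
 */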
static bool lower_coord_shift_unnormalized(nir_builder *b, nir_tex_instr *tex)
{
   b->cursor = nir_before_instr(&tex->instr);
   int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
   nir_ssa_def *corr = nullptr;
   if (unlikely(tex->array_is_lowered_cube)) {
      auto corr2 = nir_fadd(b, nir_channels(b, tex->src[coord_index].src.ssa, 3),
                            nir_imm_float(b, -0.5f));
      corr = nir_vec3(b, nir_channel(b, corr2, 0), nir_channel(b, corr2, 1),
                      nir_channel(b, tex->src[coord_index].src.ssa, 2));
   } else {
      corr = nir_fadd(b, tex->src[coord_index].src.ssa,
                      nir_imm_float(b, -0.5f));
   }
   nir_instr_rewrite_src(&tex->instr, &tex->src[coord_index].src,
                         nir_src_for_ssa(corr));
   return true;
}

static bool
r600_nir_lower_int_tg4_impl(nir_function_impl *impl)
{
   nir_builder b;
   nir_builder_init(&b, impl);

   bool progress = false;
   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type == nir_instr_type_tex) {
            nir_tex_instr *tex = nir_instr_as_tex(instr);
            if (tex->op == nir_texop_tg4 &&
                tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE) {
               if (nir_alu_type_get_base_type(tex->dest_type) != nir_type_float) {
                  if (tex->sampler_dim != GLSL_SAMPLER_DIM_RECT)
                     lower_coord_shift_normalized(&b, tex);
                  else
                     lower_coord_shift_unnormalized(&b, tex);
                  progress = true;
               }
            }
         }
      }
   }
   return progress;
}

/*
 * This lowering pass works around a bug in r600 when doing TG4 from
 * integer-valued samplers.
 *
 * Gather4 should follow the same rules as bilinear filtering, but the
 * hardware incorrectly forces nearest filtering if the texture format is
 * integer. Since Gather4 always returns the four texels of the bilinear
 * footprint, the only visible effect of the bug is that the fetched texel
 * coordinates are off by half a texel, which the helpers above compensate
 * for by shifting the coordinate by -0.5 texels.
 */

bool r600_nir_lower_int_tg4(nir_shader *shader)
{
   bool progress = false;
   bool need_lowering = false;

   nir_foreach_uniform_variable(var, shader) {
      if (var->type->is_sampler()) {
         if (glsl_base_type_is_integer(var->type->sampled_type)) {
            need_lowering = true;
         }
      }
   }

   if (need_lowering) {
      nir_foreach_function(function, shader) {
         if (function->impl && r600_nir_lower_int_tg4_impl(function->impl))
            progress = true;
      }
   }

   return progress;
}

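/* Turn an explicit LOD (plus optional bias and min_lod clamp) into explicit
 * gradients so the instruction can be emitted as txd: the hardware derives
 * the LOD as lambda = log2(rho), with rho roughly the largest texel-space
 * derivative, so passing exp2(lod) / size as both ddx and ddy makes it
 * recompute the requested lambda. For cube maps only the first component of
 * the size is used (the faces are square), broadcast to all three coordinate
 * components; for arrays the slice component carries no gradient and is
 * masked off.
 */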
static bool
lower_txl_txf_array_or_cube(nir_builder *b, nir_tex_instr *tex)
{
   assert(tex->op == nir_texop_txb || tex->op == nir_texop_txl);
   assert(nir_tex_instr_src_index(tex, nir_tex_src_ddx) < 0);
   assert(nir_tex_instr_src_index(tex, nir_tex_src_ddy) < 0);

   b->cursor = nir_before_instr(&tex->instr);

   int lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_lod);
   int bias_idx = nir_tex_instr_src_index(tex, nir_tex_src_bias);
   int min_lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_min_lod);
   assert(lod_idx >= 0 || bias_idx >= 0);

   nir_ssa_def *size = nir_i2f32(b, nir_get_texture_size(b, tex));
   nir_ssa_def *lod = (lod_idx >= 0) ?
                         nir_ssa_for_src(b, tex->src[lod_idx].src, 1) :
                         nir_get_texture_lod(b, tex);

   if (bias_idx >= 0)
      lod = nir_fadd(b, lod, nir_ssa_for_src(b, tex->src[bias_idx].src, 1));

   if (min_lod_idx >= 0)
      lod = nir_fmax(b, lod, nir_ssa_for_src(b, tex->src[min_lod_idx].src, 1));

   /* max lod? */

   nir_ssa_def *lambda_exp = nir_fexp2(b, lod);
   nir_ssa_def *scale = NULL;

   if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
      unsigned int swizzle[NIR_MAX_VEC_COMPONENTS] = {0, 0, 0, 0};
      scale = nir_frcp(b, nir_channels(b, size, 1));
      scale = nir_swizzle(b, scale, swizzle, 3);
   } else if (tex->is_array) {
      int cmp_mask = (1 << (size->num_components - 1)) - 1;
      scale = nir_frcp(b, nir_channels(b, size,
                                       (nir_component_mask_t)cmp_mask));
   }

   nir_ssa_def *grad = nir_fmul(b, lambda_exp, scale);

   if (lod_idx >= 0)
      nir_tex_instr_remove_src(tex, lod_idx);
   if (bias_idx >= 0)
      nir_tex_instr_remove_src(tex, bias_idx);
   if (min_lod_idx >= 0)
      nir_tex_instr_remove_src(tex, min_lod_idx);
   nir_tex_instr_add_src(tex, nir_tex_src_ddx, nir_src_for_ssa(grad));
   nir_tex_instr_add_src(tex, nir_tex_src_ddy, nir_src_for_ssa(grad));

   tex->op = nir_texop_txd;
   return true;
}

static bool
r600_nir_lower_txl_txf_array_or_cube_impl(nir_function_impl *impl)
{
   nir_builder b;
   nir_builder_init(&b, impl);

   bool progress = false;
   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type == nir_instr_type_tex) {
            nir_tex_instr *tex = nir_instr_as_tex(instr);

            if (tex->is_shadow &&
                (tex->op == nir_texop_txl || tex->op == nir_texop_txb) &&
                (tex->is_array || tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE))
               progress |= lower_txl_txf_array_or_cube(&b, tex);
         }
      }
   }
   return progress;
}

bool
r600_nir_lower_txl_txf_array_or_cube(nir_shader *shader)
{
   bool progress = false;
   nir_foreach_function(function, shader) {
      if (function->impl && r600_nir_lower_txl_txf_array_or_cube_impl(function->impl))
         progress = true;
   }
   return progress;
}

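/* Lower cube and cube array sampling to 2D array sampling. The filter below
 * selects every texturing op that consumes a cube coordinate; the actual
 * rewrite happens in r600_nir_lower_cube_to_2darray_impl.
 */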
static bool
r600_nir_lower_cube_to_2darray_filter(const nir_instr *instr, const void *_options)
{
   if (instr->type != nir_instr_type_tex)
      return false;

   auto tex = nir_instr_as_tex(instr);
   if (tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE)
      return false;

   switch (tex->op) {
   case nir_texop_tex:
   case nir_texop_txb:
   case nir_texop_txf:
   case nir_texop_txl:
   case nir_texop_lod:
   case nir_texop_tg4:
   case nir_texop_txd:
      return true;
   default:
      return false;
   }
}

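/* nir_cube_r600 mirrors the hardware CUBE instruction: channels 0 and 1 hold
 * the unnormalized t and s coordinates on the selected face, channel 2 twice
 * the major-axis value, and channel 3 the face index. Dividing s and t by
 * |2 * ma| projects them into [-0.5, 0.5]; the +1.5 bias then moves them
 * into the range the hardware expects for lowered cube coordinates. Because
 * the projection halves the coordinate range, explicit txd gradients are
 * scaled by 0.5 as well. For cube arrays the 2D array slice becomes
 * max(round(index), 0) * 8 + face.
 */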
static nir_ssa_def *
r600_nir_lower_cube_to_2darray_impl(nir_builder *b, nir_instr *instr, void *_options)
{
   b->cursor = nir_before_instr(instr);

   auto tex = nir_instr_as_tex(instr);
   int coord_idx = nir_tex_instr_src_index(tex, nir_tex_src_coord);
   assert(coord_idx >= 0);

   auto cubed = nir_cube_r600(b, nir_channels(b, tex->src[coord_idx].src.ssa, 0x7));
   auto xy = nir_fmad(b,
                      nir_vec2(b, nir_channel(b, cubed, 1), nir_channel(b, cubed, 0)),
                      nir_frcp(b, nir_fabs(b, nir_channel(b, cubed, 2))),
                      nir_imm_float(b, 1.5));

   nir_ssa_def *z = nir_channel(b, cubed, 3);
   if (tex->is_array) {
      auto slice = nir_fround_even(b, nir_channel(b, tex->src[coord_idx].src.ssa, 3));
      z = nir_fmad(b, nir_fmax(b, slice, nir_imm_float(b, 0.0)),
                   nir_imm_float(b, 8.0), z);
   }

   if (tex->op == nir_texop_txd) {
      int ddx_idx = nir_tex_instr_src_index(tex, nir_tex_src_ddx);
      auto zero_dot_5 = nir_imm_float(b, 0.5);
      nir_instr_rewrite_src(&tex->instr, &tex->src[ddx_idx].src,
                            nir_src_for_ssa(nir_fmul(b, nir_ssa_for_src(b, tex->src[ddx_idx].src, 3),
                                                     zero_dot_5)));

      int ddy_idx = nir_tex_instr_src_index(tex, nir_tex_src_ddy);
      nir_instr_rewrite_src(&tex->instr, &tex->src[ddy_idx].src,
                            nir_src_for_ssa(nir_fmul(b, nir_ssa_for_src(b, tex->src[ddy_idx].src, 3),
                                                     zero_dot_5)));
   }

   auto new_coord = nir_vec3(b, nir_channel(b, xy, 0), nir_channel(b, xy, 1), z);
   nir_instr_rewrite_src(&tex->instr, &tex->src[coord_idx].src,
                         nir_src_for_ssa(new_coord));
   tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
   tex->is_array = true;
   tex->array_is_lowered_cube = true;
   tex->coord_components = 3;

   return NIR_LOWER_INSTR_PROGRESS;
}

bool
r600_nir_lower_cube_to_2darray(nir_shader *shader)
{
   return nir_shader_lower_instructions(shader,
                                        r600_nir_lower_cube_to_2darray_filter,
                                        r600_nir_lower_cube_to_2darray_impl,
                                        nullptr);
}
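/* A minimal sketch of how a driver might schedule these passes; the call
 * site and ordering are assumptions, NIR_PASS comes from nir.h:
 *
 *    bool progress = false;
 *    NIR_PASS(progress, shader, r600_nir_lower_cube_to_2darray);
 *    NIR_PASS(progress, shader, r600_nir_lower_int_tg4);
 *    NIR_PASS(progress, shader, r600_nir_lower_txl_txf_array_or_cube);
 *
 * Running the cube lowering first lets the TG4 workaround see
 * array_is_lowered_cube on former cube instructions, so the coordinate-shift
 * helpers leave the slice component alone.
 */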