1 /**************************************************************************
2 *
3 * Copyright 2010-2021 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
18 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
24 * of the Software.
25 *
26 **************************************************************************/
27
28
29 #include "util/u_memory.h"
30 #include "util/u_math.h"
31 #include "tgsi/tgsi_parse.h"
32 #include "tgsi/tgsi_text.h"
33 #include "tgsi/tgsi_util.h"
34 #include "tgsi/tgsi_dump.h"
35 #include "lp_debug.h"
36 #include "lp_state.h"
37 #include "nir.h"
38
39 /*
40 * Detect Aero minification shaders.
41 *
 * Aero does not use texture mipmaps when a window gets animated and its
 * shape bent. Instead it uses the average of 4 nearby texels. This is the
 * simplest such shader, but there are several variations:
45 *
46 * FRAG
47 * DCL IN[0], GENERIC[1], PERSPECTIVE
48 * DCL IN[1], GENERIC[2], PERSPECTIVE
49 * DCL IN[2], GENERIC[3], PERSPECTIVE
50 * DCL OUT[0], COLOR
51 * DCL SAMP[0]
52 * DCL TEMP[0..3]
53 * IMM FLT32 { 0.2500, 0.0000, 0.0000, 0.0000 }
54 * MOV TEMP[0].x, IN[0].zzzz
55 * MOV TEMP[0].y, IN[0].wwww
56 * MOV TEMP[1].x, IN[1].zzzz
57 * MOV TEMP[1].y, IN[1].wwww
58 * TEX TEMP[0], TEMP[0], SAMP[0], 2D
59 * TEX TEMP[2], IN[0], SAMP[0], 2D
60 * TEX TEMP[3], IN[1], SAMP[0], 2D
61 * TEX TEMP[1], TEMP[1], SAMP[0], 2D
62 * ADD TEMP[0], TEMP[0], TEMP[2]
63 * ADD TEMP[0], TEMP[3], TEMP[0]
64 * ADD TEMP[0], TEMP[1], TEMP[0]
65 * MUL TEMP[0], TEMP[0], IN[2]
66 * MUL TEMP[0], TEMP[0], IMM[0].xxxx
67 * MOV OUT[0], TEMP[0]
68 * END
69 *
 * Texture coordinates are interleaved like the Gaussian blur shaders, but
 * unlike the latter there isn't structure in the sub-pixel positioning of the
 * texels, other than being arranged in a diamond-like shape. For example,
 * these are the relative offsets of the texels relative to the average:
74 *
75 * x offset y offset
76 * --------------------
77 * 0.691834 -0.21360
78 * -0.230230 -0.64160
79 * -0.692406 0.21356
80 * 0.230802 0.64160
81 *
82 * These shaders are typically used with linear min/mag filtering, but the
83 * linear filtering provides very little visual improvement compared to the
84 * performance impact it has. The ultimate purpose of detecting these shaders
85 * is to override with nearest texture filtering.
86 */
87 static inline boolean
match_aero_minification_shader(const struct tgsi_token *tokens, const struct lp_tgsi_info *info)88 match_aero_minification_shader(const struct tgsi_token *tokens,
89 const struct lp_tgsi_info *info)
90 {
91 struct tgsi_parse_context parse;
92 unsigned coord_mask;
93 boolean has_quarter_imm;
94 unsigned index, chan;
95
96 if ((info->base.opcode_count[TGSI_OPCODE_TEX] != 4 &&
97 info->base.opcode_count[TGSI_OPCODE_SAMPLE] != 4) ||
98 info->num_texs != 4) {
99 return FALSE;
100 }
101
102 /*
103 * Ensure the texture coordinates are interleaved as in the example above.
104 */
105
106 coord_mask = 0;
107 for (index = 0; index < 4; ++index) {
108 const struct lp_tgsi_texture_info *tex = &info->tex[index];
109 if (tex->sampler_unit != 0 ||
110 tex->texture_unit != 0 ||
111 tex->coord[0].file != TGSI_FILE_INPUT ||
112 tex->coord[1].file != TGSI_FILE_INPUT ||
113 tex->coord[0].u.index != tex->coord[1].u.index ||
114 (tex->coord[0].swizzle % 2) != 0 ||
115 tex->coord[1].swizzle != tex->coord[0].swizzle + 1) {
116 return FALSE;
117 }
118
119 coord_mask |= 1 << (tex->coord[0].u.index*2 + tex->coord[0].swizzle/2);
120 }
121 if (coord_mask != 0xf) {
122 return FALSE;
123 }
124
125 /*
126 * Ensure it has the 0.25 immediate.
127 */
128
129 has_quarter_imm = FALSE;
130
131 tgsi_parse_init(&parse, tokens);
132
133 while (!tgsi_parse_end_of_tokens(&parse)) {
134 tgsi_parse_token(&parse);
135
136 switch (parse.FullToken.Token.Type) {
137 case TGSI_TOKEN_TYPE_DECLARATION:
138 break;
139
140 case TGSI_TOKEN_TYPE_INSTRUCTION:
141 goto finished;
142
143 case TGSI_TOKEN_TYPE_IMMEDIATE:
144 {
145 const unsigned size =
146 parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
147 assert(size <= 4);
148 for (chan = 0; chan < size; ++chan) {
149 if (parse.FullToken.FullImmediate.u[chan].Float == 0.25f) {
150 has_quarter_imm = TRUE;
151 goto finished;
152 }
153 }
154 }
155 break;
156
157 case TGSI_TOKEN_TYPE_PROPERTY:
158 break;
159
160 default:
161 assert(0);
162 goto finished;
163 }
164 }
165 finished:
166
167 tgsi_parse_free(&parse);
168
169 if (!has_quarter_imm) {
170 return FALSE;
171 }
172
173 return TRUE;
174 }
175
176
177 /*
178 * Determine whether the given alu src comes directly from an input
179 * register. If so, return true and the input register index and
180 * component. Return false otherwise.
181 */
182 static bool
get_nir_input_info(const nir_alu_src *src, unsigned *input_index, int *input_component)183 get_nir_input_info(const nir_alu_src *src,
184 unsigned *input_index,
185 int *input_component)
186 {
187 if (!src->src.is_ssa) {
188 return false;
189 }
190
191 // The parent instr should be a nir_intrinsic_load_deref.
192 const nir_instr *parent = src->src.ssa[0].parent_instr;
193 if (!parent || parent->type != nir_instr_type_intrinsic) {
194 return false;
195 }
196 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(parent);
197 if (!intrin ||
198 intrin->intrinsic != nir_intrinsic_load_deref ||
199 !intrin->src[0].is_ssa) {
200 return false;
201 }
202
203 // The parent of the load should be a type_deref.
204 parent = intrin->src->ssa->parent_instr;
205 if (!parent || parent->type != nir_instr_type_deref) {
206 return false;
207 }
208
209 // The var being deref'd should be a shader input register.
210 nir_deref_instr *deref = nir_instr_as_deref(parent);
211 if (!deref || deref->deref_type != nir_deref_type_var ||
212 deref->modes != nir_var_shader_in) {
213 return false;
214 }
215
216 /*
217 * If the texture coordinate input is declared as two variables like this:
218 * decl_var shader_in INTERP_MODE_NONE float coord (VARYING_SLOT_VAR0.x, 0, 0)
219 * decl_var shader_in INTERP_MODE_NONE float coord@0 (VARYING_SLOT_VAR0.y, 0, 0)
220 * Then deref->var->data.location_frac will be 0 for the first var and 1
221 * for the second var and the texcoord will be set up with:
222 * vec2 32 ssa_5 = vec2 ssa_2, ssa_4 (note: no swizzles)
223 *
224 * Alternately, if the texture coordinate input is declared as one
225 * variable like this:
226 * decl_var shader_in INTERP_MODE_NONE vec4 i1xyzw (VARYING_SLOT_VAR1.xyzw, 0, 0)
227 * then deref->var->data.location_frac will be 0 and the
228 * tex coord will be setup with:
229 * vec2 32 ssa_2 = vec2 ssa_1.x, ssa_1.y
230 *
231 * We can handle both cases by adding deref->var->data.location_frac and
232 * src->swizzle[0].
233 */
234 *input_index = deref->var->data.driver_location;
235 *input_component = deref->var->data.location_frac + src->swizzle[0];
236 assert(*input_component >= 0);
237 assert(*input_component <= 3);
238
239 return true;
240 }
241
242
243 /*
244 * Examine the texcoord argument to a texture instruction to determine
245 * if the texcoord comes directly from a fragment shader input. If so
246 * return true and return the FS input register index for the coordinate
247 * and the (2-component) swizzle. Return false otherwise.
248 */
249 static bool
get_texcoord_provenance(const nir_tex_src *texcoord, unsigned *coord_fs_input_index, int swizzle[4])250 get_texcoord_provenance(const nir_tex_src *texcoord,
251 unsigned *coord_fs_input_index, // out
252 int swizzle[4]) // out
253 {
254 assert(texcoord->src_type == nir_tex_src_coord);
255
256 // The parent instr of the coord should be an nir_op_vec2 alu op
257 const nir_instr *parent = texcoord->src.ssa->parent_instr;
258 if (!parent || parent->type != nir_instr_type_alu) {
259 return false;
260 }
261 const nir_alu_instr *alu = nir_instr_as_alu(parent);
262 if (!alu || alu->op != nir_op_vec2) {
263 return false;
264 }
265
266 // Loop over nir_op_vec2 instruction arguments to find the
267 // input register index and component.
268 unsigned input_reg_indexes[2];
269 for (unsigned comp = 0; comp < 2; comp++) {
270 if (!get_nir_input_info(&alu->src[comp],
271 &input_reg_indexes[comp], &swizzle[comp])) {
272 return false;
273 }
274 }
275
276 // Both texcoord components should come from the same input register.
277 if (input_reg_indexes[0] != input_reg_indexes[1]) {
278 return false;
279 }
280
281 *coord_fs_input_index = input_reg_indexes[0];
282
283 return true;
284 }
285
286
287 /*
288 * Examine the NIR shader to determine if it's "linear".
289 */
290 static bool
llvmpipe_nir_fn_is_linear_compat(const struct nir_shader *shader, nir_function_impl *impl, struct lp_tgsi_info *info)291 llvmpipe_nir_fn_is_linear_compat(const struct nir_shader *shader,
292 nir_function_impl *impl,
293 struct lp_tgsi_info *info)
294 {
295 nir_foreach_block(block, impl) {
296 nir_foreach_instr_safe(instr, block) {
297 switch (instr->type) {
298 case nir_instr_type_deref:
299 case nir_instr_type_load_const:
300 break;
301 case nir_instr_type_intrinsic: {
302 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
303 if (intrin->intrinsic != nir_intrinsic_load_deref &&
304 intrin->intrinsic != nir_intrinsic_store_deref &&
305 intrin->intrinsic != nir_intrinsic_load_ubo)
306 return false;
307
308 if (intrin->intrinsic == nir_intrinsic_load_ubo) {
309 if (!nir_src_is_const(intrin->src[0]))
310 return false;
311 nir_load_const_instr *load =
312 nir_instr_as_load_const(intrin->src[0].ssa->parent_instr);
313 if (load->value[0].u32 != 0)
314 return false;
315 }
316 break;
317 }
318 case nir_instr_type_tex: {
319 nir_tex_instr *tex = nir_instr_as_tex(instr);
320 struct lp_tgsi_texture_info *tex_info = &info->tex[info->num_texs];
321 int texcoord_swizzle[4] = {-1, -1, -1, -1};
322 unsigned coord_fs_input_index = 0;
323
324 for (unsigned i = 0; i < tex->num_srcs; i++) {
325 if (tex->src[i].src_type == nir_tex_src_coord) {
326 if (!get_texcoord_provenance(&tex->src[i],
327 &coord_fs_input_index,
328 texcoord_swizzle)) {
329 //debug nir_print_shader((nir_shader *) shader, stdout);
330 return false;
331 }
332 }
333 }
334
335 switch (tex->op) {
336 case nir_texop_tex:
337 tex_info->modifier = LP_BLD_TEX_MODIFIER_NONE;
338 break;
339 default:
340 /* inaccurate but sufficient. */
341 tex_info->modifier = LP_BLD_TEX_MODIFIER_EXPLICIT_LOD;
342 return false;
343 }
344 switch (tex->sampler_dim) {
345 case GLSL_SAMPLER_DIM_2D:
346 tex_info->target = TGSI_TEXTURE_2D;
347 break;
348 default:
349 /* inaccurate but sufficient. */
350 tex_info->target = TGSI_TEXTURE_1D;
351 return false;
352 }
353
354 tex_info->sampler_unit = tex->sampler_index;
355 tex_info->texture_unit = tex->texture_index;
356
357 /* this is enforced in the scanner previously. */
358 tex_info->coord[0].file = TGSI_FILE_INPUT; // S
359 tex_info->coord[1].file = TGSI_FILE_INPUT; // T
360 assert(texcoord_swizzle[0] >= 0);
361 assert(texcoord_swizzle[1] >= 0);
362 tex_info->coord[0].swizzle = texcoord_swizzle[0]; // S
363 tex_info->coord[1].swizzle = texcoord_swizzle[1]; // T
364 tex_info->coord[0].u.index = coord_fs_input_index;
365 tex_info->coord[1].u.index = coord_fs_input_index;
366
367 info->num_texs++;
368 break;
369 }
370 case nir_instr_type_alu: {
371 const nir_alu_instr *alu = nir_instr_as_alu(instr);
372 switch (alu->op) {
373 case nir_op_mov:
374 case nir_op_vec2:
375 case nir_op_vec4:
376 // these instructions are OK
377 break;
378 case nir_op_fmul: {
379 unsigned num_src = nir_op_infos[alu->op].num_inputs;;
380 for (unsigned s = 0; s < num_src; s++) {
381 /* If the MUL uses immediate values, the values must
382 * be 32-bit floats in the range [0,1].
383 */
384 if (nir_src_is_const(alu->src[s].src)) {
385 nir_load_const_instr *load =
386 nir_instr_as_load_const(alu->src[s].src.ssa->parent_instr);
387
388 if (load->def.bit_size != 32)
389 return false;
390 for (unsigned c = 0; c < load->def.num_components; c++) {
391 if (load->value[c].f32 < 0.0 || load->value[c].f32 > 1.0) {
392 info->unclamped_immediates = true;
393 return false;
394 }
395 }
396 }
397 }
398 break;
399 }
400 default:
401 // disallowed instruction
402 return false;
403 }
404 break;
405 }
406 default:
407 return false;
408 }
409 }
410 }
411 return true;
412 }
413
414
415 static bool
llvmpipe_nir_is_linear_compat(struct nir_shader *shader, struct lp_tgsi_info *info)416 llvmpipe_nir_is_linear_compat(struct nir_shader *shader,
417 struct lp_tgsi_info *info)
418 {
419 nir_foreach_function(function, shader) {
420 if (function->impl) {
421 if (!llvmpipe_nir_fn_is_linear_compat(shader, function->impl, info))
422 return false;
423 }
424 }
425 return true;
426 }
427
428
429 /*
430 * Analyze the given NIR fragment shader and set its shader->kind field
431 * to LP_FS_KIND_x.
432 */
433 void
llvmpipe_fs_analyse_nir(struct lp_fragment_shader *shader)434 llvmpipe_fs_analyse_nir(struct lp_fragment_shader *shader)
435 {
436 if (shader->info.base.num_inputs <= LP_MAX_LINEAR_INPUTS &&
437 shader->info.base.num_outputs == 1 &&
438 !shader->info.indirect_textures &&
439 !shader->info.sampler_texture_units_different &&
440 !shader->info.unclamped_immediates &&
441 shader->info.num_texs <= LP_MAX_LINEAR_TEXTURES &&
442 llvmpipe_nir_is_linear_compat(shader->base.ir.nir, &shader->info)) {
443 shader->kind = LP_FS_KIND_LLVM_LINEAR;
444 } else {
445 shader->kind = LP_FS_KIND_GENERAL;
446 }
447 }
448
449
450 /*
451 * Analyze the given TGSI fragment shader and set its shader->kind field
452 * to LP_FS_KIND_x.
453 */
454 void
llvmpipe_fs_analyse(struct lp_fragment_shader *shader, const struct tgsi_token *tokens)455 llvmpipe_fs_analyse(struct lp_fragment_shader *shader,
456 const struct tgsi_token *tokens)
457 {
458 if (shader->info.base.num_inputs <= LP_MAX_LINEAR_INPUTS &&
459 shader->info.base.num_outputs == 1 &&
460 !shader->info.indirect_textures &&
461 !shader->info.sampler_texture_units_different &&
462 !shader->info.unclamped_immediates &&
463 shader->info.num_texs <= LP_MAX_LINEAR_TEXTURES &&
464 (shader->info.base.opcode_count[TGSI_OPCODE_TEX] +
465 shader->info.base.opcode_count[TGSI_OPCODE_SAMPLE] +
466 shader->info.base.opcode_count[TGSI_OPCODE_MOV] +
467 shader->info.base.opcode_count[TGSI_OPCODE_MUL] +
468 shader->info.base.opcode_count[TGSI_OPCODE_RET] +
469 shader->info.base.opcode_count[TGSI_OPCODE_END] ==
470 shader->info.base.num_instructions)) {
471 shader->kind = LP_FS_KIND_LLVM_LINEAR;
472 } else {
473 shader->kind = LP_FS_KIND_GENERAL;
474 }
475
476 if (shader->kind == LP_FS_KIND_GENERAL &&
477 match_aero_minification_shader(tokens, &shader->info)) {
478 shader->kind = LP_FS_KIND_AERO_MINIFICATION;
479 }
480 }
481