1 /**************************************************************************
2  *
3  * Copyright 2010-2021 VMware, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
18  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20  * USE OR OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * The above copyright notice and this permission notice (including the
23  * next paragraph) shall be included in all copies or substantial portions
24  * of the Software.
25  *
26  **************************************************************************/
27 
28 
29 #include "util/u_memory.h"
30 #include "util/u_math.h"
31 #include "tgsi/tgsi_parse.h"
32 #include "tgsi/tgsi_text.h"
33 #include "tgsi/tgsi_util.h"
34 #include "tgsi/tgsi_dump.h"
35 #include "lp_debug.h"
36 #include "lp_state.h"
37 #include "nir.h"
38 
39 /*
40  * Detect Aero minification shaders.
41  *
 * Aero does not use texture mipmaps when a window gets animated and its
 * shape is bent. Instead it uses the average of 4 nearby texels. This is the
 * simplest of such shaders, but there are several variations:
45  *
46  *   FRAG
47  *   DCL IN[0], GENERIC[1], PERSPECTIVE
48  *   DCL IN[1], GENERIC[2], PERSPECTIVE
49  *   DCL IN[2], GENERIC[3], PERSPECTIVE
50  *   DCL OUT[0], COLOR
51  *   DCL SAMP[0]
52  *   DCL TEMP[0..3]
53  *   IMM FLT32 {     0.2500,     0.0000,     0.0000,     0.0000 }
54  *   MOV TEMP[0].x, IN[0].zzzz
55  *   MOV TEMP[0].y, IN[0].wwww
56  *   MOV TEMP[1].x, IN[1].zzzz
57  *   MOV TEMP[1].y, IN[1].wwww
58  *   TEX TEMP[0], TEMP[0], SAMP[0], 2D
59  *   TEX TEMP[2], IN[0], SAMP[0], 2D
60  *   TEX TEMP[3], IN[1], SAMP[0], 2D
61  *   TEX TEMP[1], TEMP[1], SAMP[0], 2D
62  *   ADD TEMP[0], TEMP[0], TEMP[2]
63  *   ADD TEMP[0], TEMP[3], TEMP[0]
64  *   ADD TEMP[0], TEMP[1], TEMP[0]
65  *   MUL TEMP[0], TEMP[0], IN[2]
66  *   MUL TEMP[0], TEMP[0], IMM[0].xxxx
67  *   MOV OUT[0], TEMP[0]
68  *   END
69  *
70  * Texture coordinates are interleaved like the Gaussian blur shaders, but
 * unlike the latter there isn't structure in the sub-pixel positioning of the
72  * texels, other than being disposed in a diamond-like shape. For example,
73  * these are the relative offsets of the texels relative to the average:
74  *
75  *    x offset   y offset
76  *   --------------------
77  *    0.691834   -0.21360
78  *   -0.230230   -0.64160
79  *   -0.692406    0.21356
80  *    0.230802    0.64160
81  *
82  *  These shaders are typically used with linear min/mag filtering, but the
83  *  linear filtering provides very little visual improvement compared to the
84  *  performance impact it has. The ultimate purpose of detecting these shaders
85  *  is to override with nearest texture filtering.
86  */
87 static inline boolean
match_aero_minification_shader(const struct tgsi_token *tokens, const struct lp_tgsi_info *info)88 match_aero_minification_shader(const struct tgsi_token *tokens,
89                                const struct lp_tgsi_info *info)
90 {
91    struct tgsi_parse_context parse;
92    unsigned coord_mask;
93    boolean has_quarter_imm;
94    unsigned index, chan;
95 
96    if ((info->base.opcode_count[TGSI_OPCODE_TEX] != 4 &&
97         info->base.opcode_count[TGSI_OPCODE_SAMPLE] != 4) ||
98        info->num_texs != 4) {
99       return FALSE;
100    }
101 
102    /*
103     * Ensure the texture coordinates are interleaved as in the example above.
104     */
105 
106    coord_mask = 0;
107    for (index = 0; index < 4; ++index) {
108       const struct lp_tgsi_texture_info *tex = &info->tex[index];
109       if (tex->sampler_unit != 0 ||
110           tex->texture_unit != 0 ||
111           tex->coord[0].file != TGSI_FILE_INPUT ||
112           tex->coord[1].file != TGSI_FILE_INPUT ||
113           tex->coord[0].u.index != tex->coord[1].u.index ||
114           (tex->coord[0].swizzle % 2) != 0 ||
115           tex->coord[1].swizzle != tex->coord[0].swizzle + 1) {
116          return FALSE;
117       }
118 
119       coord_mask |= 1 << (tex->coord[0].u.index*2 + tex->coord[0].swizzle/2);
120    }
121    if (coord_mask != 0xf) {
122       return FALSE;
123    }
124 
125    /*
126     * Ensure it has the 0.25 immediate.
127     */
128 
129    has_quarter_imm = FALSE;
130 
131    tgsi_parse_init(&parse, tokens);
132 
133    while (!tgsi_parse_end_of_tokens(&parse)) {
134       tgsi_parse_token(&parse);
135 
136       switch (parse.FullToken.Token.Type) {
137       case TGSI_TOKEN_TYPE_DECLARATION:
138          break;
139 
140       case TGSI_TOKEN_TYPE_INSTRUCTION:
141          goto finished;
142 
143       case TGSI_TOKEN_TYPE_IMMEDIATE:
144          {
145             const unsigned size =
146                   parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
147             assert(size <= 4);
148             for (chan = 0; chan < size; ++chan) {
149                if (parse.FullToken.FullImmediate.u[chan].Float == 0.25f) {
150                   has_quarter_imm = TRUE;
151                   goto finished;
152                }
153             }
154          }
155          break;
156 
157       case TGSI_TOKEN_TYPE_PROPERTY:
158          break;
159 
160       default:
161          assert(0);
162          goto finished;
163       }
164    }
165 finished:
166 
167    tgsi_parse_free(&parse);
168 
169    if (!has_quarter_imm) {
170       return FALSE;
171    }
172 
173    return TRUE;
174 }
175 
176 
177 /*
178  * Determine whether the given alu src comes directly from an input
179  * register.  If so, return true and the input register index and
180  * component.  Return false otherwise.
181  */
182 static bool
get_nir_input_info(const nir_alu_src *src, unsigned *input_index, int *input_component)183 get_nir_input_info(const nir_alu_src *src,
184                    unsigned *input_index,
185                    int *input_component)
186 {
187    if (!src->src.is_ssa) {
188       return false;
189    }
190 
191    // The parent instr should be a nir_intrinsic_load_deref.
192    const nir_instr *parent = src->src.ssa[0].parent_instr;
193    if (!parent || parent->type != nir_instr_type_intrinsic) {
194       return false;
195    }
196    nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(parent);
197    if (!intrin ||
198        intrin->intrinsic != nir_intrinsic_load_deref ||
199        !intrin->src[0].is_ssa) {
200       return false;
201    }
202 
203    // The parent of the load should be a type_deref.
204    parent = intrin->src->ssa->parent_instr;
205    if (!parent || parent->type != nir_instr_type_deref) {
206       return false;
207    }
208 
209    // The var being deref'd should be a shader input register.
210    nir_deref_instr *deref = nir_instr_as_deref(parent);
211    if (!deref || deref->deref_type != nir_deref_type_var ||
212        deref->modes != nir_var_shader_in) {
213       return false;
214    }
215 
216    /*
217     * If the texture coordinate input is declared as two variables like this:
218     * decl_var shader_in INTERP_MODE_NONE float coord (VARYING_SLOT_VAR0.x, 0, 0)
219     * decl_var shader_in INTERP_MODE_NONE float coord@0 (VARYING_SLOT_VAR0.y, 0, 0)
220     * Then deref->var->data.location_frac will be 0 for the first var and 1
221     * for the second var and the texcoord will be set up with:
222     *   vec2 32 ssa_5 = vec2 ssa_2, ssa_4  (note: no swizzles)
223     *
224     * Alternately, if the texture coordinate input is declared as one
225     * variable like this:
226     * decl_var shader_in INTERP_MODE_NONE vec4 i1xyzw (VARYING_SLOT_VAR1.xyzw, 0, 0)
227     * then deref->var->data.location_frac will be 0 and the
228     * tex coord will be setup with:
229     *   vec2 32 ssa_2 = vec2 ssa_1.x, ssa_1.y
230     *
231     * We can handle both cases by adding deref->var->data.location_frac and
232     * src->swizzle[0].
233     */
234    *input_index = deref->var->data.driver_location;
235    *input_component = deref->var->data.location_frac + src->swizzle[0];
236    assert(*input_component >= 0);
237    assert(*input_component <= 3);
238 
239    return true;
240 }
241 
242 
243 /*
244  * Examine the texcoord argument to a texture instruction to determine
245  * if the texcoord comes directly from a fragment shader input.  If so
246  * return true and return the FS input register index for the coordinate
247  * and the (2-component) swizzle.  Return false otherwise.
248  */
249 static bool
get_texcoord_provenance(const nir_tex_src *texcoord, unsigned *coord_fs_input_index, int swizzle[4])250 get_texcoord_provenance(const nir_tex_src *texcoord,
251                         unsigned *coord_fs_input_index, // out
252                         int swizzle[4]) // out
253 {
254    assert(texcoord->src_type == nir_tex_src_coord);
255 
256    // The parent instr of the coord should be an nir_op_vec2 alu op
257    const nir_instr *parent = texcoord->src.ssa->parent_instr;
258    if (!parent || parent->type != nir_instr_type_alu) {
259       return false;
260    }
261    const nir_alu_instr *alu = nir_instr_as_alu(parent);
262    if (!alu || alu->op != nir_op_vec2) {
263       return false;
264    }
265 
266    // Loop over nir_op_vec2 instruction arguments to find the
267    // input register index and component.
268    unsigned input_reg_indexes[2];
269    for (unsigned comp = 0; comp < 2; comp++) {
270       if (!get_nir_input_info(&alu->src[comp],
271                               &input_reg_indexes[comp], &swizzle[comp])) {
272          return false;
273       }
274    }
275 
276    // Both texcoord components should come from the same input register.
277    if (input_reg_indexes[0] != input_reg_indexes[1]) {
278       return false;
279    }
280 
281    *coord_fs_input_index = input_reg_indexes[0];
282 
283    return true;
284 }
285 
286 
287 /*
288  * Examine the NIR shader to determine if it's "linear".
289  */
290 static bool
llvmpipe_nir_fn_is_linear_compat(const struct nir_shader *shader, nir_function_impl *impl, struct lp_tgsi_info *info)291 llvmpipe_nir_fn_is_linear_compat(const struct nir_shader *shader,
292                                  nir_function_impl *impl,
293                                  struct lp_tgsi_info *info)
294 {
295    nir_foreach_block(block, impl) {
296       nir_foreach_instr_safe(instr, block) {
297          switch (instr->type) {
298          case nir_instr_type_deref:
299          case nir_instr_type_load_const:
300             break;
301          case nir_instr_type_intrinsic: {
302             nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
303             if (intrin->intrinsic != nir_intrinsic_load_deref &&
304                 intrin->intrinsic != nir_intrinsic_store_deref &&
305                 intrin->intrinsic != nir_intrinsic_load_ubo)
306                return false;
307 
308             if (intrin->intrinsic == nir_intrinsic_load_ubo) {
309                if (!nir_src_is_const(intrin->src[0]))
310                   return false;
311                nir_load_const_instr *load =
312                   nir_instr_as_load_const(intrin->src[0].ssa->parent_instr);
313                if (load->value[0].u32 != 0)
314                   return false;
315             }
316             break;
317          }
318          case nir_instr_type_tex: {
319             nir_tex_instr *tex = nir_instr_as_tex(instr);
320             struct lp_tgsi_texture_info *tex_info = &info->tex[info->num_texs];
321             int texcoord_swizzle[4] = {-1, -1, -1, -1};
322             unsigned coord_fs_input_index = 0;
323 
324             for (unsigned i = 0; i < tex->num_srcs; i++) {
325                if (tex->src[i].src_type == nir_tex_src_coord) {
326                   if (!get_texcoord_provenance(&tex->src[i],
327                                                &coord_fs_input_index,
328                                                texcoord_swizzle)) {
329                      //debug nir_print_shader((nir_shader *) shader, stdout);
330                      return false;
331                   }
332                }
333             }
334 
335             switch (tex->op) {
336             case nir_texop_tex:
337                tex_info->modifier = LP_BLD_TEX_MODIFIER_NONE;
338                break;
339             default:
340                /* inaccurate but sufficient. */
341                tex_info->modifier = LP_BLD_TEX_MODIFIER_EXPLICIT_LOD;
342                return false;
343             }
344             switch (tex->sampler_dim) {
345             case GLSL_SAMPLER_DIM_2D:
346                tex_info->target = TGSI_TEXTURE_2D;
347                break;
348             default:
349                /* inaccurate but sufficient. */
350                tex_info->target = TGSI_TEXTURE_1D;
351                return false;
352             }
353 
354             tex_info->sampler_unit = tex->sampler_index;
355             tex_info->texture_unit = tex->texture_index;
356 
357             /* this is enforced in the scanner previously. */
358             tex_info->coord[0].file = TGSI_FILE_INPUT;  // S
359             tex_info->coord[1].file = TGSI_FILE_INPUT;  // T
360             assert(texcoord_swizzle[0] >= 0);
361             assert(texcoord_swizzle[1] >= 0);
362             tex_info->coord[0].swizzle = texcoord_swizzle[0]; // S
363             tex_info->coord[1].swizzle = texcoord_swizzle[1]; // T
364             tex_info->coord[0].u.index = coord_fs_input_index;
365             tex_info->coord[1].u.index = coord_fs_input_index;
366 
367             info->num_texs++;
368             break;
369          }
370          case nir_instr_type_alu: {
371             const nir_alu_instr *alu = nir_instr_as_alu(instr);
372             switch (alu->op) {
373             case nir_op_mov:
374             case nir_op_vec2:
375             case nir_op_vec4:
376                // these instructions are OK
377                break;
378             case nir_op_fmul: {
379                unsigned num_src = nir_op_infos[alu->op].num_inputs;;
380                for (unsigned s = 0; s < num_src; s++) {
381                   /* If the MUL uses immediate values, the values must
382                    * be 32-bit floats in the range [0,1].
383                    */
384                   if (nir_src_is_const(alu->src[s].src)) {
385                      nir_load_const_instr *load =
386                         nir_instr_as_load_const(alu->src[s].src.ssa->parent_instr);
387 
388                      if (load->def.bit_size != 32)
389                         return false;
390                      for (unsigned c = 0; c < load->def.num_components; c++) {
391                         if (load->value[c].f32 < 0.0 || load->value[c].f32 > 1.0) {
392                            info->unclamped_immediates = true;
393                            return false;
394                         }
395                      }
396                   }
397                }
398                break;
399             }
400             default:
401                // disallowed instruction
402                return false;
403             }
404             break;
405          }
406          default:
407             return false;
408          }
409       }
410    }
411    return true;
412 }
413 
414 
415 static bool
llvmpipe_nir_is_linear_compat(struct nir_shader *shader, struct lp_tgsi_info *info)416 llvmpipe_nir_is_linear_compat(struct nir_shader *shader,
417                               struct lp_tgsi_info *info)
418 {
419    nir_foreach_function(function, shader) {
420       if (function->impl) {
421          if (!llvmpipe_nir_fn_is_linear_compat(shader, function->impl, info))
422             return false;
423       }
424    }
425    return true;
426 }
427 
428 
429 /*
430  * Analyze the given NIR fragment shader and set its shader->kind field
431  * to LP_FS_KIND_x.
432  */
433 void
llvmpipe_fs_analyse_nir(struct lp_fragment_shader *shader)434 llvmpipe_fs_analyse_nir(struct lp_fragment_shader *shader)
435 {
436    if (shader->info.base.num_inputs <= LP_MAX_LINEAR_INPUTS &&
437        shader->info.base.num_outputs == 1 &&
438        !shader->info.indirect_textures &&
439        !shader->info.sampler_texture_units_different &&
440        !shader->info.unclamped_immediates &&
441        shader->info.num_texs <= LP_MAX_LINEAR_TEXTURES &&
442        llvmpipe_nir_is_linear_compat(shader->base.ir.nir, &shader->info)) {
443       shader->kind = LP_FS_KIND_LLVM_LINEAR;
444    } else {
445       shader->kind = LP_FS_KIND_GENERAL;
446    }
447 }
448 
449 
450 /*
451  * Analyze the given TGSI fragment shader and set its shader->kind field
452  * to LP_FS_KIND_x.
453  */
454 void
llvmpipe_fs_analyse(struct lp_fragment_shader *shader, const struct tgsi_token *tokens)455 llvmpipe_fs_analyse(struct lp_fragment_shader *shader,
456                     const struct tgsi_token *tokens)
457 {
458    if (shader->info.base.num_inputs <= LP_MAX_LINEAR_INPUTS &&
459        shader->info.base.num_outputs == 1 &&
460        !shader->info.indirect_textures &&
461        !shader->info.sampler_texture_units_different &&
462        !shader->info.unclamped_immediates &&
463        shader->info.num_texs <= LP_MAX_LINEAR_TEXTURES &&
464        (shader->info.base.opcode_count[TGSI_OPCODE_TEX] +
465         shader->info.base.opcode_count[TGSI_OPCODE_SAMPLE] +
466         shader->info.base.opcode_count[TGSI_OPCODE_MOV] +
467         shader->info.base.opcode_count[TGSI_OPCODE_MUL] +
468         shader->info.base.opcode_count[TGSI_OPCODE_RET] +
469         shader->info.base.opcode_count[TGSI_OPCODE_END] ==
470         shader->info.base.num_instructions)) {
471       shader->kind = LP_FS_KIND_LLVM_LINEAR;
472    } else {
473       shader->kind = LP_FS_KIND_GENERAL;
474    }
475 
476    if (shader->kind == LP_FS_KIND_GENERAL &&
477        match_aero_minification_shader(tokens, &shader->info)) {
478       shader->kind = LP_FS_KIND_AERO_MINIFICATION;
479    }
480 }
481