1/**************************************************************************
2 *
3 * Copyright 2010-2021 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
18 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
24 * of the Software.
25 *
26 **************************************************************************/
27
28
29#include "pipe/p_config.h"
30
31#include "util/u_math.h"
32#include "util/u_cpu_detect.h"
33#include "util/u_pack_color.h"
34#include "util/u_rect.h"
35#include "util/u_sse.h"
36
37#include "lp_jit.h"
38#include "lp_rast.h"
39#include "lp_debug.h"
40#include "lp_state_fs.h"
41#include "lp_linear_priv.h"
42
43
44#if defined(PIPE_ARCH_SSE)
45
46
47/* For debugging (LP_DEBUG=linear), shade areas of run-time fallback
48 * purple.  Keep blending active so we can see more of what's going
49 * on.
50 */
51static boolean
52linear_fallback(const struct lp_rast_state *state,
53                unsigned x, unsigned y,
54                unsigned width, unsigned height,
55                uint8_t *color,
56                unsigned stride)
57{
58   unsigned col = 0x808000ff;
59   int i;
60
61   for (y = 0; y < height; y++) {
62      for (i = 0; i < 64; i++) {
63         *((uint32_t *)(color + y*stride) + x + i) = col;
64      }
65   }
66
67   return TRUE;
68}
69
70
71/*
72 * Run our configurable linear shader pipeline:
73 * x,y is the surface position of the linear region, width, height is the size.
74 * Return TRUE for success, FALSE otherwise.
75 */
76static boolean
77lp_fs_linear_run(const struct lp_rast_state *state,
78                 unsigned x, unsigned y,
79                 unsigned width, unsigned height,
80                 const float (*a0)[4],
81                 const float (*dadx)[4],
82                 const float (*dady)[4],
83                 uint8_t *color,
84                 unsigned stride)
85{
86   const struct lp_fragment_shader_variant *variant = state->variant;
87   const struct lp_tgsi_info *info = &variant->shader->info;
88   uint8_t constants[LP_MAX_LINEAR_CONSTANTS * 4];
89
90   LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
91
92   /* Require constant w in these rectangles:
93    */
94   if (dadx[0][3] != 0.0f ||
95       dady[0][3] != 0.0f) {
96      if (LP_DEBUG & DEBUG_LINEAR2)
97         debug_printf("  -- w not constant\n");
98      goto fail;
99   }
100
101   /* XXX: Per statechange:
102    */
103   int nr_consts; // in floats, not float[4]
104   if (variant->shader->base.type == PIPE_SHADER_IR_TGSI) {
105      nr_consts = (info->base.file_max[TGSI_FILE_CONSTANT] + 1) * 4;
106   } else {
107      nr_consts = state->jit_context.num_constants[0];
108   }
109   for (int i = 0; i < nr_consts; i++){
110      float val = state->jit_context.constants[0][i];
111      if (val < 0.0f || val > 1.0f) {
112         if (LP_DEBUG & DEBUG_LINEAR2)
113            debug_printf("  -- const[%d] out of range %f\n", i, val);
114         goto fail;
115      }
116      constants[i] = (uint8_t)(val * 255.0f);
117   }
118
119   struct lp_jit_linear_context jit;
120   jit.constants = (const uint8_t (*)[4])constants;
121
122   /* We assume BGRA ordering */
123   assert(variant->key.cbuf_format[0] == PIPE_FORMAT_B8G8R8X8_UNORM ||
124          variant->key.cbuf_format[0] == PIPE_FORMAT_B8G8R8A8_UNORM);
125
126   jit.blend_color =
127         state->jit_context.u8_blend_color[32] +
128         (state->jit_context.u8_blend_color[16] << 8) +
129         (state->jit_context.u8_blend_color[0] << 16) +
130         (state->jit_context.u8_blend_color[48] << 24);
131
132   jit.alpha_ref_value = float_to_ubyte(state->jit_context.alpha_ref_value);
133
134   /* XXX: Per primitive:
135    */
136   struct lp_linear_interp interp[LP_MAX_LINEAR_INPUTS];
137   const float oow = 1.0f / a0[0][3];
138   unsigned input_mask = variant->linear_input_mask;
139   while (input_mask) {
140      int i = u_bit_scan(&input_mask);
141      unsigned usage_mask = info->base.input_usage_mask[i];
142      boolean perspective =
143            info->base.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE ||
144            (info->base.input_interpolate[i] == TGSI_INTERPOLATE_COLOR &&
145             !variant->key.flatshade);
146      if (!lp_linear_init_interp(&interp[i],
147                                 x, y, width, height,
148                                 usage_mask,
149                                 perspective,
150                                 oow,
151                                 a0[i+1],
152                                 dadx[i+1],
153                                 dady[i+1])) {
154         if (LP_DEBUG & DEBUG_LINEAR2)
155            debug_printf("  -- init_interp(%d) failed\n", i);
156         goto fail;
157      }
158
159      jit.inputs[i] = &interp[i].base;
160   }
161
162   /* XXX: Per primitive: Initialize linear or nearest samplers:
163    */
164   struct lp_linear_sampler samp[LP_MAX_LINEAR_TEXTURES];
165   const int nr_tex = info->num_texs;
166   for (int i = 0; i < nr_tex; i++) {
167      const struct lp_tgsi_texture_info *tex_info = &info->tex[i];
168      const unsigned tex_unit = tex_info->texture_unit;
169      const unsigned samp_unit = tex_info->sampler_unit;
170      //const unsigned fs_s_input = tex_info->coord[0].u.index;
171      //const unsigned fs_t_input = tex_info->coord[1].u.index;
172
173      // xxx investigate why these fail in deqp-vk
174      //assert(variant->linear_input_mask & (1 << fs_s_input));
175      //assert(variant->linear_input_mask & (1 << fs_t_input));
176
177      /* XXX: some texture coordinates are linear!
178       */
179      //boolean perspective = (info->base.input_interpolate[i] ==
180      //                       TGSI_INTERPOLATE_PERSPECTIVE);
181
182      if (!lp_linear_init_sampler(&samp[i], tex_info,
183                  lp_fs_variant_key_sampler_idx(&variant->key, samp_unit),
184                  &state->jit_context.textures[tex_unit],
185                  x, y, width, height, a0, dadx, dady)) {
186         if (LP_DEBUG & DEBUG_LINEAR2)
187            debug_printf("  -- init_sampler(%d) failed\n", i);
188         goto fail;
189      }
190
191      jit.tex[i] = &samp[i].base;
192   }
193
194   /* JIT function already does blending */
195   jit.color0 = color + x * 4 + y * stride;
196   lp_jit_linear_llvm_func jit_func = variant->jit_linear_llvm;
197
198   for (unsigned iy = 0; iy < height; iy++) {
199      jit_func(&jit, 0, 0, width);  // x=0, y=0
200      jit.color0 += stride;
201   }
202
203   return TRUE;
204
205fail:
206   /* Visually distinguish this from other fallbacks:
207    */
208   if (LP_DEBUG & DEBUG_LINEAR) {
209      return linear_fallback(state, x, y, width, height, color, stride);
210   }
211
212   return FALSE;
213}
214
215
216static void
217check_linear_interp_mask_a(struct lp_fragment_shader_variant *variant)
218{
219   const struct lp_tgsi_info *info = &variant->shader->info;
220   struct lp_jit_linear_context jit;
221
222   struct lp_linear_sampler samp[LP_MAX_LINEAR_TEXTURES];
223   struct lp_linear_interp interp[LP_MAX_LINEAR_INPUTS];
224   uint8_t constants[LP_MAX_LINEAR_CONSTANTS][4];
225   alignas(16) uint8_t color0[TILE_SIZE*4];
226
227   const int nr_inputs = info->base.file_max[TGSI_FILE_INPUT]+1;
228   const int nr_tex = info->num_texs;
229
230   LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
231
232   jit.constants = (const uint8_t (*)[4])constants;
233
234   for (int i = 0; i < nr_tex; i++) {
235      lp_linear_init_noop_sampler(&samp[i]);
236      jit.tex[i] = &samp[i].base;
237   }
238
239   for (int i = 0; i < nr_inputs; i++) {
240      lp_linear_init_noop_interp(&interp[i]);
241      jit.inputs[i] = &interp[i].base;
242   }
243
244   jit.color0 = color0;
245
246   (void)variant->jit_linear_llvm(&jit, 0, 0, 0);
247
248   /* Find out which interpolators were called, and store this as a
249    * mask:
250    */
251   for (int i = 0; i < nr_inputs; i++) {
252      variant->linear_input_mask |= (interp[i].row[0] << i);
253   }
254}
255
256
257/* Until the above is working, look at texture information and guess
258 * that any input used as a texture coordinate is not used for
259 * anything else.
260 */
261static void
262check_linear_interp_mask_b(struct lp_fragment_shader_variant *variant)
263{
264   const struct lp_tgsi_info *info = &variant->shader->info;
265   int nr_inputs = info->base.file_max[TGSI_FILE_INPUT]+1;
266   int nr_tex = info->num_texs;
267   unsigned tex_mask = 0;
268   int i;
269
270   LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__);
271
272   for (i = 0; i < nr_tex; i++) {
273      const struct lp_tgsi_texture_info *tex_info = &info->tex[i];
274      const struct lp_tgsi_channel_info *schan = &tex_info->coord[0];
275      const struct lp_tgsi_channel_info *tchan = &tex_info->coord[1];
276      tex_mask |= 1 << schan->u.index;
277      tex_mask |= 1 << tchan->u.index;
278   }
279
280   variant->linear_input_mask = ((1 << nr_inputs) - 1) & ~tex_mask;
281}
282
283
284void
285lp_linear_check_variant(struct lp_fragment_shader_variant *variant)
286{
287   const struct lp_fragment_shader_variant_key *key = &variant->key;
288   const struct lp_fragment_shader *shader = variant->shader;
289   const struct lp_tgsi_info *info = &shader->info;
290
291   if (info->base.file_max[TGSI_FILE_CONSTANT] >= LP_MAX_LINEAR_CONSTANTS ||
292       info->base.file_max[TGSI_FILE_INPUT] >= LP_MAX_LINEAR_INPUTS) {
293      if (LP_DEBUG & DEBUG_LINEAR)
294         debug_printf("  -- too many inputs/constants\n");
295      goto fail;
296   }
297
298   /* If we have a fastpath which implements the entire variant, use
299    * that.
300    */
301   if (lp_linear_check_fastpath(variant)) {
302      return;
303   }
304
305   /* Otherwise, can we build up a spanline-based linear path for this
306    * variant?
307    */
308
309   /* Check static sampler state.
310    */
311   for (unsigned i = 0; i < info->num_texs; i++) {
312      const struct lp_tgsi_texture_info *tex_info = &info->tex[i];
313      const unsigned unit = tex_info->sampler_unit;
314
315      /* XXX: Relax this once setup premultiplies by oow:
316       */
317      if (info->base.input_interpolate[unit] != TGSI_INTERPOLATE_PERSPECTIVE) {
318         if (LP_DEBUG & DEBUG_LINEAR)
319            debug_printf(" -- samp[%d]: texcoord not perspective\n", i);
320         goto fail;
321      }
322
323      struct lp_sampler_static_state *samp =
324         lp_fs_variant_key_sampler_idx(key, unit);
325      if (!lp_linear_check_sampler(samp, tex_info)) {
326         if (LP_DEBUG & DEBUG_LINEAR)
327            debug_printf(" -- samp[%d]: check_sampler failed\n", i);
328         goto fail;
329      }
330   }
331
332   /* Check shader.  May not have been jitted.
333    */
334   if (variant->linear_function == NULL) {
335      if (LP_DEBUG & DEBUG_LINEAR)
336         debug_printf("  -- no linear shader\n");
337      goto fail;
338   }
339
340   /* Hook in the catchall shader runner:
341    */
342   variant->jit_linear = lp_fs_linear_run;
343
344   /* Figure out which inputs we don't need to interpolate (because
345    * they are only used as texture coordinates).  This is important
346    * as we can cope with texture coordinates which exceed 1.0, but
347    * cannot do so for regular inputs.
348    */
349   if (1)
350      check_linear_interp_mask_a(variant);
351   else
352      check_linear_interp_mask_b(variant);
353
354
355   if (0) {
356      lp_debug_fs_variant(variant);
357      debug_printf("linear input mask: 0x%x\n", variant->linear_input_mask);
358   }
359
360   return;
361
362fail:
363   if (LP_DEBUG & DEBUG_LINEAR) {
364      lp_debug_fs_variant(variant);
365      debug_printf("    ----> no linear path for this variant\n");
366   }
367}
368
369
370#else
/* Build without PIPE_ARCH_SSE: no linear path is available, so the
 * variant is left exactly as it was passed in.
 */
void
lp_linear_check_variant(struct lp_fragment_shader_variant *variant)
{
   (void)variant;
}
375#endif
376