1bf215546Sopenharmony_ci/**************************************************************************
2bf215546Sopenharmony_ci *
3bf215546Sopenharmony_ci * Copyright 2010-2021 VMware, Inc.
4bf215546Sopenharmony_ci * All Rights Reserved.
5bf215546Sopenharmony_ci *
6bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
7bf215546Sopenharmony_ci * copy of this software and associated documentation files (the
8bf215546Sopenharmony_ci * "Software"), to deal in the Software without restriction, including
9bf215546Sopenharmony_ci * without limitation the rights to use, copy, modify, merge, publish,
10bf215546Sopenharmony_ci * distribute, sub license, and/or sell copies of the Software, and to
11bf215546Sopenharmony_ci * permit persons to whom the Software is furnished to do so, subject to
12bf215546Sopenharmony_ci * the following conditions:
13bf215546Sopenharmony_ci *
14bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
17bf215546Sopenharmony_ci * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
18bf215546Sopenharmony_ci * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19bf215546Sopenharmony_ci * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20bf215546Sopenharmony_ci * USE OR OTHER DEALINGS IN THE SOFTWARE.
21bf215546Sopenharmony_ci *
22bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the
23bf215546Sopenharmony_ci * next paragraph) shall be included in all copies or substantial portions
24bf215546Sopenharmony_ci * of the Software.
25bf215546Sopenharmony_ci *
26bf215546Sopenharmony_ci **************************************************************************/
27bf215546Sopenharmony_ci
28bf215546Sopenharmony_ci
29bf215546Sopenharmony_ci#include "pipe/p_config.h"
30bf215546Sopenharmony_ci
31bf215546Sopenharmony_ci#include "util/u_math.h"
32bf215546Sopenharmony_ci#include "util/u_cpu_detect.h"
33bf215546Sopenharmony_ci#include "util/u_pack_color.h"
34bf215546Sopenharmony_ci#include "util/u_rect.h"
35bf215546Sopenharmony_ci#include "util/u_sse.h"
36bf215546Sopenharmony_ci
37bf215546Sopenharmony_ci#include "lp_jit.h"
38bf215546Sopenharmony_ci#include "lp_rast.h"
39bf215546Sopenharmony_ci#include "lp_debug.h"
40bf215546Sopenharmony_ci#include "lp_state_fs.h"
41bf215546Sopenharmony_ci#include "lp_linear_priv.h"
42bf215546Sopenharmony_ci
43bf215546Sopenharmony_ci
44bf215546Sopenharmony_ci#if defined(PIPE_ARCH_SSE)
45bf215546Sopenharmony_ci
/* Largest 1.15 fixed-point value; used throughout as the encoding of 1.0. */
#define FIXED15_ONE 0x7fff

/* Translate floating point value to 1.15 unsigned fixed-point.
 *
 * The scaled value is range-checked while it is still a float, so that
 * negative, oversized and NaN inputs never reach the float -> integer
 * conversion (which is undefined behaviour when the value is not
 * representable in the destination type).
 *
 * Returns 0 for negative or NaN input, FIXED15_ONE for input >= 1.0,
 * and the scaled value truncated towards zero otherwise.
 */
static inline uint16_t
float_to_ufixed_1_15(float f)
{
   const float scaled = f * (float)FIXED15_ONE;

   /* Written as a negated '>' so that NaN also lands here. */
   if (!(scaled > 0.0f))
      return 0;

   if (scaled >= (float)FIXED15_ONE)
      return FIXED15_ONE;

   return (uint16_t)scaled;
}


/* Translate floating point value to 1.15 signed fixed-point.
 *
 * As with the unsigned variant, clamping happens before the integer
 * conversion so arbitrarily large gradients cannot trigger undefined
 * behaviour.
 *
 * Returns a value in [-FIXED15_ONE, FIXED15_ONE], truncated towards
 * zero; NaN input yields 0.
 */
static inline int16_t
float_to_sfixed_1_15(float f)
{
   const float scaled = f * (float)FIXED15_ONE;

   if (scaled >= (float)FIXED15_ONE)
      return FIXED15_ONE;

   if (scaled <= -(float)FIXED15_ONE)
      return -FIXED15_ONE;

   /* Only NaN compares unequal to itself. */
   if (scaled != scaled)
      return 0;

   return (int16_t)scaled;
}
64bf215546Sopenharmony_ci
65bf215546Sopenharmony_ci
66bf215546Sopenharmony_ci/* Interpolate in 1.15 space, but produce a packed row of 0.8 values.
67bf215546Sopenharmony_ci */
68bf215546Sopenharmony_cistatic const uint32_t *
69bf215546Sopenharmony_ciinterp_0_8(struct lp_linear_elem *elem)
70bf215546Sopenharmony_ci{
71bf215546Sopenharmony_ci   struct lp_linear_interp *interp = (struct lp_linear_interp *)elem;
72bf215546Sopenharmony_ci   uint32_t *row = interp->row;
73bf215546Sopenharmony_ci   __m128i a0 = interp->a0;
74bf215546Sopenharmony_ci   const __m128i dadx = interp->dadx;
75bf215546Sopenharmony_ci   const int width = (interp->width + 3) & ~3;
76bf215546Sopenharmony_ci
77bf215546Sopenharmony_ci   for (int i = 0; i < width; i += 4) {
78bf215546Sopenharmony_ci      __m128i l = _mm_srai_epi16(a0, 7); // l = a0 >> 7
79bf215546Sopenharmony_ci      a0 = _mm_add_epi16(a0, dadx);      // a0 += dadx
80bf215546Sopenharmony_ci
81bf215546Sopenharmony_ci      __m128i h = _mm_srai_epi16(a0, 7); // h = a0 >> 7
82bf215546Sopenharmony_ci      a0 = _mm_add_epi16(a0, dadx);      // a0 += dadx
83bf215546Sopenharmony_ci
84bf215546Sopenharmony_ci      // pack l[0..7] and h[0..7] as 16 bytes
85bf215546Sopenharmony_ci      *(__m128i *)&row[i] =  _mm_packus_epi16(l, h);
86bf215546Sopenharmony_ci   }
87bf215546Sopenharmony_ci
88bf215546Sopenharmony_ci   // advance to next row
89bf215546Sopenharmony_ci   interp->a0 = _mm_add_epi16(interp->a0, interp->dady);
90bf215546Sopenharmony_ci   return interp->row;
91bf215546Sopenharmony_ci}
92bf215546Sopenharmony_ci
93bf215546Sopenharmony_cistatic const uint32_t *
94bf215546Sopenharmony_ciinterp_noop(struct lp_linear_elem *elem)
95bf215546Sopenharmony_ci{
96bf215546Sopenharmony_ci   struct lp_linear_interp *interp = (struct lp_linear_interp *)elem;
97bf215546Sopenharmony_ci   return interp->row;
98bf215546Sopenharmony_ci}
99bf215546Sopenharmony_ci
100bf215546Sopenharmony_ci
101bf215546Sopenharmony_cistatic const uint32_t *
102bf215546Sopenharmony_ciinterp_check(struct lp_linear_elem *elem)
103bf215546Sopenharmony_ci{
104bf215546Sopenharmony_ci   struct lp_linear_interp *interp = (struct lp_linear_interp *)elem;
105bf215546Sopenharmony_ci   interp->row[0] = 1;
106bf215546Sopenharmony_ci   return interp->row;
107bf215546Sopenharmony_ci}
108bf215546Sopenharmony_ci
109bf215546Sopenharmony_ci/* Not quite a noop - we use row[0] to track whether this gets called
110bf215546Sopenharmony_ci * or not, so we can optimize which interpolants we care about.
111bf215546Sopenharmony_ci */
112bf215546Sopenharmony_civoid
113bf215546Sopenharmony_cilp_linear_init_noop_interp(struct lp_linear_interp *interp)
114bf215546Sopenharmony_ci{
115bf215546Sopenharmony_ci   interp->row[0] = 0;
116bf215546Sopenharmony_ci   interp->base.fetch = interp_check;
117bf215546Sopenharmony_ci}
118bf215546Sopenharmony_ci
119bf215546Sopenharmony_ciboolean
120bf215546Sopenharmony_cilp_linear_init_interp(struct lp_linear_interp *interp,
121bf215546Sopenharmony_ci                      int x, int y, int width, int height,
122bf215546Sopenharmony_ci                      unsigned usage_mask,
123bf215546Sopenharmony_ci                      boolean perspective,
124bf215546Sopenharmony_ci                      float oow,
125bf215546Sopenharmony_ci                      const float *a0,
126bf215546Sopenharmony_ci                      const float *dadx,
127bf215546Sopenharmony_ci                      const float *dady)
128bf215546Sopenharmony_ci{
129bf215546Sopenharmony_ci   float s0[4];
130bf215546Sopenharmony_ci   float dsdx[4];
131bf215546Sopenharmony_ci   float dsdy[4];
132bf215546Sopenharmony_ci   int16_t s0_fp[8];
133bf215546Sopenharmony_ci   int16_t dsdx_fp[4];
134bf215546Sopenharmony_ci   int16_t dsdy_fp[4];
135bf215546Sopenharmony_ci
136bf215546Sopenharmony_ci   /* Zero coefficients to avoid using uninitialised values */
137bf215546Sopenharmony_ci   memset(s0, 0, sizeof(s0));
138bf215546Sopenharmony_ci   memset(dsdx, 0, sizeof(dsdx));
139bf215546Sopenharmony_ci   memset(dsdy, 0, sizeof(dsdy));
140bf215546Sopenharmony_ci   memset(s0_fp, 0, sizeof(s0_fp));
141bf215546Sopenharmony_ci   memset(dsdx_fp, 0, sizeof(dsdx_fp));
142bf215546Sopenharmony_ci   memset(dsdy_fp, 0, sizeof(dsdy_fp));
143bf215546Sopenharmony_ci
144bf215546Sopenharmony_ci   if (perspective && oow != 1.0f) {
145bf215546Sopenharmony_ci      for (unsigned j = 0; j < 4; j++) {
146bf215546Sopenharmony_ci         if (usage_mask & (1<<j)) {
147bf215546Sopenharmony_ci            s0[j]   =   a0[j] * oow;
148bf215546Sopenharmony_ci            dsdx[j] = dadx[j] * oow;
149bf215546Sopenharmony_ci            dsdy[j] = dady[j] * oow;
150bf215546Sopenharmony_ci         }
151bf215546Sopenharmony_ci      }
152bf215546Sopenharmony_ci   } else {
153bf215546Sopenharmony_ci      for (unsigned j = 0; j < 4; j++) {
154bf215546Sopenharmony_ci         if (usage_mask & (1<<j)) {
155bf215546Sopenharmony_ci            s0[j]   =   a0[j];
156bf215546Sopenharmony_ci            dsdx[j] = dadx[j];
157bf215546Sopenharmony_ci            dsdy[j] = dady[j];
158bf215546Sopenharmony_ci         }
159bf215546Sopenharmony_ci      }
160bf215546Sopenharmony_ci   }
161bf215546Sopenharmony_ci
162bf215546Sopenharmony_ci   s0[0] += x * dsdx[0] + y * dsdy[0];
163bf215546Sopenharmony_ci   s0[1] += x * dsdx[1] + y * dsdy[1];
164bf215546Sopenharmony_ci   s0[2] += x * dsdx[2] + y * dsdy[2];
165bf215546Sopenharmony_ci   s0[3] += x * dsdx[3] + y * dsdy[3];
166bf215546Sopenharmony_ci
167bf215546Sopenharmony_ci   /* XXX: lift all of this into the rectangle setup code.
168bf215546Sopenharmony_ci    *
169bf215546Sopenharmony_ci    * For rectangles with linear shaders, at setup time:
170bf215546Sopenharmony_ci    *    - if w is constant (else mark as non-fastpath)
171bf215546Sopenharmony_ci    *        - premultiply perspective interpolants by w
172bf215546Sopenharmony_ci    *        - set w = 1 in position
173bf215546Sopenharmony_ci    *   - check all interpolants for min/max 0..1 (else mark as
174bf215546Sopenharmony_ci    *          non-fastpath)
175bf215546Sopenharmony_ci    */
176bf215546Sopenharmony_ci   for (unsigned j = 0; j < 4; j++) {
177bf215546Sopenharmony_ci      if (usage_mask & (1<<j)) {
178bf215546Sopenharmony_ci         // compute texcoords at rect corners
179bf215546Sopenharmony_ci         float a = s0[j];
180bf215546Sopenharmony_ci         float b = s0[j] + (width  - 1) * dsdx[j];
181bf215546Sopenharmony_ci         float c = s0[j] + (height - 1) * dsdy[j];
182bf215546Sopenharmony_ci         float d = s0[j] + (height - 1) * dsdy[j] + (width - 1) * dsdx[j];
183bf215546Sopenharmony_ci
184bf215546Sopenharmony_ci         if (MIN4(a,b,c,d) < 0.0)
185bf215546Sopenharmony_ci            FAIL("min < 0.0"); // out of bounds
186bf215546Sopenharmony_ci
187bf215546Sopenharmony_ci         if (MAX4(a,b,c,d) > 1.0)
188bf215546Sopenharmony_ci            FAIL("max > 1.0"); // out of bounds
189bf215546Sopenharmony_ci
190bf215546Sopenharmony_ci         dsdx_fp[j]   = float_to_sfixed_1_15(dsdx[j]);
191bf215546Sopenharmony_ci         dsdy_fp[j]   = float_to_sfixed_1_15(dsdy[j]);
192bf215546Sopenharmony_ci
193bf215546Sopenharmony_ci         s0_fp[j]     = float_to_ufixed_1_15(s0[j]);  // first pixel
194bf215546Sopenharmony_ci         s0_fp[j + 4] = s0_fp[j] + dsdx_fp[j];        // second pixel
195bf215546Sopenharmony_ci
196bf215546Sopenharmony_ci         dsdx_fp[j] *= 2;
197bf215546Sopenharmony_ci      }
198bf215546Sopenharmony_ci   }
199bf215546Sopenharmony_ci
200bf215546Sopenharmony_ci   interp->width = align(width, 4);
201bf215546Sopenharmony_ci   /* RGBA->BGRA swizzle here */
202bf215546Sopenharmony_ci   interp->a0    = _mm_setr_epi16(s0_fp[2], s0_fp[1], s0_fp[0], s0_fp[3],
203bf215546Sopenharmony_ci                                  s0_fp[6], s0_fp[5], s0_fp[4], s0_fp[7]);
204bf215546Sopenharmony_ci
205bf215546Sopenharmony_ci   interp->dadx  = _mm_setr_epi16(dsdx_fp[2], dsdx_fp[1], dsdx_fp[0], dsdx_fp[3],
206bf215546Sopenharmony_ci                                  dsdx_fp[2], dsdx_fp[1], dsdx_fp[0], dsdx_fp[3]);
207bf215546Sopenharmony_ci
208bf215546Sopenharmony_ci   interp->dady  = _mm_setr_epi16(dsdy_fp[2], dsdy_fp[1], dsdy_fp[0], dsdy_fp[3],
209bf215546Sopenharmony_ci                                  dsdy_fp[2], dsdy_fp[1], dsdy_fp[0], dsdy_fp[3]);
210bf215546Sopenharmony_ci
211bf215546Sopenharmony_ci   /* If the value is y-invariant, eagerly calculate it here and then
212bf215546Sopenharmony_ci    * always return the precalculated value.
213bf215546Sopenharmony_ci    */
214bf215546Sopenharmony_ci   if (dsdy[0] == 0 &&
215bf215546Sopenharmony_ci       dsdy[1] == 0 &&
216bf215546Sopenharmony_ci       dsdy[2] == 0 &&
217bf215546Sopenharmony_ci       dsdy[3] == 0) {
218bf215546Sopenharmony_ci      interp_0_8(&interp->base);
219bf215546Sopenharmony_ci      interp->base.fetch = interp_noop;
220bf215546Sopenharmony_ci   } else {
221bf215546Sopenharmony_ci      interp->base.fetch = interp_0_8;
222bf215546Sopenharmony_ci   }
223bf215546Sopenharmony_ci
224bf215546Sopenharmony_ci   return TRUE;
225bf215546Sopenharmony_ci}
226bf215546Sopenharmony_ci
227bf215546Sopenharmony_ci#else
228bf215546Sopenharmony_ciboolean
229bf215546Sopenharmony_cilp_linear_init_interp(struct lp_linear_interp *interp,
230bf215546Sopenharmony_ci                      int x, int y, int width, int height,
231bf215546Sopenharmony_ci                      unsigned usage_mask,
232bf215546Sopenharmony_ci                      boolean perspective,
233bf215546Sopenharmony_ci                      float oow,
234bf215546Sopenharmony_ci                      const float *a0,
235bf215546Sopenharmony_ci                      const float *dadx,
236bf215546Sopenharmony_ci                      const float *dady)
237bf215546Sopenharmony_ci{
238bf215546Sopenharmony_ci   return FALSE;
239bf215546Sopenharmony_ci}
240bf215546Sopenharmony_ci#endif
241