1bf215546Sopenharmony_ci/************************************************************************** 2bf215546Sopenharmony_ci * 3bf215546Sopenharmony_ci * Copyright 2010-2021 VMware, Inc. 4bf215546Sopenharmony_ci * All Rights Reserved. 5bf215546Sopenharmony_ci * 6bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 7bf215546Sopenharmony_ci * copy of this software and associated documentation files (the 8bf215546Sopenharmony_ci * "Software"), to deal in the Software without restriction, including 9bf215546Sopenharmony_ci * without limitation the rights to use, copy, modify, merge, publish, 10bf215546Sopenharmony_ci * distribute, sub license, and/or sell copies of the Software, and to 11bf215546Sopenharmony_ci * permit persons to whom the Software is furnished to do so, subject to 12bf215546Sopenharmony_ci * the following conditions: 13bf215546Sopenharmony_ci * 14bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 17bf215546Sopenharmony_ci * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, 18bf215546Sopenharmony_ci * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 19bf215546Sopenharmony_ci * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 20bf215546Sopenharmony_ci * USE OR OTHER DEALINGS IN THE SOFTWARE. 21bf215546Sopenharmony_ci * 22bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the 23bf215546Sopenharmony_ci * next paragraph) shall be included in all copies or substantial portions 24bf215546Sopenharmony_ci * of the Software. 25bf215546Sopenharmony_ci * 26bf215546Sopenharmony_ci **************************************************************************/ 27bf215546Sopenharmony_ci 28bf215546Sopenharmony_ci 29bf215546Sopenharmony_ci#include "pipe/p_config.h" 30bf215546Sopenharmony_ci 31bf215546Sopenharmony_ci#include "util/u_math.h" 32bf215546Sopenharmony_ci#include "util/u_cpu_detect.h" 33bf215546Sopenharmony_ci#include "util/u_pack_color.h" 34bf215546Sopenharmony_ci#include "util/u_rect.h" 35bf215546Sopenharmony_ci#include "util/u_sse.h" 36bf215546Sopenharmony_ci 37bf215546Sopenharmony_ci#include "lp_jit.h" 38bf215546Sopenharmony_ci#include "lp_rast.h" 39bf215546Sopenharmony_ci#include "lp_debug.h" 40bf215546Sopenharmony_ci#include "lp_state_fs.h" 41bf215546Sopenharmony_ci#include "lp_linear_priv.h" 42bf215546Sopenharmony_ci 43bf215546Sopenharmony_ci 44bf215546Sopenharmony_ci#if defined(PIPE_ARCH_SSE) 45bf215546Sopenharmony_ci 46bf215546Sopenharmony_ci#define FIXED15_ONE 0x7fff 47bf215546Sopenharmony_ci 48bf215546Sopenharmony_ci/* Translate floating point value to 1.15 unsigned fixed-point. 49bf215546Sopenharmony_ci */ 50bf215546Sopenharmony_cistatic inline ushort 51bf215546Sopenharmony_cifloat_to_ufixed_1_15(float f) 52bf215546Sopenharmony_ci{ 53bf215546Sopenharmony_ci return CLAMP((unsigned)(f * (float)FIXED15_ONE), 0, FIXED15_ONE); 54bf215546Sopenharmony_ci} 55bf215546Sopenharmony_ci 56bf215546Sopenharmony_ci 57bf215546Sopenharmony_ci/* Translate floating point value to 1.15 signed fixed-point. 58bf215546Sopenharmony_ci */ 59bf215546Sopenharmony_cistatic inline int16_t 60bf215546Sopenharmony_cifloat_to_sfixed_1_15(float f) 61bf215546Sopenharmony_ci{ 62bf215546Sopenharmony_ci return CLAMP((signed)(f * (float)FIXED15_ONE), -FIXED15_ONE, FIXED15_ONE); 63bf215546Sopenharmony_ci} 64bf215546Sopenharmony_ci 65bf215546Sopenharmony_ci 66bf215546Sopenharmony_ci/* Interpolate in 1.15 space, but produce a packed row of 0.8 values. 67bf215546Sopenharmony_ci */ 68bf215546Sopenharmony_cistatic const uint32_t * 69bf215546Sopenharmony_ciinterp_0_8(struct lp_linear_elem *elem) 70bf215546Sopenharmony_ci{ 71bf215546Sopenharmony_ci struct lp_linear_interp *interp = (struct lp_linear_interp *)elem; 72bf215546Sopenharmony_ci uint32_t *row = interp->row; 73bf215546Sopenharmony_ci __m128i a0 = interp->a0; 74bf215546Sopenharmony_ci const __m128i dadx = interp->dadx; 75bf215546Sopenharmony_ci const int width = (interp->width + 3) & ~3; 76bf215546Sopenharmony_ci 77bf215546Sopenharmony_ci for (int i = 0; i < width; i += 4) { 78bf215546Sopenharmony_ci __m128i l = _mm_srai_epi16(a0, 7); // l = a0 >> 7 79bf215546Sopenharmony_ci a0 = _mm_add_epi16(a0, dadx); // a0 += dadx 80bf215546Sopenharmony_ci 81bf215546Sopenharmony_ci __m128i h = _mm_srai_epi16(a0, 7); // h = a0 >> 7 82bf215546Sopenharmony_ci a0 = _mm_add_epi16(a0, dadx); // a0 += dadx 83bf215546Sopenharmony_ci 84bf215546Sopenharmony_ci // pack l[0..7] and h[0..7] as 16 bytes 85bf215546Sopenharmony_ci *(__m128i *)&row[i] = _mm_packus_epi16(l, h); 86bf215546Sopenharmony_ci } 87bf215546Sopenharmony_ci 88bf215546Sopenharmony_ci // advance to next row 89bf215546Sopenharmony_ci interp->a0 = _mm_add_epi16(interp->a0, interp->dady); 90bf215546Sopenharmony_ci return interp->row; 91bf215546Sopenharmony_ci} 92bf215546Sopenharmony_ci 93bf215546Sopenharmony_cistatic const uint32_t * 94bf215546Sopenharmony_ciinterp_noop(struct lp_linear_elem *elem) 95bf215546Sopenharmony_ci{ 96bf215546Sopenharmony_ci struct lp_linear_interp *interp = (struct lp_linear_interp *)elem; 97bf215546Sopenharmony_ci return interp->row; 98bf215546Sopenharmony_ci} 99bf215546Sopenharmony_ci 100bf215546Sopenharmony_ci 101bf215546Sopenharmony_cistatic const uint32_t * 102bf215546Sopenharmony_ciinterp_check(struct lp_linear_elem *elem) 103bf215546Sopenharmony_ci{ 104bf215546Sopenharmony_ci struct lp_linear_interp *interp = (struct lp_linear_interp *)elem; 105bf215546Sopenharmony_ci interp->row[0] = 1; 106bf215546Sopenharmony_ci return interp->row; 107bf215546Sopenharmony_ci} 108bf215546Sopenharmony_ci 109bf215546Sopenharmony_ci/* Not quite a noop - we use row[0] to track whether this gets called 110bf215546Sopenharmony_ci * or not, so we can optimize which interpolants we care about. 111bf215546Sopenharmony_ci */ 112bf215546Sopenharmony_civoid 113bf215546Sopenharmony_cilp_linear_init_noop_interp(struct lp_linear_interp *interp) 114bf215546Sopenharmony_ci{ 115bf215546Sopenharmony_ci interp->row[0] = 0; 116bf215546Sopenharmony_ci interp->base.fetch = interp_check; 117bf215546Sopenharmony_ci} 118bf215546Sopenharmony_ci 119bf215546Sopenharmony_ciboolean 120bf215546Sopenharmony_cilp_linear_init_interp(struct lp_linear_interp *interp, 121bf215546Sopenharmony_ci int x, int y, int width, int height, 122bf215546Sopenharmony_ci unsigned usage_mask, 123bf215546Sopenharmony_ci boolean perspective, 124bf215546Sopenharmony_ci float oow, 125bf215546Sopenharmony_ci const float *a0, 126bf215546Sopenharmony_ci const float *dadx, 127bf215546Sopenharmony_ci const float *dady) 128bf215546Sopenharmony_ci{ 129bf215546Sopenharmony_ci float s0[4]; 130bf215546Sopenharmony_ci float dsdx[4]; 131bf215546Sopenharmony_ci float dsdy[4]; 132bf215546Sopenharmony_ci int16_t s0_fp[8]; 133bf215546Sopenharmony_ci int16_t dsdx_fp[4]; 134bf215546Sopenharmony_ci int16_t dsdy_fp[4]; 135bf215546Sopenharmony_ci 136bf215546Sopenharmony_ci /* Zero coefficients to avoid using uninitialised values */ 137bf215546Sopenharmony_ci memset(s0, 0, sizeof(s0)); 138bf215546Sopenharmony_ci memset(dsdx, 0, sizeof(dsdx)); 139bf215546Sopenharmony_ci memset(dsdy, 0, sizeof(dsdy)); 140bf215546Sopenharmony_ci memset(s0_fp, 0, sizeof(s0_fp)); 141bf215546Sopenharmony_ci memset(dsdx_fp, 0, sizeof(dsdx_fp)); 142bf215546Sopenharmony_ci memset(dsdy_fp, 0, sizeof(dsdy_fp)); 143bf215546Sopenharmony_ci 144bf215546Sopenharmony_ci if (perspective && oow != 1.0f) { 145bf215546Sopenharmony_ci for (unsigned j = 0; j < 4; j++) { 146bf215546Sopenharmony_ci if (usage_mask & (1<<j)) { 147bf215546Sopenharmony_ci s0[j] = a0[j] * oow; 148bf215546Sopenharmony_ci dsdx[j] = dadx[j] * oow; 149bf215546Sopenharmony_ci dsdy[j] = dady[j] * oow; 150bf215546Sopenharmony_ci } 151bf215546Sopenharmony_ci } 152bf215546Sopenharmony_ci } else { 153bf215546Sopenharmony_ci for (unsigned j = 0; j < 4; j++) { 154bf215546Sopenharmony_ci if (usage_mask & (1<<j)) { 155bf215546Sopenharmony_ci s0[j] = a0[j]; 156bf215546Sopenharmony_ci dsdx[j] = dadx[j]; 157bf215546Sopenharmony_ci dsdy[j] = dady[j]; 158bf215546Sopenharmony_ci } 159bf215546Sopenharmony_ci } 160bf215546Sopenharmony_ci } 161bf215546Sopenharmony_ci 162bf215546Sopenharmony_ci s0[0] += x * dsdx[0] + y * dsdy[0]; 163bf215546Sopenharmony_ci s0[1] += x * dsdx[1] + y * dsdy[1]; 164bf215546Sopenharmony_ci s0[2] += x * dsdx[2] + y * dsdy[2]; 165bf215546Sopenharmony_ci s0[3] += x * dsdx[3] + y * dsdy[3]; 166bf215546Sopenharmony_ci 167bf215546Sopenharmony_ci /* XXX: lift all of this into the rectangle setup code. 168bf215546Sopenharmony_ci * 169bf215546Sopenharmony_ci * For rectangles with linear shaders, at setup time: 170bf215546Sopenharmony_ci * - if w is constant (else mark as non-fastpath) 171bf215546Sopenharmony_ci * - premultiply perspective interpolants by w 172bf215546Sopenharmony_ci * - set w = 1 in position 173bf215546Sopenharmony_ci * - check all interpolants for min/max 0..1 (else mark as 174bf215546Sopenharmony_ci * non-fastpath) 175bf215546Sopenharmony_ci */ 176bf215546Sopenharmony_ci for (unsigned j = 0; j < 4; j++) { 177bf215546Sopenharmony_ci if (usage_mask & (1<<j)) { 178bf215546Sopenharmony_ci // compute texcoords at rect corners 179bf215546Sopenharmony_ci float a = s0[j]; 180bf215546Sopenharmony_ci float b = s0[j] + (width - 1) * dsdx[j]; 181bf215546Sopenharmony_ci float c = s0[j] + (height - 1) * dsdy[j]; 182bf215546Sopenharmony_ci float d = s0[j] + (height - 1) * dsdy[j] + (width - 1) * dsdx[j]; 183bf215546Sopenharmony_ci 184bf215546Sopenharmony_ci if (MIN4(a,b,c,d) < 0.0) 185bf215546Sopenharmony_ci FAIL("min < 0.0"); // out of bounds 186bf215546Sopenharmony_ci 187bf215546Sopenharmony_ci if (MAX4(a,b,c,d) > 1.0) 188bf215546Sopenharmony_ci FAIL("max > 1.0"); // out of bounds 189bf215546Sopenharmony_ci 190bf215546Sopenharmony_ci dsdx_fp[j] = float_to_sfixed_1_15(dsdx[j]); 191bf215546Sopenharmony_ci dsdy_fp[j] = float_to_sfixed_1_15(dsdy[j]); 192bf215546Sopenharmony_ci 193bf215546Sopenharmony_ci s0_fp[j] = float_to_ufixed_1_15(s0[j]); // first pixel 194bf215546Sopenharmony_ci s0_fp[j + 4] = s0_fp[j] + dsdx_fp[j]; // second pixel 195bf215546Sopenharmony_ci 196bf215546Sopenharmony_ci dsdx_fp[j] *= 2; 197bf215546Sopenharmony_ci } 198bf215546Sopenharmony_ci } 199bf215546Sopenharmony_ci 200bf215546Sopenharmony_ci interp->width = align(width, 4); 201bf215546Sopenharmony_ci /* RGBA->BGRA swizzle here */ 202bf215546Sopenharmony_ci interp->a0 = _mm_setr_epi16(s0_fp[2], s0_fp[1], s0_fp[0], s0_fp[3], 203bf215546Sopenharmony_ci s0_fp[6], s0_fp[5], s0_fp[4], s0_fp[7]); 204bf215546Sopenharmony_ci 205bf215546Sopenharmony_ci interp->dadx = _mm_setr_epi16(dsdx_fp[2], dsdx_fp[1], dsdx_fp[0], dsdx_fp[3], 206bf215546Sopenharmony_ci dsdx_fp[2], dsdx_fp[1], dsdx_fp[0], dsdx_fp[3]); 207bf215546Sopenharmony_ci 208bf215546Sopenharmony_ci interp->dady = _mm_setr_epi16(dsdy_fp[2], dsdy_fp[1], dsdy_fp[0], dsdy_fp[3], 209bf215546Sopenharmony_ci dsdy_fp[2], dsdy_fp[1], dsdy_fp[0], dsdy_fp[3]); 210bf215546Sopenharmony_ci 211bf215546Sopenharmony_ci /* If the value is y-invariant, eagerly calculate it here and then 212bf215546Sopenharmony_ci * always return the precalculated value. 213bf215546Sopenharmony_ci */ 214bf215546Sopenharmony_ci if (dsdy[0] == 0 && 215bf215546Sopenharmony_ci dsdy[1] == 0 && 216bf215546Sopenharmony_ci dsdy[2] == 0 && 217bf215546Sopenharmony_ci dsdy[3] == 0) { 218bf215546Sopenharmony_ci interp_0_8(&interp->base); 219bf215546Sopenharmony_ci interp->base.fetch = interp_noop; 220bf215546Sopenharmony_ci } else { 221bf215546Sopenharmony_ci interp->base.fetch = interp_0_8; 222bf215546Sopenharmony_ci } 223bf215546Sopenharmony_ci 224bf215546Sopenharmony_ci return TRUE; 225bf215546Sopenharmony_ci} 226bf215546Sopenharmony_ci 227bf215546Sopenharmony_ci#else 228bf215546Sopenharmony_ciboolean 229bf215546Sopenharmony_cilp_linear_init_interp(struct lp_linear_interp *interp, 230bf215546Sopenharmony_ci int x, int y, int width, int height, 231bf215546Sopenharmony_ci unsigned usage_mask, 232bf215546Sopenharmony_ci boolean perspective, 233bf215546Sopenharmony_ci float oow, 234bf215546Sopenharmony_ci const float *a0, 235bf215546Sopenharmony_ci const float *dadx, 236bf215546Sopenharmony_ci const float *dady) 237bf215546Sopenharmony_ci{ 238bf215546Sopenharmony_ci return FALSE; 239bf215546Sopenharmony_ci} 240bf215546Sopenharmony_ci#endif 241