1/************************************************************************** 2 * 3 * Copyright 2010-2021 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, 18 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 19 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 20 * USE OR OTHER DEALINGS IN THE SOFTWARE. 21 * 22 * The above copyright notice and this permission notice (including the 23 * next paragraph) shall be included in all copies or substantial portions 24 * of the Software. 25 * 26 **************************************************************************/ 27 28 29#include "pipe/p_config.h" 30 31#include "util/u_math.h" 32#include "util/u_cpu_detect.h" 33#include "util/u_pack_color.h" 34#include "util/u_rect.h" 35#include "util/u_sse.h" 36 37#include "lp_jit.h" 38#include "lp_rast.h" 39#include "lp_debug.h" 40#include "lp_state_fs.h" 41#include "lp_linear_priv.h" 42 43 44#if defined(PIPE_ARCH_SSE) 45 46 47/* For debugging (LP_DEBUG=linear), shade areas of run-time fallback 48 * purple. Keep blending active so we can see more of what's going 49 * on. 50 */ 51static boolean 52linear_fallback(const struct lp_rast_state *state, 53 unsigned x, unsigned y, 54 unsigned width, unsigned height, 55 uint8_t *color, 56 unsigned stride) 57{ 58 unsigned col = 0x808000ff; 59 int i; 60 61 for (y = 0; y < height; y++) { 62 for (i = 0; i < 64; i++) { 63 *((uint32_t *)(color + y*stride) + x + i) = col; 64 } 65 } 66 67 return TRUE; 68} 69 70 71/* 72 * Run our configurable linear shader pipeline: 73 * x,y is the surface position of the linear region, width, height is the size. 74 * Return TRUE for success, FALSE otherwise. 75 */ 76static boolean 77lp_fs_linear_run(const struct lp_rast_state *state, 78 unsigned x, unsigned y, 79 unsigned width, unsigned height, 80 const float (*a0)[4], 81 const float (*dadx)[4], 82 const float (*dady)[4], 83 uint8_t *color, 84 unsigned stride) 85{ 86 const struct lp_fragment_shader_variant *variant = state->variant; 87 const struct lp_tgsi_info *info = &variant->shader->info; 88 uint8_t constants[LP_MAX_LINEAR_CONSTANTS * 4]; 89 90 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); 91 92 /* Require constant w in these rectangles: 93 */ 94 if (dadx[0][3] != 0.0f || 95 dady[0][3] != 0.0f) { 96 if (LP_DEBUG & DEBUG_LINEAR2) 97 debug_printf(" -- w not constant\n"); 98 goto fail; 99 } 100 101 /* XXX: Per statechange: 102 */ 103 int nr_consts; // in floats, not float[4] 104 if (variant->shader->base.type == PIPE_SHADER_IR_TGSI) { 105 nr_consts = (info->base.file_max[TGSI_FILE_CONSTANT] + 1) * 4; 106 } else { 107 nr_consts = state->jit_context.num_constants[0]; 108 } 109 for (int i = 0; i < nr_consts; i++){ 110 float val = state->jit_context.constants[0][i]; 111 if (val < 0.0f || val > 1.0f) { 112 if (LP_DEBUG & DEBUG_LINEAR2) 113 debug_printf(" -- const[%d] out of range %f\n", i, val); 114 goto fail; 115 } 116 constants[i] = (uint8_t)(val * 255.0f); 117 } 118 119 struct lp_jit_linear_context jit; 120 jit.constants = (const uint8_t (*)[4])constants; 121 122 /* We assume BGRA ordering */ 123 assert(variant->key.cbuf_format[0] == PIPE_FORMAT_B8G8R8X8_UNORM || 124 variant->key.cbuf_format[0] == PIPE_FORMAT_B8G8R8A8_UNORM); 125 126 jit.blend_color = 127 state->jit_context.u8_blend_color[32] + 128 (state->jit_context.u8_blend_color[16] << 8) + 129 (state->jit_context.u8_blend_color[0] << 16) + 130 (state->jit_context.u8_blend_color[48] << 24); 131 132 jit.alpha_ref_value = float_to_ubyte(state->jit_context.alpha_ref_value); 133 134 /* XXX: Per primitive: 135 */ 136 struct lp_linear_interp interp[LP_MAX_LINEAR_INPUTS]; 137 const float oow = 1.0f / a0[0][3]; 138 unsigned input_mask = variant->linear_input_mask; 139 while (input_mask) { 140 int i = u_bit_scan(&input_mask); 141 unsigned usage_mask = info->base.input_usage_mask[i]; 142 boolean perspective = 143 info->base.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE || 144 (info->base.input_interpolate[i] == TGSI_INTERPOLATE_COLOR && 145 !variant->key.flatshade); 146 if (!lp_linear_init_interp(&interp[i], 147 x, y, width, height, 148 usage_mask, 149 perspective, 150 oow, 151 a0[i+1], 152 dadx[i+1], 153 dady[i+1])) { 154 if (LP_DEBUG & DEBUG_LINEAR2) 155 debug_printf(" -- init_interp(%d) failed\n", i); 156 goto fail; 157 } 158 159 jit.inputs[i] = &interp[i].base; 160 } 161 162 /* XXX: Per primitive: Initialize linear or nearest samplers: 163 */ 164 struct lp_linear_sampler samp[LP_MAX_LINEAR_TEXTURES]; 165 const int nr_tex = info->num_texs; 166 for (int i = 0; i < nr_tex; i++) { 167 const struct lp_tgsi_texture_info *tex_info = &info->tex[i]; 168 const unsigned tex_unit = tex_info->texture_unit; 169 const unsigned samp_unit = tex_info->sampler_unit; 170 //const unsigned fs_s_input = tex_info->coord[0].u.index; 171 //const unsigned fs_t_input = tex_info->coord[1].u.index; 172 173 // xxx investigate why these fail in deqp-vk 174 //assert(variant->linear_input_mask & (1 << fs_s_input)); 175 //assert(variant->linear_input_mask & (1 << fs_t_input)); 176 177 /* XXX: some texture coordinates are linear! 178 */ 179 //boolean perspective = (info->base.input_interpolate[i] == 180 // TGSI_INTERPOLATE_PERSPECTIVE); 181 182 if (!lp_linear_init_sampler(&samp[i], tex_info, 183 lp_fs_variant_key_sampler_idx(&variant->key, samp_unit), 184 &state->jit_context.textures[tex_unit], 185 x, y, width, height, a0, dadx, dady)) { 186 if (LP_DEBUG & DEBUG_LINEAR2) 187 debug_printf(" -- init_sampler(%d) failed\n", i); 188 goto fail; 189 } 190 191 jit.tex[i] = &samp[i].base; 192 } 193 194 /* JIT function already does blending */ 195 jit.color0 = color + x * 4 + y * stride; 196 lp_jit_linear_llvm_func jit_func = variant->jit_linear_llvm; 197 198 for (unsigned iy = 0; iy < height; iy++) { 199 jit_func(&jit, 0, 0, width); // x=0, y=0 200 jit.color0 += stride; 201 } 202 203 return TRUE; 204 205fail: 206 /* Visually distinguish this from other fallbacks: 207 */ 208 if (LP_DEBUG & DEBUG_LINEAR) { 209 return linear_fallback(state, x, y, width, height, color, stride); 210 } 211 212 return FALSE; 213} 214 215 216static void 217check_linear_interp_mask_a(struct lp_fragment_shader_variant *variant) 218{ 219 const struct lp_tgsi_info *info = &variant->shader->info; 220 struct lp_jit_linear_context jit; 221 222 struct lp_linear_sampler samp[LP_MAX_LINEAR_TEXTURES]; 223 struct lp_linear_interp interp[LP_MAX_LINEAR_INPUTS]; 224 uint8_t constants[LP_MAX_LINEAR_CONSTANTS][4]; 225 alignas(16) uint8_t color0[TILE_SIZE*4]; 226 227 const int nr_inputs = info->base.file_max[TGSI_FILE_INPUT]+1; 228 const int nr_tex = info->num_texs; 229 230 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); 231 232 jit.constants = (const uint8_t (*)[4])constants; 233 234 for (int i = 0; i < nr_tex; i++) { 235 lp_linear_init_noop_sampler(&samp[i]); 236 jit.tex[i] = &samp[i].base; 237 } 238 239 for (int i = 0; i < nr_inputs; i++) { 240 lp_linear_init_noop_interp(&interp[i]); 241 jit.inputs[i] = &interp[i].base; 242 } 243 244 jit.color0 = color0; 245 246 (void)variant->jit_linear_llvm(&jit, 0, 0, 0); 247 248 /* Find out which interpolators were called, and store this as a 249 * mask: 250 */ 251 for (int i = 0; i < nr_inputs; i++) { 252 variant->linear_input_mask |= (interp[i].row[0] << i); 253 } 254} 255 256 257/* Until the above is working, look at texture information and guess 258 * that any input used as a texture coordinate is not used for 259 * anything else. 260 */ 261static void 262check_linear_interp_mask_b(struct lp_fragment_shader_variant *variant) 263{ 264 const struct lp_tgsi_info *info = &variant->shader->info; 265 int nr_inputs = info->base.file_max[TGSI_FILE_INPUT]+1; 266 int nr_tex = info->num_texs; 267 unsigned tex_mask = 0; 268 int i; 269 270 LP_DBG(DEBUG_RAST, "%s\n", __FUNCTION__); 271 272 for (i = 0; i < nr_tex; i++) { 273 const struct lp_tgsi_texture_info *tex_info = &info->tex[i]; 274 const struct lp_tgsi_channel_info *schan = &tex_info->coord[0]; 275 const struct lp_tgsi_channel_info *tchan = &tex_info->coord[1]; 276 tex_mask |= 1 << schan->u.index; 277 tex_mask |= 1 << tchan->u.index; 278 } 279 280 variant->linear_input_mask = ((1 << nr_inputs) - 1) & ~tex_mask; 281} 282 283 284void 285lp_linear_check_variant(struct lp_fragment_shader_variant *variant) 286{ 287 const struct lp_fragment_shader_variant_key *key = &variant->key; 288 const struct lp_fragment_shader *shader = variant->shader; 289 const struct lp_tgsi_info *info = &shader->info; 290 291 if (info->base.file_max[TGSI_FILE_CONSTANT] >= LP_MAX_LINEAR_CONSTANTS || 292 info->base.file_max[TGSI_FILE_INPUT] >= LP_MAX_LINEAR_INPUTS) { 293 if (LP_DEBUG & DEBUG_LINEAR) 294 debug_printf(" -- too many inputs/constants\n"); 295 goto fail; 296 } 297 298 /* If we have a fastpath which implements the entire variant, use 299 * that. 300 */ 301 if (lp_linear_check_fastpath(variant)) { 302 return; 303 } 304 305 /* Otherwise, can we build up a spanline-based linear path for this 306 * variant? 307 */ 308 309 /* Check static sampler state. 310 */ 311 for (unsigned i = 0; i < info->num_texs; i++) { 312 const struct lp_tgsi_texture_info *tex_info = &info->tex[i]; 313 const unsigned unit = tex_info->sampler_unit; 314 315 /* XXX: Relax this once setup premultiplies by oow: 316 */ 317 if (info->base.input_interpolate[unit] != TGSI_INTERPOLATE_PERSPECTIVE) { 318 if (LP_DEBUG & DEBUG_LINEAR) 319 debug_printf(" -- samp[%d]: texcoord not perspective\n", i); 320 goto fail; 321 } 322 323 struct lp_sampler_static_state *samp = 324 lp_fs_variant_key_sampler_idx(key, unit); 325 if (!lp_linear_check_sampler(samp, tex_info)) { 326 if (LP_DEBUG & DEBUG_LINEAR) 327 debug_printf(" -- samp[%d]: check_sampler failed\n", i); 328 goto fail; 329 } 330 } 331 332 /* Check shader. May not have been jitted. 333 */ 334 if (variant->linear_function == NULL) { 335 if (LP_DEBUG & DEBUG_LINEAR) 336 debug_printf(" -- no linear shader\n"); 337 goto fail; 338 } 339 340 /* Hook in the catchall shader runner: 341 */ 342 variant->jit_linear = lp_fs_linear_run; 343 344 /* Figure out which inputs we don't need to interpolate (because 345 * they are only used as texture coordinates). This is important 346 * as we can cope with texture coordinates which exceed 1.0, but 347 * cannot do so for regular inputs. 348 */ 349 if (1) 350 check_linear_interp_mask_a(variant); 351 else 352 check_linear_interp_mask_b(variant); 353 354 355 if (0) { 356 lp_debug_fs_variant(variant); 357 debug_printf("linear input mask: 0x%x\n", variant->linear_input_mask); 358 } 359 360 return; 361 362fail: 363 if (LP_DEBUG & DEBUG_LINEAR) { 364 lp_debug_fs_variant(variant); 365 debug_printf(" ----> no linear path for this variant\n"); 366 } 367} 368 369 370#else 371void 372lp_linear_check_variant(struct lp_fragment_shader_variant *variant) 373{ 374} 375#endif 376