/*
 * Copyright © 2015 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/*
 * This lowering pass supports (as configured via nir_lower_tex_options)
 * various texture related conversions:
 *   + texture projector lowering: converts the coordinate division for
 *     texture projection to be done in ALU instructions instead of
 *     asking the texture operation to do so.
 *   + lowering RECT: converts the un-normalized RECT texture coordinates
 *     to normalized coordinates with txs plus ALU instructions
 *   + saturate s/t/r coords: to emulate certain texture clamp/wrap modes,
 *     inserts instructions to clamp specified coordinates to [0.0, 1.0].
 *     Note that this automatically triggers texture projector lowering if
 *     needed, since clamping must happen after projector lowering.
 *   + YUV-to-RGB conversion: to allow sampling YUV values as RGB values
 *     according to a specific YUV color space and range.
 */
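
/*
 * Typical driver usage is to fill out a nir_lower_tex_options struct and
 * run this pass once.  An illustrative sketch (the particular option
 * values here are made up, not a recommendation):
 *
 *    nir_lower_tex_options tex_options = {
 *       .lower_txp = ~0u,     (lower projectors for every sampler dim)
 *       .lower_rect = true,   (normalize RECT coordinates using txs)
 *    };
 *    NIR_PASS(progress, shader, nir_lower_tex, &tex_options);
 */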

#include "nir.h"
#include "nir_builder.h"
#include "nir_builtin_builder.h"
#include "nir_format_convert.h"

typedef struct nir_const_value_3_4 {
   nir_const_value v[3][4];
} nir_const_value_3_4;

static const nir_const_value_3_4 bt601_limited_range_csc_coeffs = { {
   { { .f32 = 1.16438356f }, { .f32 = 1.16438356f }, { .f32 = 1.16438356f } },
   { { .f32 = 0.0f }, { .f32 = -0.39176229f }, { .f32 = 2.01723214f } },
   { { .f32 = 1.59602678f }, { .f32 = -0.81296764f }, { .f32 = 0.0f } },
} };
static const nir_const_value_3_4 bt601_full_range_csc_coeffs = { {
   { { .f32 = 1.0f }, { .f32 = 1.0f }, { .f32 = 1.0f } },
   { { .f32 = 0.0f }, { .f32 = -0.34413629f }, { .f32 = 1.772f } },
   { { .f32 = 1.402f }, { .f32 = -0.71413629f }, { .f32 = 0.0f } },
} };
static const nir_const_value_3_4 bt709_limited_range_csc_coeffs = { {
   { { .f32 = 1.16438356f }, { .f32 = 1.16438356f }, { .f32 = 1.16438356f } },
   { { .f32 = 0.0f }, { .f32 = -0.21324861f }, { .f32 = 2.11240179f } },
   { { .f32 = 1.79274107f }, { .f32 = -0.53290933f }, { .f32 = 0.0f } },
} };
static const nir_const_value_3_4 bt709_full_range_csc_coeffs = { {
   { { .f32 = 1.0f }, { .f32 = 1.0f }, { .f32 = 1.0f } },
   { { .f32 = 0.0f }, { .f32 = -0.18732427f }, { .f32 = 1.8556f } },
   { { .f32 = 1.5748f }, { .f32 = -0.46812427f }, { .f32 = 0.0f } },
} };
static const nir_const_value_3_4 bt2020_limited_range_csc_coeffs = { {
   { { .f32 = 1.16438356f }, { .f32 = 1.16438356f }, { .f32 = 1.16438356f } },
   { { .f32 = 0.0f }, { .f32 = -0.18732610f }, { .f32 = 2.14177232f } },
   { { .f32 = 1.67878795f }, { .f32 = -0.65046843f }, { .f32 = 0.0f } },
} };
static const nir_const_value_3_4 bt2020_full_range_csc_coeffs = { {
   { { .f32 = 1.0f }, { .f32 = 1.0f }, { .f32 = 1.0f } },
   { { .f32 = 0.0f }, { .f32 = -0.16455313f }, { .f32 = 1.88140000f } },
   { { .f32 = 1.4747f }, { .f32 = -0.57139187f }, { .f32 = 0.0f } },
} };

static const float bt601_limited_range_csc_offsets[3] = {
   -0.874202218f, 0.531667823f, -1.085630789f
};
static const float bt601_full_range_csc_offsets[3] = {
   -0.701000000f, 0.529136286f, -0.886000000f
};
static const float bt709_limited_range_csc_offsets[3] = {
   -0.972945075f, 0.301482665f, -1.133402218f
};
static const float bt709_full_range_csc_offsets[3] = {
   -0.787400000f, 0.327724273f, -0.927800000f
};
static const float bt2020_limited_range_csc_offsets[3] = {
   -0.915745075f, 0.347480639f, -1.148145075f
};
static const float bt2020_full_range_csc_offsets[3] = {
   -0.737350000f, 0.367972500f, -0.940700000f
};
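
/*
 * The CSC offsets above are the constant terms produced by folding the
 * standard YCbCr biases through the matrices: limited range biases Y by
 * 16/255 and the chroma channels by 128/255, while full range only biases
 * the chroma channels, by 0.5.  A sketch of the arithmetic for BT.601
 * limited range:
 *
 *    offset[0] = -(1.16438356 * 16/255 + 1.59602678 * 128/255)
 *              = -0.874202218
 */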

static bool
project_src(nir_builder *b, nir_tex_instr *tex)
{
   /* Find the projector in the srcs list, if present. */
   int proj_index = nir_tex_instr_src_index(tex, nir_tex_src_projector);
   if (proj_index < 0)
      return false;

   b->cursor = nir_before_instr(&tex->instr);

   nir_ssa_def *inv_proj =
      nir_frcp(b, nir_ssa_for_src(b, tex->src[proj_index].src, 1));

   /* Walk through the sources projecting the arguments. */
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      switch (tex->src[i].src_type) {
      case nir_tex_src_coord:
      case nir_tex_src_comparator:
         break;
      default:
         continue;
      }
      nir_ssa_def *unprojected =
         nir_ssa_for_src(b, tex->src[i].src, nir_tex_instr_src_size(tex, i));
      nir_ssa_def *projected = nir_fmul(b, unprojected, inv_proj);

      /* Array indices don't get projected, so make a new vector with the
       * coordinate's array index untouched.
       */
      if (tex->is_array && tex->src[i].src_type == nir_tex_src_coord) {
         switch (tex->coord_components) {
         case 4:
            projected = nir_vec4(b,
                                 nir_channel(b, projected, 0),
                                 nir_channel(b, projected, 1),
                                 nir_channel(b, projected, 2),
                                 nir_channel(b, unprojected, 3));
            break;
         case 3:
            projected = nir_vec3(b,
                                 nir_channel(b, projected, 0),
                                 nir_channel(b, projected, 1),
                                 nir_channel(b, unprojected, 2));
            break;
         case 2:
            projected = nir_vec2(b,
                                 nir_channel(b, projected, 0),
                                 nir_channel(b, unprojected, 1));
            break;
         default:
            unreachable("bad texture coord count for array");
            break;
         }
      }

      nir_instr_rewrite_src(&tex->instr,
                            &tex->src[i].src,
                            nir_src_for_ssa(projected));
   }

   nir_tex_instr_remove_src(tex, proj_index);
   return true;
}
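
/*
 * In GLSL terms, the projector lowering above is (an illustrative sketch,
 * ignoring the comparator case):
 *
 *    textureProj(s, vec3(u, v, q))  ->  texture(s, vec2(u, v) * (1.0 / q))
 *
 * with any array index left unprojected.
 */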

static bool
lower_offset(nir_builder *b, nir_tex_instr *tex)
{
   int offset_index = nir_tex_instr_src_index(tex, nir_tex_src_offset);
   if (offset_index < 0)
      return false;

   int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
   assert(coord_index >= 0);

   assert(tex->src[offset_index].src.is_ssa);
   assert(tex->src[coord_index].src.is_ssa);
   nir_ssa_def *offset = tex->src[offset_index].src.ssa;
   nir_ssa_def *coord = tex->src[coord_index].src.ssa;

   b->cursor = nir_before_instr(&tex->instr);

   nir_ssa_def *offset_coord;
   if (nir_tex_instr_src_type(tex, coord_index) == nir_type_float) {
      if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
         offset_coord = nir_fadd(b, coord, nir_i2f32(b, offset));
      } else {
         nir_ssa_def *txs = nir_i2f32(b, nir_get_texture_size(b, tex));
         nir_ssa_def *scale = nir_frcp(b, txs);

         offset_coord = nir_fadd(b, coord,
                                 nir_fmul(b,
                                          nir_i2f32(b, offset),
                                          scale));
      }
   } else {
      offset_coord = nir_iadd(b, coord, offset);
   }

   if (tex->is_array) {
      /* The offset is not applied to the array index */
      if (tex->coord_components == 2) {
         offset_coord = nir_vec2(b, nir_channel(b, offset_coord, 0),
                                 nir_channel(b, coord, 1));
      } else if (tex->coord_components == 3) {
         offset_coord = nir_vec3(b, nir_channel(b, offset_coord, 0),
                                 nir_channel(b, offset_coord, 1),
                                 nir_channel(b, coord, 2));
      } else {
         unreachable("Invalid number of components");
      }
   }

   nir_instr_rewrite_src(&tex->instr, &tex->src[coord_index].src,
                         nir_src_for_ssa(offset_coord));

   nir_tex_instr_remove_src(tex, offset_index);

   return true;
}
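
/*
 * Summarizing the cases above, lower_offset() folds the texel offset into
 * the coordinate as:
 *
 *    normalized float coords:      coord' = coord + offset / textureSize
 *    RECT (texel-space) coords:    coord' = coord + offset
 *    integer coords (e.g. txf):    coord' = coord + offset
 *
 * with the array index, if any, left untouched.
 */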

static void
lower_rect(nir_builder *b, nir_tex_instr *tex)
{
   /* Set the sampler_dim to 2D here so that get_texture_size picks up the
    * right dimensionality.
    */
   tex->sampler_dim = GLSL_SAMPLER_DIM_2D;

   nir_ssa_def *txs = nir_i2f32(b, nir_get_texture_size(b, tex));
   nir_ssa_def *scale = nir_frcp(b, txs);
   int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);

   if (coord_index != -1) {
      nir_ssa_def *coords =
         nir_ssa_for_src(b, tex->src[coord_index].src, tex->coord_components);
      nir_instr_rewrite_src(&tex->instr,
                            &tex->src[coord_index].src,
                            nir_src_for_ssa(nir_fmul(b, coords, scale)));
   }
}

static void
lower_rect_tex_scale(nir_builder *b, nir_tex_instr *tex)
{
   b->cursor = nir_before_instr(&tex->instr);

   nir_ssa_def *idx = nir_imm_int(b, tex->texture_index);
   nir_ssa_def *scale = nir_build_load_texture_rect_scaling(b, 32, idx);
   int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);

   if (coord_index != -1) {
      nir_ssa_def *coords =
         nir_ssa_for_src(b, tex->src[coord_index].src, tex->coord_components);
      nir_instr_rewrite_src(&tex->instr,
                            &tex->src[coord_index].src,
                            nir_src_for_ssa(nir_fmul(b, coords, scale)));
   }
}

static void
lower_lod(nir_builder *b, nir_tex_instr *tex, nir_ssa_def *lod)
{
   assert(tex->op == nir_texop_tex || tex->op == nir_texop_txb);
   assert(nir_tex_instr_src_index(tex, nir_tex_src_lod) < 0);
   assert(nir_tex_instr_src_index(tex, nir_tex_src_ddx) < 0);
   assert(nir_tex_instr_src_index(tex, nir_tex_src_ddy) < 0);

   int bias_idx = nir_tex_instr_src_index(tex, nir_tex_src_bias);
   if (bias_idx >= 0) {
      /* If we have a bias, add it in */
      lod = nir_fadd(b, lod, nir_ssa_for_src(b, tex->src[bias_idx].src, 1));
      nir_tex_instr_remove_src(tex, bias_idx);
   }

   int min_lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_min_lod);
   if (min_lod_idx >= 0) {
      /* If we have a minimum LOD, clamp LOD accordingly */
      lod = nir_fmax(b, lod, nir_ssa_for_src(b, tex->src[min_lod_idx].src, 1));
      nir_tex_instr_remove_src(tex, min_lod_idx);
   }

   nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_src_for_ssa(lod));
   tex->op = nir_texop_txl;
}

static void
lower_implicit_lod(nir_builder *b, nir_tex_instr *tex)
{
   b->cursor = nir_before_instr(&tex->instr);
   lower_lod(b, tex, nir_get_texture_lod(b, tex));
}

static void
lower_zero_lod(nir_builder *b, nir_tex_instr *tex)
{
   b->cursor = nir_before_instr(&tex->instr);

   if (tex->op == nir_texop_lod) {
      nir_ssa_def_rewrite_uses(&tex->dest.ssa, nir_imm_int(b, 0));
      nir_instr_remove(&tex->instr);
      return;
   }

   lower_lod(b, tex, nir_imm_int(b, 0));
}

static nir_ssa_def *
sample_plane(nir_builder *b, nir_tex_instr *tex, int plane,
             const nir_lower_tex_options *options)
{
   assert(tex->dest.is_ssa);
   assert(nir_tex_instr_dest_size(tex) == 4);
   assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float);
   assert(tex->op == nir_texop_tex);
   assert(tex->coord_components == 2);

   nir_tex_instr *plane_tex =
      nir_tex_instr_create(b->shader, tex->num_srcs + 1);
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      nir_src_copy(&plane_tex->src[i].src, &tex->src[i].src);
      plane_tex->src[i].src_type = tex->src[i].src_type;
   }
   plane_tex->src[tex->num_srcs].src = nir_src_for_ssa(nir_imm_int(b, plane));
   plane_tex->src[tex->num_srcs].src_type = nir_tex_src_plane;
   plane_tex->op = nir_texop_tex;
   plane_tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
   plane_tex->dest_type = nir_type_float | nir_dest_bit_size(tex->dest);
   plane_tex->coord_components = 2;

   plane_tex->texture_index = tex->texture_index;
   plane_tex->sampler_index = tex->sampler_index;

   nir_ssa_dest_init(&plane_tex->instr, &plane_tex->dest, 4,
                     nir_dest_bit_size(tex->dest), NULL);

   nir_builder_instr_insert(b, &plane_tex->instr);

   /* If scale_factors is set for this texture, return a scaled value. */
   if (options->scale_factors[tex->texture_index])
      return nir_fmul_imm(b, &plane_tex->dest.ssa,
                          options->scale_factors[tex->texture_index]);

   return &plane_tex->dest.ssa;
}

static void
convert_yuv_to_rgb(nir_builder *b, nir_tex_instr *tex,
                   nir_ssa_def *y, nir_ssa_def *u, nir_ssa_def *v,
                   nir_ssa_def *a,
                   const nir_lower_tex_options *options,
                   unsigned texture_index)
{
   const float *offset_vals;
   const nir_const_value_3_4 *m;
   assert((options->bt709_external & options->bt2020_external) == 0);
   if (options->yuv_full_range_external & (1u << texture_index)) {
      if (options->bt709_external & (1u << texture_index)) {
         m = &bt709_full_range_csc_coeffs;
         offset_vals = bt709_full_range_csc_offsets;
      } else if (options->bt2020_external & (1u << texture_index)) {
         m = &bt2020_full_range_csc_coeffs;
         offset_vals = bt2020_full_range_csc_offsets;
      } else {
         m = &bt601_full_range_csc_coeffs;
         offset_vals = bt601_full_range_csc_offsets;
      }
   } else {
      if (options->bt709_external & (1u << texture_index)) {
         m = &bt709_limited_range_csc_coeffs;
         offset_vals = bt709_limited_range_csc_offsets;
      } else if (options->bt2020_external & (1u << texture_index)) {
         m = &bt2020_limited_range_csc_coeffs;
         offset_vals = bt2020_limited_range_csc_offsets;
      } else {
         m = &bt601_limited_range_csc_coeffs;
         offset_vals = bt601_limited_range_csc_offsets;
      }
   }

   unsigned bit_size = nir_dest_bit_size(tex->dest);

   nir_ssa_def *offset =
      nir_vec4(b,
               nir_imm_floatN_t(b, offset_vals[0], a->bit_size),
               nir_imm_floatN_t(b, offset_vals[1], a->bit_size),
               nir_imm_floatN_t(b, offset_vals[2], a->bit_size),
               a);

   offset = nir_f2fN(b, offset, bit_size);

   nir_ssa_def *m0 = nir_f2fN(b, nir_build_imm(b, 4, 32, m->v[0]), bit_size);
   nir_ssa_def *m1 = nir_f2fN(b, nir_build_imm(b, 4, 32, m->v[1]), bit_size);
   nir_ssa_def *m2 = nir_f2fN(b, nir_build_imm(b, 4, 32, m->v[2]), bit_size);

   nir_ssa_def *result =
      nir_ffma(b, y, m0, nir_ffma(b, u, m1, nir_ffma(b, v, m2, offset)));

   nir_ssa_def_rewrite_uses(&tex->dest.ssa, result);
}
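
/*
 * I.e. the ffma chain in convert_yuv_to_rgb() evaluates the affine
 * transform
 *
 *    rgb = m[0] * y + m[1] * u + m[2] * v + offset
 *
 * column by column, with the caller-provided alpha passed through in the
 * fourth channel (the fourth row of each column is zero).
 */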

static void
lower_y_uv_external(nir_builder *b, nir_tex_instr *tex,
                    const nir_lower_tex_options *options,
                    unsigned texture_index)
{
   b->cursor = nir_after_instr(&tex->instr);

   nir_ssa_def *y = sample_plane(b, tex, 0, options);
   nir_ssa_def *uv = sample_plane(b, tex, 1, options);

   convert_yuv_to_rgb(b, tex,
                      nir_channel(b, y, 0),
                      nir_channel(b, uv, 0),
                      nir_channel(b, uv, 1),
                      nir_imm_float(b, 1.0f),
                      options,
                      texture_index);
}

static void
lower_y_u_v_external(nir_builder *b, nir_tex_instr *tex,
                     const nir_lower_tex_options *options,
                     unsigned texture_index)
{
   b->cursor = nir_after_instr(&tex->instr);

   nir_ssa_def *y = sample_plane(b, tex, 0, options);
   nir_ssa_def *u = sample_plane(b, tex, 1, options);
   nir_ssa_def *v = sample_plane(b, tex, 2, options);

   convert_yuv_to_rgb(b, tex,
                      nir_channel(b, y, 0),
                      nir_channel(b, u, 0),
                      nir_channel(b, v, 0),
                      nir_imm_float(b, 1.0f),
                      options,
                      texture_index);
}

static void
lower_yx_xuxv_external(nir_builder *b, nir_tex_instr *tex,
                       const nir_lower_tex_options *options,
                       unsigned texture_index)
{
   b->cursor = nir_after_instr(&tex->instr);

   nir_ssa_def *y = sample_plane(b, tex, 0, options);
   nir_ssa_def *xuxv = sample_plane(b, tex, 1, options);

   convert_yuv_to_rgb(b, tex,
                      nir_channel(b, y, 0),
                      nir_channel(b, xuxv, 1),
                      nir_channel(b, xuxv, 3),
                      nir_imm_float(b, 1.0f),
                      options,
                      texture_index);
}

static void
lower_xy_uxvx_external(nir_builder *b, nir_tex_instr *tex,
                       const nir_lower_tex_options *options,
                       unsigned texture_index)
{
   b->cursor = nir_after_instr(&tex->instr);

   nir_ssa_def *y = sample_plane(b, tex, 0, options);
   nir_ssa_def *uxvx = sample_plane(b, tex, 1, options);

   convert_yuv_to_rgb(b, tex,
                      nir_channel(b, y, 1),
                      nir_channel(b, uxvx, 0),
                      nir_channel(b, uxvx, 2),
                      nir_imm_float(b, 1.0f),
                      options,
                      texture_index);
}

static void
lower_ayuv_external(nir_builder *b, nir_tex_instr *tex,
                    const nir_lower_tex_options *options,
                    unsigned texture_index)
{
   b->cursor = nir_after_instr(&tex->instr);

   nir_ssa_def *ayuv = sample_plane(b, tex, 0, options);

   convert_yuv_to_rgb(b, tex,
                      nir_channel(b, ayuv, 2),
                      nir_channel(b, ayuv, 1),
                      nir_channel(b, ayuv, 0),
                      nir_channel(b, ayuv, 3),
                      options,
                      texture_index);
}

static void
lower_y41x_external(nir_builder *b, nir_tex_instr *tex,
                    const nir_lower_tex_options *options,
                    unsigned texture_index)
{
   b->cursor = nir_after_instr(&tex->instr);

   nir_ssa_def *y41x = sample_plane(b, tex, 0, options);

   convert_yuv_to_rgb(b, tex,
                      nir_channel(b, y41x, 1),
                      nir_channel(b, y41x, 0),
                      nir_channel(b, y41x, 2),
                      nir_channel(b, y41x, 3),
                      options,
                      texture_index);
}

static void
lower_xyuv_external(nir_builder *b, nir_tex_instr *tex,
                    const nir_lower_tex_options *options,
                    unsigned texture_index)
{
   b->cursor = nir_after_instr(&tex->instr);

   nir_ssa_def *xyuv = sample_plane(b, tex, 0, options);

   convert_yuv_to_rgb(b, tex,
                      nir_channel(b, xyuv, 2),
                      nir_channel(b, xyuv, 1),
                      nir_channel(b, xyuv, 0),
                      nir_imm_float(b, 1.0f),
                      options,
                      texture_index);
}

static void
lower_yuv_external(nir_builder *b, nir_tex_instr *tex,
                   const nir_lower_tex_options *options,
                   unsigned texture_index)
{
   b->cursor = nir_after_instr(&tex->instr);

   nir_ssa_def *yuv = sample_plane(b, tex, 0, options);

   convert_yuv_to_rgb(b, tex,
                      nir_channel(b, yuv, 0),
                      nir_channel(b, yuv, 1),
                      nir_channel(b, yuv, 2),
                      nir_imm_float(b, 1.0f),
                      options,
                      texture_index);
}

static void
lower_yu_yv_external(nir_builder *b, nir_tex_instr *tex,
                     const nir_lower_tex_options *options,
                     unsigned texture_index)
{
   b->cursor = nir_after_instr(&tex->instr);

   nir_ssa_def *yuv = sample_plane(b, tex, 0, options);

   convert_yuv_to_rgb(b, tex,
                      nir_channel(b, yuv, 1),
                      nir_channel(b, yuv, 2),
                      nir_channel(b, yuv, 0),
                      nir_imm_float(b, 1.0f),
                      options,
                      texture_index);
}
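
/*
 * The channel selections in the helpers above correspond to the usual
 * packed and planar layouts, roughly (not an exhaustive mapping):
 *
 *    lower_y_uv_external    - two planes: Y, then interleaved UV (NV12-style)
 *    lower_y_u_v_external   - three separate planes (I420-style)
 *    lower_yx_xuxv_external - packed YUYV
 *    lower_xy_uxvx_external - packed UYVY
 */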
570 */ 571static void 572replace_gradient_with_lod(nir_builder *b, nir_ssa_def *lod, nir_tex_instr *tex) 573{ 574 assert(tex->op == nir_texop_txd); 575 576 nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_ddx)); 577 nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_ddy)); 578 579 int min_lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_min_lod); 580 if (min_lod_idx >= 0) { 581 /* If we have a minimum LOD, clamp LOD accordingly */ 582 lod = nir_fmax(b, lod, nir_ssa_for_src(b, tex->src[min_lod_idx].src, 1)); 583 nir_tex_instr_remove_src(tex, min_lod_idx); 584 } 585 586 nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_src_for_ssa(lod)); 587 tex->op = nir_texop_txl; 588} 589 590static void 591lower_gradient_cube_map(nir_builder *b, nir_tex_instr *tex) 592{ 593 assert(tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE); 594 assert(tex->op == nir_texop_txd); 595 assert(tex->dest.is_ssa); 596 597 /* Use textureSize() to get the width and height of LOD 0 */ 598 nir_ssa_def *size = nir_i2f32(b, nir_get_texture_size(b, tex)); 599 600 /* Cubemap texture lookups first generate a texture coordinate normalized 601 * to [-1, 1] on the appropiate face. The appropiate face is determined 602 * by which component has largest magnitude and its sign. The texture 603 * coordinate is the quotient of the remaining texture coordinates against 604 * that absolute value of the component of largest magnitude. This 605 * division requires that the computing of the derivative of the texel 606 * coordinate must use the quotient rule. The high level GLSL code is as 607 * follows: 608 * 609 * Step 1: selection 610 * 611 * vec3 abs_p, Q, dQdx, dQdy; 612 * abs_p = abs(ir->coordinate); 613 * if (abs_p.x >= max(abs_p.y, abs_p.z)) { 614 * Q = ir->coordinate.yzx; 615 * dQdx = ir->lod_info.grad.dPdx.yzx; 616 * dQdy = ir->lod_info.grad.dPdy.yzx; 617 * } 618 * if (abs_p.y >= max(abs_p.x, abs_p.z)) { 619 * Q = ir->coordinate.xzy; 620 * dQdx = ir->lod_info.grad.dPdx.xzy; 621 * dQdy = ir->lod_info.grad.dPdy.xzy; 622 * } 623 * if (abs_p.z >= max(abs_p.x, abs_p.y)) { 624 * Q = ir->coordinate; 625 * dQdx = ir->lod_info.grad.dPdx; 626 * dQdy = ir->lod_info.grad.dPdy; 627 * } 628 * 629 * Step 2: use quotient rule to compute derivative. The normalized to 630 * [-1, 1] texel coordinate is given by Q.xy / (sign(Q.z) * Q.z). We are 631 * only concerned with the magnitudes of the derivatives whose values are 632 * not affected by the sign. We drop the sign from the computation. 633 * 634 * vec2 dx, dy; 635 * float recip; 636 * 637 * recip = 1.0 / Q.z; 638 * dx = recip * ( dQdx.xy - Q.xy * (dQdx.z * recip) ); 639 * dy = recip * ( dQdy.xy - Q.xy * (dQdy.z * recip) ); 640 * 641 * Step 3: compute LOD. At this point we have the derivatives of the 642 * texture coordinates normalized to [-1,1]. We take the LOD to be 643 * result = log2(max(sqrt(dot(dx, dx)), sqrt(dy, dy)) * 0.5 * L) 644 * = -1.0 + log2(max(sqrt(dot(dx, dx)), sqrt(dy, dy)) * L) 645 * = -1.0 + log2(sqrt(max(dot(dx, dx), dot(dy,dy))) * L) 646 * = -1.0 + log2(sqrt(L * L * max(dot(dx, dx), dot(dy,dy)))) 647 * = -1.0 + 0.5 * log2(L * L * max(dot(dx, dx), dot(dy,dy))) 648 * where L is the dimension of the cubemap. 
    *
    * float M, result;
    * M = max(dot(dx, dx), dot(dy, dy));
    * L = textureSize(sampler, 0).x;
    * result = -1.0 + 0.5 * log2(L * L * M);
    */

   /* coordinate */
   nir_ssa_def *p =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_coord)].src.ssa;

   /* unmodified dPdx, dPdy values */
   nir_ssa_def *dPdx =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddx)].src.ssa;
   nir_ssa_def *dPdy =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddy)].src.ssa;

   nir_ssa_def *abs_p = nir_fabs(b, p);
   nir_ssa_def *abs_p_x = nir_channel(b, abs_p, 0);
   nir_ssa_def *abs_p_y = nir_channel(b, abs_p, 1);
   nir_ssa_def *abs_p_z = nir_channel(b, abs_p, 2);

   /* 1. compute selector */
   nir_ssa_def *Q, *dQdx, *dQdy;

   nir_ssa_def *cond_z = nir_fge(b, abs_p_z, nir_fmax(b, abs_p_x, abs_p_y));
   nir_ssa_def *cond_y = nir_fge(b, abs_p_y, nir_fmax(b, abs_p_x, abs_p_z));

   unsigned yzx[3] = { 1, 2, 0 };
   unsigned xzy[3] = { 0, 2, 1 };

   Q = nir_bcsel(b, cond_z,
                 p,
                 nir_bcsel(b, cond_y,
                           nir_swizzle(b, p, xzy, 3),
                           nir_swizzle(b, p, yzx, 3)));

   dQdx = nir_bcsel(b, cond_z,
                    dPdx,
                    nir_bcsel(b, cond_y,
                              nir_swizzle(b, dPdx, xzy, 3),
                              nir_swizzle(b, dPdx, yzx, 3)));

   dQdy = nir_bcsel(b, cond_z,
                    dPdy,
                    nir_bcsel(b, cond_y,
                              nir_swizzle(b, dPdy, xzy, 3),
                              nir_swizzle(b, dPdy, yzx, 3)));

   /* 2. quotient rule */

   /* tmp = Q.xy * recip;
    * dx = recip * ( dQdx.xy - (tmp * dQdx.z) );
    * dy = recip * ( dQdy.xy - (tmp * dQdy.z) );
    */
   nir_ssa_def *rcp_Q_z = nir_frcp(b, nir_channel(b, Q, 2));

   nir_ssa_def *Q_xy = nir_channels(b, Q, 0x3);
   nir_ssa_def *tmp = nir_fmul(b, Q_xy, rcp_Q_z);

   nir_ssa_def *dQdx_xy = nir_channels(b, dQdx, 0x3);
   nir_ssa_def *dQdx_z = nir_channel(b, dQdx, 2);
   nir_ssa_def *dx =
      nir_fmul(b, rcp_Q_z, nir_fsub(b, dQdx_xy, nir_fmul(b, tmp, dQdx_z)));

   nir_ssa_def *dQdy_xy = nir_channels(b, dQdy, 0x3);
   nir_ssa_def *dQdy_z = nir_channel(b, dQdy, 2);
   nir_ssa_def *dy =
      nir_fmul(b, rcp_Q_z, nir_fsub(b, dQdy_xy, nir_fmul(b, tmp, dQdy_z)));

   /* M = max(dot(dx, dx), dot(dy, dy)); */
   nir_ssa_def *M = nir_fmax(b, nir_fdot(b, dx, dx), nir_fdot(b, dy, dy));

   /* size has textureSize() of LOD 0 */
   nir_ssa_def *L = nir_channel(b, size, 0);

   /* lod = -1.0 + 0.5 * log2(L * L * M); */
   nir_ssa_def *lod =
      nir_fadd(b,
               nir_imm_float(b, -1.0f),
               nir_fmul(b,
                        nir_imm_float(b, 0.5f),
                        nir_flog2(b, nir_fmul(b, L, nir_fmul(b, L, M)))));

   /* 3. Replace the gradient instruction with an equivalent lod instruction */
   replace_gradient_with_lod(b, lod, tex);
}

static void
lower_gradient(nir_builder *b, nir_tex_instr *tex)
{
   /* Cubes are more complicated and have their own function */
   if (tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
      lower_gradient_cube_map(b, tex);
      return;
   }

   assert(tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE);
   assert(tex->op == nir_texop_txd);
   assert(tex->dest.is_ssa);

   /* Use textureSize() to get the width and height of LOD 0 */
   unsigned component_mask;
   switch (tex->sampler_dim) {
   case GLSL_SAMPLER_DIM_3D:
      component_mask = 7;
      break;
   case GLSL_SAMPLER_DIM_1D:
      component_mask = 1;
      break;
   default:
      component_mask = 3;
      break;
   }

   nir_ssa_def *size =
      nir_channels(b, nir_i2f32(b, nir_get_texture_size(b, tex)),
                   component_mask);

   /* Scale the gradients by width and height.  Effectively, the incoming
    * gradients are s'(x,y), t'(x,y), and r'(x,y) from equation 3.19 in the
    * GL 3.0 spec; we want u'(x,y), which is w_t * s'(x,y).
    */
   nir_ssa_def *ddx =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddx)].src.ssa;
   nir_ssa_def *ddy =
      tex->src[nir_tex_instr_src_index(tex, nir_tex_src_ddy)].src.ssa;

   nir_ssa_def *dPdx = nir_fmul(b, ddx, size);
   nir_ssa_def *dPdy = nir_fmul(b, ddy, size);

   nir_ssa_def *rho;
   if (dPdx->num_components == 1) {
      rho = nir_fmax(b, nir_fabs(b, dPdx), nir_fabs(b, dPdy));
   } else {
      rho = nir_fmax(b,
                     nir_fsqrt(b, nir_fdot(b, dPdx, dPdx)),
                     nir_fsqrt(b, nir_fdot(b, dPdy, dPdy)));
   }

   /* lod = log2(rho).  We're ignoring GL state biases for now. */
   nir_ssa_def *lod = nir_flog2(b, rho);

   /* Replace the gradient instruction with an equivalent lod instruction */
   replace_gradient_with_lod(b, lod, tex);
}

/* tex(s, coord) = txd(s, coord, dfdx(coord), dfdy(coord)) */
static nir_tex_instr *
lower_tex_to_txd(nir_builder *b, nir_tex_instr *tex)
{
   b->cursor = nir_after_instr(&tex->instr);
   nir_tex_instr *txd = nir_tex_instr_create(b->shader, tex->num_srcs + 2);

   txd->op = nir_texop_txd;
   txd->sampler_dim = tex->sampler_dim;
   txd->dest_type = tex->dest_type;
   txd->coord_components = tex->coord_components;
   txd->texture_index = tex->texture_index;
   txd->sampler_index = tex->sampler_index;
   txd->is_array = tex->is_array;
   txd->is_shadow = tex->is_shadow;
   txd->is_new_style_shadow = tex->is_new_style_shadow;

   /* reuse existing srcs */
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      nir_src_copy(&txd->src[i].src, &tex->src[i].src);
      txd->src[i].src_type = tex->src[i].src_type;
   }
   int coord = nir_tex_instr_src_index(tex, nir_tex_src_coord);
   assert(coord >= 0);
   nir_ssa_def *dfdx = nir_fddx(b, tex->src[coord].src.ssa);
   nir_ssa_def *dfdy = nir_fddy(b, tex->src[coord].src.ssa);
   txd->src[tex->num_srcs].src = nir_src_for_ssa(dfdx);
   txd->src[tex->num_srcs].src_type = nir_tex_src_ddx;
   txd->src[tex->num_srcs + 1].src = nir_src_for_ssa(dfdy);
   txd->src[tex->num_srcs + 1].src_type = nir_tex_src_ddy;

   nir_ssa_dest_init(&txd->instr, &txd->dest, nir_dest_num_components(tex->dest),
                     nir_dest_bit_size(tex->dest), NULL);
   nir_builder_instr_insert(b, &txd->instr);
   nir_ssa_def_rewrite_uses(&tex->dest.ssa, &txd->dest.ssa);
   nir_instr_remove(&tex->instr);
   return txd;
}

/* txb(s, coord, bias) = txl(s, coord, lod(s, coord).y + bias) */
static nir_tex_instr *
lower_txb_to_txl(nir_builder *b, nir_tex_instr *tex)
{
   b->cursor = nir_after_instr(&tex->instr);
   nir_tex_instr *txl = nir_tex_instr_create(b->shader, tex->num_srcs);

   txl->op = nir_texop_txl;
   txl->sampler_dim = tex->sampler_dim;
   txl->dest_type = tex->dest_type;
   txl->coord_components = tex->coord_components;
   txl->texture_index = tex->texture_index;
   txl->sampler_index = tex->sampler_index;
   txl->is_array = tex->is_array;
   txl->is_shadow = tex->is_shadow;
   txl->is_new_style_shadow = tex->is_new_style_shadow;

   /* reuse all but bias src */
   for (int i = 0; i < 2; i++) {
      if (tex->src[i].src_type != nir_tex_src_bias) {
         nir_src_copy(&txl->src[i].src, &tex->src[i].src);
         txl->src[i].src_type = tex->src[i].src_type;
      }
   }
   nir_ssa_def *lod = nir_get_texture_lod(b, txl);

   int bias_idx = nir_tex_instr_src_index(tex, nir_tex_src_bias);
   assert(bias_idx >= 0);
   lod = nir_fadd(b, nir_channel(b, lod, 1),
                  nir_ssa_for_src(b, tex->src[bias_idx].src, 1));
   txl->src[tex->num_srcs - 1].src = nir_src_for_ssa(lod);
   txl->src[tex->num_srcs - 1].src_type = nir_tex_src_lod;

   nir_ssa_dest_init(&txl->instr, &txl->dest, nir_dest_num_components(tex->dest),
                     nir_dest_bit_size(tex->dest), NULL);
   nir_builder_instr_insert(b, &txl->instr);
   nir_ssa_def_rewrite_uses(&tex->dest.ssa, &txl->dest.ssa);
   nir_instr_remove(&tex->instr);
   return txl;
}

static nir_tex_instr *
saturate_src(nir_builder *b, nir_tex_instr *tex, unsigned sat_mask)
{
   if (tex->op == nir_texop_tex)
      tex = lower_tex_to_txd(b, tex);
   else if (tex->op == nir_texop_txb)
      tex = lower_txb_to_txl(b, tex);

   b->cursor = nir_before_instr(&tex->instr);
   int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);

   if (coord_index != -1) {
      nir_ssa_def *src =
         nir_ssa_for_src(b, tex->src[coord_index].src, tex->coord_components);

      /* split src into components: */
      nir_ssa_def *comp[4];

      assume(tex->coord_components >= 1);

      for (unsigned j = 0; j < tex->coord_components; j++)
         comp[j] = nir_channel(b, src, j);

      /* clamp requested components, array index does not get clamped: */
      unsigned ncomp = tex->coord_components;
      if (tex->is_array)
         ncomp--;

      for (unsigned j = 0; j < ncomp; j++) {
         if ((1 << j) & sat_mask) {
            if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
               /* non-normalized texture coords, so clamp to texture
                * size rather than [0.0, 1.0]
                */
               nir_ssa_def *txs = nir_i2f32(b, nir_get_texture_size(b, tex));
               comp[j] = nir_fmax(b, comp[j], nir_imm_float(b, 0.0));
               comp[j] = nir_fmin(b, comp[j], nir_channel(b, txs, j));
            } else {
               comp[j] = nir_fsat(b, comp[j]);
            }
         }
      }

      /* and move the result back into a single vecN: */
      src = nir_vec(b, comp, tex->coord_components);

      nir_instr_rewrite_src(&tex->instr,
                            &tex->src[coord_index].src,
                            nir_src_for_ssa(src));
   }
   return tex;
}
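
/*
 * The swizzle values handled below follow the same convention as Gallium's
 * PIPE_SWIZZLE_* enums: 0-3 select the .x/.y/.z/.w channel of the texture
 * result, 4 produces the constant 0 and 5 produces the constant 1.
 */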

static nir_ssa_def *
get_zero_or_one(nir_builder *b, nir_alu_type type, uint8_t swizzle_val)
{
   nir_const_value v[4];

   memset(&v, 0, sizeof(v));

   if (swizzle_val == 4) {
      v[0].u32 = v[1].u32 = v[2].u32 = v[3].u32 = 0;
   } else {
      assert(swizzle_val == 5);
      if (type == nir_type_float32)
         v[0].f32 = v[1].f32 = v[2].f32 = v[3].f32 = 1.0;
      else
         v[0].u32 = v[1].u32 = v[2].u32 = v[3].u32 = 1;
   }

   return nir_build_imm(b, 4, 32, v);
}

static void
swizzle_tg4_broadcom(nir_builder *b, nir_tex_instr *tex)
{
   assert(tex->dest.is_ssa);

   b->cursor = nir_after_instr(&tex->instr);

   assert(nir_tex_instr_dest_size(tex) == 4);
   unsigned swiz[4] = { 2, 3, 1, 0 };
   nir_ssa_def *swizzled = nir_swizzle(b, &tex->dest.ssa, swiz, 4);

   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, swizzled,
                                  swizzled->parent_instr);
}

static void
swizzle_result(nir_builder *b, nir_tex_instr *tex, const uint8_t swizzle[4])
{
   assert(tex->dest.is_ssa);

   b->cursor = nir_after_instr(&tex->instr);

   nir_ssa_def *swizzled;
   if (tex->op == nir_texop_tg4) {
      if (swizzle[tex->component] < 4) {
         /* This one's easy */
         tex->component = swizzle[tex->component];
         return;
      } else {
         swizzled = get_zero_or_one(b, tex->dest_type, swizzle[tex->component]);
      }
   } else {
      assert(nir_tex_instr_dest_size(tex) == 4);
      if (swizzle[0] < 4 && swizzle[1] < 4 &&
          swizzle[2] < 4 && swizzle[3] < 4) {
         unsigned swiz[4] = { swizzle[0], swizzle[1], swizzle[2], swizzle[3] };
         /* We have no 0s or 1s, just emit a swizzling MOV */
         swizzled = nir_swizzle(b, &tex->dest.ssa, swiz, 4);
      } else {
         nir_ssa_scalar srcs[4];
         for (unsigned i = 0; i < 4; i++) {
            if (swizzle[i] < 4) {
               srcs[i] = nir_get_ssa_scalar(&tex->dest.ssa, swizzle[i]);
            } else {
               srcs[i] = nir_get_ssa_scalar(get_zero_or_one(b, tex->dest_type, swizzle[i]), 0);
            }
         }
         swizzled = nir_vec_scalars(b, srcs, 4);
      }
   }

   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, swizzled,
                                  swizzled->parent_instr);
}

static void
linearize_srgb_result(nir_builder *b, nir_tex_instr *tex)
{
   assert(tex->dest.is_ssa);
   assert(nir_tex_instr_dest_size(tex) == 4);
   assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float);

   b->cursor = nir_after_instr(&tex->instr);

   nir_ssa_def *rgb =
      nir_format_srgb_to_linear(b, nir_channels(b, &tex->dest.ssa, 0x7));

   /* alpha is untouched: */
   nir_ssa_def *result = nir_vec4(b,
                                  nir_channel(b, rgb, 0),
                                  nir_channel(b, rgb, 1),
                                  nir_channel(b, rgb, 2),
                                  nir_channel(b, &tex->dest.ssa, 3));

   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, result,
                                  result->parent_instr);
}

/**
 * Lowers texture instructions from giving a vec4 result to a vec2 of f16,
 * i16, or u16, or a single unorm4x8 value.
 *
 * Note that we don't change the destination num_components, because
 * nir_tex_instr_dest_size() will still return 4.  The driver is just expected
 * to not store the other channels, given that nothing at the NIR level will
 * read them.
 */
static void
lower_tex_packing(nir_builder *b, nir_tex_instr *tex,
                  const nir_lower_tex_options *options)
{
   nir_ssa_def *color = &tex->dest.ssa;

   b->cursor = nir_after_instr(&tex->instr);

   switch (options->lower_tex_packing[tex->sampler_index]) {
   case nir_lower_tex_packing_none:
      return;

   case nir_lower_tex_packing_16: {
      static const unsigned bits[4] = {16, 16, 16, 16};

      switch (nir_alu_type_get_base_type(tex->dest_type)) {
      case nir_type_float:
         switch (nir_tex_instr_dest_size(tex)) {
         case 1:
            assert(tex->is_shadow && tex->is_new_style_shadow);
            color = nir_unpack_half_2x16_split_x(b, nir_channel(b, color, 0));
            break;
         case 2: {
            nir_ssa_def *rg = nir_channel(b, color, 0);
            color = nir_vec2(b,
                             nir_unpack_half_2x16_split_x(b, rg),
                             nir_unpack_half_2x16_split_y(b, rg));
            break;
         }
         case 4: {
            nir_ssa_def *rg = nir_channel(b, color, 0);
            nir_ssa_def *ba = nir_channel(b, color, 1);
            color = nir_vec4(b,
                             nir_unpack_half_2x16_split_x(b, rg),
                             nir_unpack_half_2x16_split_y(b, rg),
                             nir_unpack_half_2x16_split_x(b, ba),
                             nir_unpack_half_2x16_split_y(b, ba));
            break;
         }
         default:
            unreachable("wrong dest_size");
         }
         break;

      case nir_type_int:
         color = nir_format_unpack_sint(b, color, bits, 4);
         break;

      case nir_type_uint:
         color = nir_format_unpack_uint(b, color, bits, 4);
         break;

      default:
         unreachable("unknown base type");
      }
      break;
   }

   case nir_lower_tex_packing_8:
      assert(nir_alu_type_get_base_type(tex->dest_type) == nir_type_float);
      color = nir_unpack_unorm_4x8(b, nir_channel(b, color, 0));
      break;
   }

   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, color,
                                  color->parent_instr);
}
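
/*
 * For the 16-bit float cases above, the hardware result is assumed to
 * arrive with the half-float channels packed into 32-bit words, e.g. a
 * vec4 comes back as (RG packed, BA packed, undef, undef) and is expanded
 * with unpack_half_2x16_split_{x,y}.
 */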

static bool
lower_array_layer_round_even(nir_builder *b, nir_tex_instr *tex)
{
   int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
   if (coord_index < 0 || nir_tex_instr_src_type(tex, coord_index) != nir_type_float)
      return false;

   assert(tex->src[coord_index].src.is_ssa);
   nir_ssa_def *coord = tex->src[coord_index].src.ssa;

   b->cursor = nir_before_instr(&tex->instr);

   unsigned layer = tex->coord_components - 1;
   nir_ssa_def *rounded_layer = nir_fround_even(b, nir_channel(b, coord, layer));
   nir_ssa_def *new_coord = nir_vector_insert_imm(b, coord, rounded_layer, layer);

   nir_instr_rewrite_src_ssa(&tex->instr, &tex->src[coord_index].src, new_coord);

   return true;
}

static bool
sampler_index_lt(nir_tex_instr *tex, unsigned max)
{
   assert(nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref) == -1);

   unsigned sampler_index = tex->sampler_index;

   int sampler_offset_idx =
      nir_tex_instr_src_index(tex, nir_tex_src_sampler_offset);
   if (sampler_offset_idx >= 0) {
      if (!nir_src_is_const(tex->src[sampler_offset_idx].src))
         return false;

      sampler_index += nir_src_as_uint(tex->src[sampler_offset_idx].src);
   }

   return sampler_index < max;
}

static bool
lower_tg4_offsets(nir_builder *b, nir_tex_instr *tex)
{
   assert(tex->op == nir_texop_tg4);
   assert(nir_tex_instr_has_explicit_tg4_offsets(tex));
   assert(nir_tex_instr_src_index(tex, nir_tex_src_offset) == -1);

   b->cursor = nir_after_instr(&tex->instr);

   nir_ssa_scalar dest[5] = { 0 };
   nir_ssa_def *residency = NULL;
   for (unsigned i = 0; i < 4; ++i) {
      nir_tex_instr *tex_copy = nir_tex_instr_create(b->shader, tex->num_srcs + 1);
      tex_copy->op = tex->op;
      tex_copy->coord_components = tex->coord_components;
      tex_copy->sampler_dim = tex->sampler_dim;
      tex_copy->is_array = tex->is_array;
      tex_copy->is_shadow = tex->is_shadow;
      tex_copy->is_new_style_shadow = tex->is_new_style_shadow;
      tex_copy->is_sparse = tex->is_sparse;
      tex_copy->component = tex->component;
      tex_copy->dest_type = tex->dest_type;

      for (unsigned j = 0; j < tex->num_srcs; ++j) {
         nir_src_copy(&tex_copy->src[j].src, &tex->src[j].src);
         tex_copy->src[j].src_type = tex->src[j].src_type;
      }

      nir_tex_src src;
      src.src = nir_src_for_ssa(nir_imm_ivec2(b, tex->tg4_offsets[i][0],
                                              tex->tg4_offsets[i][1]));
      src.src_type = nir_tex_src_offset;
      tex_copy->src[tex_copy->num_srcs - 1] = src;

      nir_ssa_dest_init(&tex_copy->instr, &tex_copy->dest,
                        nir_tex_instr_dest_size(tex), 32, NULL);

      nir_builder_instr_insert(b, &tex_copy->instr);

      dest[i] = nir_get_ssa_scalar(&tex_copy->dest.ssa, 3);
      if (tex->is_sparse) {
         nir_ssa_def *code = nir_channel(b, &tex_copy->dest.ssa, 4);
         if (residency)
            residency = nir_sparse_residency_code_and(b, residency, code);
         else
            residency = code;
      }
   }
   dest[4] = nir_get_ssa_scalar(residency, 0);

   nir_ssa_def *res = nir_vec_scalars(b, dest, tex->dest.ssa.num_components);
   nir_ssa_def_rewrite_uses(&tex->dest.ssa, res);
   nir_instr_remove(&tex->instr);

   return true;
}
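
/*
 * Each replacement gather above carries a single constant offset, so the
 * texel requested by tg4_offsets[i] lands in component 3 (.w) of copy i's
 * result; the four .w values are reassembled into one vec4, and for sparse
 * gathers the residency codes of the four fetches are ANDed together.
 */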

static bool
nir_lower_txs_lod(nir_builder *b, nir_tex_instr *tex)
{
   int lod_idx = nir_tex_instr_src_index(tex, nir_tex_src_lod);
   if (lod_idx < 0 ||
       (nir_src_is_const(tex->src[lod_idx].src) &&
        nir_src_as_int(tex->src[lod_idx].src) == 0))
      return false;

   unsigned dest_size = nir_tex_instr_dest_size(tex);

   b->cursor = nir_before_instr(&tex->instr);
   nir_ssa_def *lod = nir_ssa_for_src(b, tex->src[lod_idx].src, 1);

   /* Replace the non-0-LOD in the initial TXS operation by a 0-LOD. */
   nir_instr_rewrite_src(&tex->instr, &tex->src[lod_idx].src,
                         nir_src_for_ssa(nir_imm_int(b, 0)));

   /* TXS(LOD) = max(TXS(0) >> LOD, 1)
    * But we do min(TXS(0), TXS(LOD)) to catch the case of a null surface,
    * which should return 0, not 1.
    */
   b->cursor = nir_after_instr(&tex->instr);
   nir_ssa_def *minified = nir_imin(b, &tex->dest.ssa,
                                    nir_imax(b, nir_ushr(b, &tex->dest.ssa, lod),
                                             nir_imm_int(b, 1)));

   /* Make sure the component encoding the array size (if any) is not
    * minified.
    */
   if (tex->is_array) {
      nir_ssa_def *comp[3];

      assert(dest_size <= ARRAY_SIZE(comp));
      for (unsigned i = 0; i < dest_size - 1; i++)
         comp[i] = nir_channel(b, minified, i);

      comp[dest_size - 1] = nir_channel(b, &tex->dest.ssa, dest_size - 1);
      minified = nir_vec(b, comp, dest_size);
   }

   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, minified,
                                  minified->parent_instr);
   return true;
}
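
/*
 * A worked example of the minification above: for a 1024x768 2D texture,
 * TXS(3) = max((1024, 768) >> 3, 1) = (128, 96), while a null surface
 * reports TXS(0) = (0, 0) and the outer imin keeps the result at 0.
 */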

static void
nir_lower_txs_cube_array(nir_builder *b, nir_tex_instr *tex)
{
   assert(tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE && tex->is_array);
   tex->sampler_dim = GLSL_SAMPLER_DIM_2D;

   b->cursor = nir_after_instr(&tex->instr);

   assert(tex->dest.is_ssa);
   assert(tex->dest.ssa.num_components == 3);
   nir_ssa_def *size = &tex->dest.ssa;
   size = nir_vec3(b, nir_channel(b, size, 0),
                   nir_channel(b, size, 1),
                   nir_idiv(b, nir_channel(b, size, 2),
                            nir_imm_int(b, 6)));

   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, size, size->parent_instr);
}

static void
nir_lower_ms_txf_to_fragment_fetch(nir_builder *b, nir_tex_instr *tex)
{
   lower_offset(b, tex);

   b->cursor = nir_before_instr(&tex->instr);

   /* Create FMASK fetch. */
   assert(tex->texture_index == 0);
   nir_tex_instr *fmask_fetch = nir_tex_instr_create(b->shader, tex->num_srcs - 1);
   fmask_fetch->op = nir_texop_fragment_mask_fetch_amd;
   fmask_fetch->coord_components = tex->coord_components;
   fmask_fetch->sampler_dim = tex->sampler_dim;
   fmask_fetch->is_array = tex->is_array;
   fmask_fetch->texture_non_uniform = tex->texture_non_uniform;
   fmask_fetch->dest_type = nir_type_uint32;
   nir_ssa_dest_init(&fmask_fetch->instr, &fmask_fetch->dest, 1, 32, NULL);

   fmask_fetch->num_srcs = 0;
   for (unsigned i = 0; i < tex->num_srcs; i++) {
      if (tex->src[i].src_type == nir_tex_src_ms_index)
         continue;
      nir_tex_src *src = &fmask_fetch->src[fmask_fetch->num_srcs++];
      src->src = nir_src_for_ssa(tex->src[i].src.ssa);
      src->src_type = tex->src[i].src_type;
   }

   nir_builder_instr_insert(b, &fmask_fetch->instr);

   /* Obtain new sample index.  Sample i's physical index occupies bits
    * [4*i, 4*i+3] of the FMASK value.
    */
   int ms_index = nir_tex_instr_src_index(tex, nir_tex_src_ms_index);
   assert(ms_index >= 0);
   nir_src sample = tex->src[ms_index].src;
   nir_ssa_def *new_sample = NULL;
   if (nir_src_is_const(sample) && (nir_src_as_uint(sample) == 0 || nir_src_as_uint(sample) == 7)) {
      if (nir_src_as_uint(sample) == 7)
         new_sample = nir_ushr(b, &fmask_fetch->dest.ssa, nir_imm_int(b, 28));
      else
         new_sample = nir_iand_imm(b, &fmask_fetch->dest.ssa, 0xf);
   } else {
      new_sample = nir_ubitfield_extract(b, &fmask_fetch->dest.ssa,
                                         nir_imul_imm(b, sample.ssa, 4), nir_imm_int(b, 4));
   }

   /* Update instruction. */
   tex->op = nir_texop_fragment_fetch_amd;
   nir_instr_rewrite_src_ssa(&tex->instr, &tex->src[ms_index].src, new_sample);
}

static void
nir_lower_samples_identical_to_fragment_fetch(nir_builder *b, nir_tex_instr *tex)
{
   b->cursor = nir_after_instr(&tex->instr);

   nir_tex_instr *fmask_fetch = nir_instr_as_tex(nir_instr_clone(b->shader, &tex->instr));
   fmask_fetch->op = nir_texop_fragment_mask_fetch_amd;
   fmask_fetch->dest_type = nir_type_uint32;
   nir_ssa_dest_init(&fmask_fetch->instr, &fmask_fetch->dest, 1, 32, NULL);
   nir_builder_instr_insert(b, &fmask_fetch->instr);

   nir_ssa_def_rewrite_uses(&tex->dest.ssa, nir_ieq_imm(b, &fmask_fetch->dest.ssa, 0));
   nir_instr_remove_v(&tex->instr);
}

static void
nir_lower_lod_zero_width(nir_builder *b, nir_tex_instr *tex)
{
   int coord_index = nir_tex_instr_src_index(tex, nir_tex_src_coord);
   assert(coord_index >= 0);

   b->cursor = nir_after_instr(&tex->instr);

   nir_ssa_def *is_zero = nir_imm_bool(b, true);
   for (unsigned i = 0; i < tex->coord_components; i++) {
      nir_ssa_def *coord = nir_channel(b, tex->src[coord_index].src.ssa, i);

      /* Compute the sum of the absolute values of derivatives. */
      nir_ssa_def *dfdx = nir_fddx(b, coord);
      nir_ssa_def *dfdy = nir_fddy(b, coord);
      nir_ssa_def *fwidth = nir_fadd(b, nir_fabs(b, dfdx), nir_fabs(b, dfdy));

      /* Check if the sum is 0. */
      is_zero = nir_iand(b, is_zero, nir_feq(b, fwidth, nir_imm_float(b, 0.0)));
   }

   /* Replace the raw LOD by -FLT_MAX if the sum is 0 for all coordinates. */
   nir_ssa_def *adjusted_lod =
      nir_bcsel(b, is_zero, nir_imm_float(b, -FLT_MAX),
                nir_channel(b, &tex->dest.ssa, 1));

   nir_ssa_def *def =
      nir_vec2(b, nir_channel(b, &tex->dest.ssa, 0), adjusted_lod);

   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, def, def->parent_instr);
}

static bool
nir_lower_tex_block(nir_block *block, nir_builder *b,
                    const nir_lower_tex_options *options,
                    const struct nir_shader_compiler_options *compiler_options)
{
   bool progress = false;

   nir_foreach_instr_safe(instr, block) {
      if (instr->type != nir_instr_type_tex)
         continue;

      nir_tex_instr *tex = nir_instr_as_tex(instr);
      bool lower_txp = !!(options->lower_txp & (1 << tex->sampler_dim));

      /* mask of src coords to saturate (clamp): */
      unsigned sat_mask = 0;

      if ((1 << tex->sampler_index) & options->saturate_r)
         sat_mask |= (1 << 2);    /* .z */
      if ((1 << tex->sampler_index) & options->saturate_t)
         sat_mask |= (1 << 1);    /* .y */
      if ((1 << tex->sampler_index) & options->saturate_s)
         sat_mask |= (1 << 0);    /* .x */

      /* If we are clamping any coords, we must lower projector first
       * as clamping happens *after* projection:
       */
      if (lower_txp || sat_mask ||
          (options->lower_txp_array && tex->is_array)) {
         progress |= project_src(b, tex);
      }

      if ((tex->op == nir_texop_txf && options->lower_txf_offset) ||
          (sat_mask && nir_tex_instr_src_index(tex, nir_tex_src_coord) >= 0) ||
          (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT &&
           options->lower_rect_offset) ||
          (options->lower_offset_filter &&
           options->lower_offset_filter(instr, options->callback_data))) {
         progress = lower_offset(b, tex) || progress;
      }

      if ((tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) && options->lower_rect &&
          tex->op != nir_texop_txf) {
         if (nir_tex_instr_is_query(tex))
            tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
         else if (compiler_options->has_txs)
            lower_rect(b, tex);
         else
            lower_rect_tex_scale(b, tex);

         progress = true;
      }

      unsigned texture_index = tex->texture_index;
      uint32_t texture_mask = 1u << texture_index;
      int tex_index = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
      if (tex_index >= 0) {
         nir_deref_instr *deref = nir_src_as_deref(tex->src[tex_index].src);
         nir_variable *var = nir_deref_instr_get_variable(deref);
         texture_index = var ? var->data.binding : 0;
         texture_mask = var && texture_index < 32 ? (1u << texture_index) : 0u;
      }

      if (texture_mask & options->lower_y_uv_external) {
         lower_y_uv_external(b, tex, options, texture_index);
         progress = true;
      }

      if (texture_mask & options->lower_y_u_v_external) {
         lower_y_u_v_external(b, tex, options, texture_index);
         progress = true;
      }

      if (texture_mask & options->lower_yx_xuxv_external) {
         lower_yx_xuxv_external(b, tex, options, texture_index);
         progress = true;
      }

      if (texture_mask & options->lower_xy_uxvx_external) {
         lower_xy_uxvx_external(b, tex, options, texture_index);
         progress = true;
      }

      if (texture_mask & options->lower_ayuv_external) {
         lower_ayuv_external(b, tex, options, texture_index);
         progress = true;
      }

      if (texture_mask & options->lower_xyuv_external) {
         lower_xyuv_external(b, tex, options, texture_index);
         progress = true;
      }

      if (texture_mask & options->lower_yuv_external) {
         lower_yuv_external(b, tex, options, texture_index);
         progress = true;
      }

      if ((1 << tex->texture_index) & options->lower_yu_yv_external) {
         lower_yu_yv_external(b, tex, options, texture_index);
         progress = true;
      }

      if ((1 << tex->texture_index) & options->lower_y41x_external) {
         lower_y41x_external(b, tex, options, texture_index);
         progress = true;
      }

      if (sat_mask) {
         tex = saturate_src(b, tex, sat_mask);
         progress = true;
      }

      if (tex->op == nir_texop_tg4 && options->lower_tg4_broadcom_swizzle) {
         swizzle_tg4_broadcom(b, tex);
         progress = true;
      }

      if ((texture_mask & options->swizzle_result) &&
          !nir_tex_instr_is_query(tex) &&
          !(tex->is_shadow && tex->is_new_style_shadow)) {
         swizzle_result(b, tex, options->swizzles[tex->texture_index]);
         progress = true;
      }

      /* should be after swizzle so we know which channels are rgb: */
      if ((texture_mask & options->lower_srgb) &&
          !nir_tex_instr_is_query(tex) && !tex->is_shadow) {
         linearize_srgb_result(b, tex);
         progress = true;
      }

      const bool has_min_lod =
         nir_tex_instr_src_index(tex, nir_tex_src_min_lod) >= 0;
      const bool has_offset =
         nir_tex_instr_src_index(tex, nir_tex_src_offset) >= 0;

      if (tex->op == nir_texop_txb && tex->is_shadow && has_min_lod &&
          options->lower_txb_shadow_clamp) {
         lower_implicit_lod(b, tex);
         progress = true;
      }

      if (options->lower_tex_packing[tex->sampler_index] !=
          nir_lower_tex_packing_none &&
          tex->op != nir_texop_txs &&
          tex->op != nir_texop_query_levels &&
          tex->op != nir_texop_texture_samples) {
         lower_tex_packing(b, tex, options);
         progress = true;
      }

      if (options->lower_array_layer_round_even && tex->is_array &&
          tex->op != nir_texop_lod) {
         progress |= lower_array_layer_round_even(b, tex);
      }

      if (tex->op == nir_texop_txd &&
          (options->lower_txd ||
           (options->lower_txd_shadow && tex->is_shadow) ||
           (options->lower_txd_shadow_clamp && tex->is_shadow && has_min_lod) ||
           (options->lower_txd_offset_clamp && has_offset && has_min_lod) ||
           (options->lower_txd_clamp_bindless_sampler && has_min_lod &&
            nir_tex_instr_src_index(tex, nir_tex_src_sampler_handle) != -1) ||
           (options->lower_txd_clamp_if_sampler_index_not_lt_16 &&
            has_min_lod && !sampler_index_lt(tex, 16)) ||
           (options->lower_txd_cube_map &&
            tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) ||
           (options->lower_txd_3d &&
            tex->sampler_dim == GLSL_SAMPLER_DIM_3D) ||
           (options->lower_txd_array && tex->is_array))) {
         lower_gradient(b, tex);
         progress = true;
         continue;
      }

      /* TXF, TXS and TXL require a LOD, but not everything we implement
       * using those three opcodes provides one.  Provide a default LOD of 0.
       */
      if ((nir_tex_instr_src_index(tex, nir_tex_src_lod) == -1) &&
          (tex->op == nir_texop_txf || tex->op == nir_texop_txs ||
           tex->op == nir_texop_txl || tex->op == nir_texop_query_levels)) {
         b->cursor = nir_before_instr(&tex->instr);
         nir_tex_instr_add_src(tex, nir_tex_src_lod, nir_src_for_ssa(nir_imm_int(b, 0)));
         progress = true;
         continue;
      }

      /* Only fragment and compute (in some cases) support implicit
       * derivatives.  Lower those opcodes which use implicit derivatives to
       * use an explicit LOD of 0.
       * But don't touch RECT samplers because they don't have mips.
       */
      if (options->lower_invalid_implicit_lod &&
          nir_tex_instr_has_implicit_derivative(tex) &&
          tex->sampler_dim != GLSL_SAMPLER_DIM_RECT &&
          !nir_shader_supports_implicit_lod(b->shader)) {
         lower_zero_lod(b, tex);
         progress = true;
      }

      if (options->lower_txs_lod && tex->op == nir_texop_txs) {
         progress |= nir_lower_txs_lod(b, tex);
         continue;
      }

      if (options->lower_txs_cube_array && tex->op == nir_texop_txs &&
          tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE && tex->is_array) {
         nir_lower_txs_cube_array(b, tex);
         progress = true;
         continue;
      }

      /* has to happen after all the other lowerings as the original tg4 gets
       * replaced by 4 tg4 instructions.
       */
      if (tex->op == nir_texop_tg4 &&
          nir_tex_instr_has_explicit_tg4_offsets(tex) &&
          options->lower_tg4_offsets) {
         progress |= lower_tg4_offsets(b, tex);
         continue;
      }

      if (options->lower_to_fragment_fetch_amd && tex->op == nir_texop_txf_ms) {
         nir_lower_ms_txf_to_fragment_fetch(b, tex);
         progress = true;
         continue;
      }

      if (options->lower_to_fragment_fetch_amd && tex->op == nir_texop_samples_identical) {
         nir_lower_samples_identical_to_fragment_fetch(b, tex);
         progress = true;
         continue;
      }

      if (options->lower_lod_zero_width && tex->op == nir_texop_lod) {
         nir_lower_lod_zero_width(b, tex);
         progress = true;
         continue;
      }
   }

   return progress;
}

static bool
nir_lower_tex_impl(nir_function_impl *impl,
                   const nir_lower_tex_options *options,
                   const struct nir_shader_compiler_options *compiler_options)
{
   bool progress = false;
   nir_builder builder;
   nir_builder_init(&builder, impl);

   nir_foreach_block(block, impl) {
      progress |= nir_lower_tex_block(block, &builder, options, compiler_options);
   }

   nir_metadata_preserve(impl, nir_metadata_block_index |
                               nir_metadata_dominance);
   return progress;
}

bool
nir_lower_tex(nir_shader *shader, const nir_lower_tex_options *options)
{
   bool progress = false;

   nir_foreach_function(function, shader) {
      if (function->impl)
         progress |= nir_lower_tex_impl(function->impl, options, shader->options);
   }

   return progress;
}