1/* 2 * Copyright © 2021 Valve Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 
 *
 * Authors:
 *    Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
 */

#include <stdbool.h>
#include "main/image.h"
#include "main/pbo.h"

#include "state_tracker/st_nir.h"
#include "state_tracker/st_format.h"
#include "state_tracker/st_pbo.h"
#include "state_tracker/st_texture.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_format_convert.h"
#include "compiler/glsl/gl_nir.h"
#include "compiler/glsl/gl_nir_linker.h"
#include "util/u_sampler.h"
#include "util/streaming-load-memcpy.h"

/* [signed, unsigned] pair of BGR-style formats */
#define BGR_FORMAT(NAME) \
   {{ \
      [0] = PIPE_FORMAT_##NAME##_SNORM, \
      [1] = PIPE_FORMAT_##NAME##_SINT, \
   }, \
   { \
      [0] = PIPE_FORMAT_##NAME##_UNORM, \
      [1] = PIPE_FORMAT_##NAME##_UINT, \
   }}

/* [signed, unsigned] x [bytes-per-channel: 1, 2, 4] table entry */
#define FORMAT(NAME, NAME16, NAME32) \
   {{ \
      [1] = PIPE_FORMAT_##NAME##_SNORM, \
      [2] = PIPE_FORMAT_##NAME16##_SNORM, \
      [4] = PIPE_FORMAT_##NAME32##_SNORM, \
   }, \
   { \
      [1] = PIPE_FORMAT_##NAME##_UNORM, \
      [2] = PIPE_FORMAT_##NAME16##_UNORM, \
      [4] = PIPE_FORMAT_##NAME32##_UNORM, \
   }}

/* don't try these at home */
/* Pick a pipe_format that matches the requested GL format/type for the
 * conversion shader's output. Depth and single-component-integer downloads
 * are redirected to plain R{8,16,32} int formats by bytes-per-pixel;
 * BGR(A) requests recurse on the RGB(A) equivalent and flag
 * *need_bgra_swizzle so the caller can reorder components.
 */
static enum pipe_format
get_convert_format(struct gl_context *ctx,
                   enum pipe_format src_format,
                   GLenum format, GLenum type,
                   bool *need_bgra_swizzle)
{
   struct st_context *st = st_context(ctx);
   GLint bpp = _mesa_bytes_per_pixel(format, type);
   if (_mesa_is_depth_format(format) ||
       format == GL_GREEN_INTEGER ||
       format == GL_BLUE_INTEGER) {
      switch (bpp) {
      case 1:
         return _mesa_is_type_unsigned(type) ? PIPE_FORMAT_R8_UINT : PIPE_FORMAT_R8_SINT;
      case 2:
         return _mesa_is_type_unsigned(type) ? PIPE_FORMAT_R16_UINT : PIPE_FORMAT_R16_SINT;
      case 4:
         return _mesa_is_type_unsigned(type) ? PIPE_FORMAT_R32_UINT : PIPE_FORMAT_R32_SINT;
      }
   }
   mesa_format mformat = _mesa_tex_format_from_format_and_type(ctx, format, type);
   enum pipe_format pformat = st_mesa_format_to_pipe_format(st, mformat);
   if (!pformat) {
      GLint dst_components = _mesa_components_in_format(format);
      /* convert to bytes per channel for the rgb[][][] lookup below */
      bpp /= dst_components;
      if (format == GL_BGR || format == GL_BGRA) {
         pformat = st_pbo_get_dst_format(ctx, PIPE_TEXTURE_2D, src_format, false, format == GL_BGR ? GL_RGB : GL_RGBA, type, 0);
         if (!pformat)
            pformat = get_convert_format(ctx, src_format, format == GL_BGR ? GL_RGB : GL_RGBA, type, need_bgra_swizzle);
         assert(pformat);
         *need_bgra_swizzle = true;
      } else if (format == GL_BGR_INTEGER || format == GL_BGRA_INTEGER) {
         pformat = st_pbo_get_dst_format(ctx, PIPE_TEXTURE_2D, src_format, false, format == GL_BGR_INTEGER ? GL_RGB_INTEGER : GL_RGBA_INTEGER, type, 0);
         if (!pformat)
            pformat = get_convert_format(ctx, src_format, format == GL_BGR_INTEGER ? GL_RGB_INTEGER : GL_RGBA_INTEGER, type, need_bgra_swizzle);
         assert(pformat);
         *need_bgra_swizzle = true;
      } else {
         /* [signed,unsigned][bpp] */
         enum pipe_format rgb[5][2][5] = {
            [1] = FORMAT(R8, R16, R32),
            [2] = FORMAT(R8G8, R16G16, R32G32),
            [3] = FORMAT(R8G8B8, R16G16B16, R32G32B32),
            [4] = FORMAT(R8G8B8A8, R16G16B16A16, R32G32B32A32),
         };
         pformat = rgb[dst_components][_mesa_is_type_unsigned(type)][bpp];
      }
      assert(util_format_get_nr_components(pformat) == dst_components);
   }
   assert(pformat);
   return pformat;
}
#undef BGR_FORMAT
#undef FORMAT


/* SSA handles for the unpacked pbo_data fields; filled in by
 * init_pbo_shader_data() and consumed throughout the conversion shader
 */
struct pbo_shader_data {
   nir_ssa_def *offset;
   nir_ssa_def *range;
   nir_ssa_def *invert;
   nir_ssa_def *blocksize;
   nir_ssa_def *alignment;
   nir_ssa_def *dst_bit_size;
   nir_ssa_def *channels;
   nir_ssa_def *normalized;
   nir_ssa_def *integer;
   nir_ssa_def *clamp_uint;
   nir_ssa_def *r11g11b10_or_sint;
   nir_ssa_def *r9g9b9e5;
   nir_ssa_def *bits1;
nir_ssa_def *bits2; 136 nir_ssa_def *bits3; 137 nir_ssa_def *bits4; 138 nir_ssa_def *swap; 139 nir_ssa_def *bits; //vec4 140}; 141 142 143/* must be under 16bytes / sizeof(vec4) / 128 bits) */ 144struct pbo_data { 145 union { 146 struct { 147 struct { 148 uint16_t x, y; 149 }; 150 struct { 151 uint16_t width, height; 152 }; 153 struct { 154 uint16_t depth; 155 uint8_t invert : 1; 156 uint8_t blocksize : 7; 157 158 uint8_t clamp_uint : 1; 159 uint8_t r11g11b10_or_sint : 1; 160 uint8_t r9g9b9e5 : 1; 161 uint8_t swap : 1; 162 uint16_t alignment : 2; 163 uint8_t dst_bit_size : 2; //8, 16, 32, 64 164 }; 165 166 struct { 167 uint8_t channels : 2; 168 uint8_t bits1 : 6; 169 uint8_t normalized : 1; 170 uint8_t integer : 1; 171 uint8_t bits2 : 6; 172 uint8_t bits3 : 6; 173 uint8_t pad1 : 2; 174 uint8_t bits4 : 6; 175 uint8_t pad2 : 2; 176 }; 177 }; 178 float vec[4]; 179 }; 180}; 181 182 183#define STRUCT_OFFSET(name) (offsetof(struct pbo_data, name) * 8) 184 185#define STRUCT_BLOCK(offset, ...) \ 186 do { \ 187 assert(offset % 8 == 0); \ 188 nir_ssa_def *block##offset = nir_u2u32(b, nir_extract_bits(b, &ubo_load, 1, (offset), 1, 8)); \ 189 __VA_ARGS__ \ 190 } while (0) 191#define STRUCT_MEMBER(blockoffset, name, offset, size, op, clamp) \ 192 do { \ 193 assert(offset + size <= 8); \ 194 nir_ssa_def *val = nir_iand_imm(b, block##blockoffset, u_bit_consecutive(offset, size)); \ 195 if (offset) \ 196 val = nir_ushr_imm(b, val, offset); \ 197 sd->name = op; \ 198 if (clamp) \ 199 sd->name = nir_umin(b, sd->name, nir_imm_int(b, clamp)); \ 200 } while (0) 201#define STRUCT_MEMBER_SHIFTED_2BIT(blockoffset, name, offset, shift, clamp) \ 202 STRUCT_MEMBER(blockoffset, name, offset, 2, nir_ishl(b, nir_imm_int(b, shift), val), clamp) 203 204#define STRUCT_MEMBER_BOOL(blockoffset, name, offset) \ 205 STRUCT_MEMBER(blockoffset, name, offset, 1, nir_ieq_imm(b, val, 1), 0) 206 207/* this function extracts the conversion data from pbo_data using the 208 * size annotations for each 
grouping. data is compacted into bitfields,
 * so bitwise operations must be used to "unpack" everything
 */
static void
init_pbo_shader_data(nir_builder *b, struct pbo_shader_data *sd)
{
   /* the whole pbo_data struct arrives as a single uvec4 uniform */
   nir_variable *ubo = nir_variable_create(b->shader, nir_var_uniform, glsl_uvec4_type(), "offset");
   nir_ssa_def *ubo_load = nir_load_var(b, ubo);

   /* x,y and width,height,depth are plain 16-bit fields; umin clamps to the
    * 16-bit range to help the optimizer
    */
   sd->offset = nir_umin(b, nir_u2u32(b, nir_extract_bits(b, &ubo_load, 1, STRUCT_OFFSET(x), 2, 16)), nir_imm_int(b, 65535));
   sd->range = nir_umin(b, nir_u2u32(b, nir_extract_bits(b, &ubo_load, 1, STRUCT_OFFSET(width), 3, 16)), nir_imm_int(b, 65535));

   /* bit offsets (80, 88, ...) correspond to the byte-aligned bitfield
    * groups in struct pbo_data
    */
   STRUCT_BLOCK(80,
      STRUCT_MEMBER_BOOL(80, invert, 0);
      STRUCT_MEMBER(80, blocksize, 1, 7, nir_iadd_imm(b, val, 1), 128);
   );

   STRUCT_BLOCK(88,
      STRUCT_MEMBER_BOOL(88, clamp_uint, 0);
      STRUCT_MEMBER_BOOL(88, r11g11b10_or_sint, 1);
      STRUCT_MEMBER_BOOL(88, r9g9b9e5, 2);
      STRUCT_MEMBER_BOOL(88, swap, 3);
      STRUCT_MEMBER_SHIFTED_2BIT(88, alignment, 4, 1, 8);
      STRUCT_MEMBER_SHIFTED_2BIT(88, dst_bit_size, 6, 8, 64);
   );

   STRUCT_BLOCK(96,
      STRUCT_MEMBER(96, channels, 0, 2, nir_iadd_imm(b, val, 1), 4);
      STRUCT_MEMBER(96, bits1, 2, 6, val, 32);
   );

   STRUCT_BLOCK(104,
      STRUCT_MEMBER_BOOL(104, normalized, 0);
      STRUCT_MEMBER_BOOL(104, integer, 1);
      STRUCT_MEMBER(104, bits2, 2, 6, val, 32);
   );


   STRUCT_BLOCK(112,
      STRUCT_MEMBER(112, bits3, 0, 6, val, 32);
   );

   STRUCT_BLOCK(120,
      STRUCT_MEMBER(120, bits4, 0, 6, val, 32);
   );
   sd->bits = nir_vec4(b, sd->bits1, sd->bits2, sd->bits3, sd->bits4);

   /* clamp swap in the shader to enable better optimizing */
   /* TODO?
      sd->swap = nir_bcsel(b, nir_ior(b,
                                 nir_ieq_imm(b, sd->blocksize, 8),
                                 nir_bcsel(b,
                                           nir_ieq_imm(b, sd->bits1, 8),
                                           nir_bcsel(b,
                                                     nir_uge(b, sd->channels, nir_imm_int(b, 2)),
                                                     nir_bcsel(b,
                                                               nir_uge(b, sd->channels, nir_imm_int(b, 3)),
                                                               nir_bcsel(b,
                                                                         nir_ieq(b, sd->channels, nir_imm_int(b, 4)),
                                                                         nir_ball(b, nir_ieq(b, sd->bits, nir_imm_ivec4(b, 8, 8, 8, 8))),
                                                                         nir_ball(b, nir_ieq(b, nir_channels(b, sd->bits, 7), nir_imm_ivec3(b, 8, 8, 8)))),
                                                               nir_ball(b, nir_ieq(b, nir_channels(b, sd->bits, 3), nir_imm_ivec2(b, 8, 8)))),
                                                     nir_imm_bool(b, 0)),
                                           nir_imm_bool(b, 0))),
                           nir_imm_bool(b, 0),
                           sd->swap);
   */
}

/* Encode the destination format's conversion parameters into *pd.
 * Returns the number of components the conversion shader should write:
 * 1 for "weird packed" formats (mixed or non-byte channel sizes), else
 * the destination's channel count.
 */
static unsigned
fill_pbo_data(struct pbo_data *pd, enum pipe_format src_format, enum pipe_format dst_format, bool swap)
{
   unsigned bits[4] = {0};
   bool weird_packed = false;
   const struct util_format_description *dst_desc = util_format_description(dst_format);
   bool is_8bit = true;

   for (unsigned c = 0; c < 4; c++) {
      bits[c] = dst_desc->channel[c].size;
      if (c < dst_desc->nr_channels) {
         /* "weird" = channels differ in size or aren't byte-aligned */
         weird_packed |= bits[c] != bits[0] || bits[c] % 8 != 0;
         if (bits[c] != 8)
            is_8bit = false;
      }
   }

   /* byteswap is a no-op for 8-bit channels / 8-bit blocks */
   if (is_8bit || dst_desc->block.bits == 8)
      swap = false;

   unsigned dst_bit_size = 0;
   if (weird_packed) {
      dst_bit_size = dst_desc->block.bits;
   } else {
      dst_bit_size = dst_desc->block.bits / dst_desc->nr_channels;
   }
   assert(dst_bit_size);
   assert(dst_bit_size <= 64);

   /* NOTE(review): 8/16/32 >> 4 yields 0/1/2, matching the shader's 8<<n
    * decode, but 64 >> 4 == 4 which does not fit the 2-bit dst_bit_size
    * field (truncates to 0 -> decoded as 8) -- confirm whether a 64-bit
    * dst_bit_size can actually reach this path
    */
   pd->dst_bit_size = dst_bit_size >> 4;
   pd->channels = dst_desc->nr_channels - 1;
   pd->normalized = dst_desc->is_unorm || dst_desc->is_snorm;
   /* clamp_uint doubles as "unsigned-clamp needed": unorm, pure uint, or
    * sint fed from a non-sint/non-snorm source
    */
   pd->clamp_uint = dst_desc->is_unorm ||
                    (util_format_is_pure_sint(dst_format) &&
                     !util_format_is_pure_sint(src_format) &&
                     !util_format_is_snorm(src_format)) ||
                    util_format_is_pure_uint(dst_format);
   pd->integer = util_format_is_pure_uint(dst_format) || util_format_is_pure_sint(dst_format);
pd->r11g11b10_or_sint = dst_format == PIPE_FORMAT_R11G11B10_FLOAT || util_format_is_pure_sint(dst_format);
   pd->r9g9b9e5 = dst_format == PIPE_FORMAT_R9G9B9E5_FLOAT;
   pd->bits1 = bits[0];
   pd->bits2 = bits[1];
   pd->bits3 = bits[2];
   pd->bits4 = bits[3];
   pd->swap = swap;

   return weird_packed ? 1 : dst_desc->nr_channels;
}

/* Compute the byte offset into the destination SSBO for a given texel
 * coordinate, honoring pack alignment and MESA_PACK_INVERT-style row
 * inversion.
 */
static nir_ssa_def *
get_buffer_offset(nir_builder *b, nir_ssa_def *coord, struct pbo_shader_data *sd)
{
/* from _mesa_image_offset():
      offset = topOfImage
               + (skippixels + column) * bytes_per_pixel
               + (skiprows + row) * bytes_per_row
               + (skipimages + img) * bytes_per_image;
 */
   nir_ssa_def *bytes_per_row = nir_imul(b, nir_channel(b, sd->range, 0), sd->blocksize);
   /* for alignment >= 2: round bytes_per_row up to the alignment, i.e.
    * (row + align - 1) & ~(align - 1)
    */
   bytes_per_row = nir_bcsel(b, nir_ult(b, sd->alignment, nir_imm_int(b, 2)),
                             bytes_per_row,
                             nir_iand(b,
                                      nir_isub(b, nir_iadd(b, bytes_per_row, sd->alignment), nir_imm_int(b, 1)),
                                      nir_inot(b, nir_isub(b, sd->alignment, nir_imm_int(b, 1)))));
   nir_ssa_def *bytes_per_image = nir_imul(b, bytes_per_row, nir_channel(b, sd->range, 1));
   /* inverted packing walks rows backwards */
   bytes_per_row = nir_bcsel(b, sd->invert,
                             nir_isub(b, nir_imm_int(b, 0), bytes_per_row),
                             bytes_per_row);
   return nir_iadd(b,
                   nir_imul(b, nir_channel(b, coord, 0), sd->blocksize),
                   nir_iadd(b,
                            nir_imul(b, nir_channel(b, coord, 1), bytes_per_row),
                            nir_imul(b, nir_channel(b, coord, 2), bytes_per_image)));
}

/* store 'pixel' to ssbo 0 at buffer_offset, writing all its components */
static inline void
write_ssbo(nir_builder *b, nir_ssa_def *pixel, nir_ssa_def *buffer_offset)
{
   nir_store_ssbo(b, pixel, nir_imm_zero(b, 1, 32), buffer_offset,
                  .align_mul = pixel->bit_size / 8,
                  .write_mask = (1 << pixel->num_components) - 1);
}

/* narrow the pixel to the runtime dst_bit_size (8/16, else stored as-is)
 * before writing it out
 */
static void
write_conversion(nir_builder *b, nir_ssa_def *pixel, nir_ssa_def *buffer_offset, struct pbo_shader_data *sd)
{
   nir_push_if(b, nir_ilt(b, sd->dst_bit_size, nir_imm_int(b, 32)));
      nir_push_if(b, nir_ieq_imm(b, sd->dst_bit_size, 16));
         write_ssbo(b,
nir_u2u16(b, pixel), buffer_offset);
      nir_push_else(b, NULL);
         write_ssbo(b, nir_u2u8(b, pixel), buffer_offset);
      nir_pop_if(b, NULL);
   nir_push_else(b, NULL);
      write_ssbo(b, pixel, buffer_offset);
   nir_pop_if(b, NULL);
}

/* 16-bit byteswap of each component */
static nir_ssa_def *
swap2(nir_builder *b, nir_ssa_def *src)
{
   /* dst[i] = (src[i] >> 8) | ((src[i] << 8) & 0xff00); */
   return nir_ior(b,
                  nir_ushr_imm(b, src, 8),
                  nir_iand_imm(b, nir_ishl(b, src, nir_imm_int(b, 8)), 0xff00));
}

/* 32-bit byteswap of each component */
static nir_ssa_def *
swap4(nir_builder *b, nir_ssa_def *src)
{
   /* a = (b >> 24) | ((b >> 8) & 0xff00) | ((b << 8) & 0xff0000) | ((b << 24) & 0xff000000); */
   return nir_ior(b,
                  /* (b >> 24) */
                  nir_ushr_imm(b, src, 24),
                  nir_ior(b,
                          /* ((b >> 8) & 0xff00) */
                          nir_iand(b, nir_ushr_imm(b, src, 8), nir_imm_int(b, 0xff00)),
                          nir_ior(b,
                                  /* ((b << 8) & 0xff0000) */
                                  nir_iand(b, nir_ishl(b, src, nir_imm_int(b, 8)), nir_imm_int(b, 0xff0000)),
                                  /* ((b << 24) & 0xff000000) */
                                  nir_iand(b, nir_ishl(b, src, nir_imm_int(b, 24)), nir_imm_int(b, 0xff000000)))));
}

/* explode the cf to handle channel counts in the shader */
static void
grab_components(nir_builder *b, nir_ssa_def *pixel, nir_ssa_def *buffer_offset, struct pbo_shader_data *sd, bool weird_packed)
{
   if (weird_packed) {
      /* weird-packed data was pre-packed into 1 or 2 dwords; bits1==32
       * distinguishes the two-dword case (Z32_FLOAT_S8X24-style)
       */
      nir_push_if(b, nir_ieq_imm(b, sd->bits1, 32));
         write_conversion(b, nir_channels(b, pixel, 3), buffer_offset, sd);
      nir_push_else(b, NULL);
         write_conversion(b, nir_channel(b, pixel, 0), buffer_offset, sd);
      nir_pop_if(b, NULL);
   } else {
      /* write exactly sd->channels components */
      nir_push_if(b, nir_ieq_imm(b, sd->channels, 1));
         write_conversion(b, nir_channel(b, pixel, 0), buffer_offset, sd);
      nir_push_else(b, NULL);
         nir_push_if(b, nir_ieq_imm(b, sd->channels, 2));
            write_conversion(b, nir_channels(b, pixel, (1 << 2) - 1), buffer_offset, sd);
         nir_push_else(b, NULL);
            nir_push_if(b, nir_ieq_imm(b, sd->channels, 3));
write_conversion(b, nir_channels(b, pixel, (1 << 3) - 1), buffer_offset, sd);
            nir_push_else(b, NULL);
               write_conversion(b, nir_channels(b, pixel, (1 << 4) - 1), buffer_offset, sd);
            nir_pop_if(b, NULL);
         nir_pop_if(b, NULL);
      nir_pop_if(b, NULL);
   }
}

/* if byteswap is enabled, handle that and then write the components */
static void
handle_swap(nir_builder *b, nir_ssa_def *pixel, nir_ssa_def *buffer_offset,
            struct pbo_shader_data *sd, unsigned num_components, bool weird_packed)
{
   nir_push_if(b, sd->swap); {
      /* blocksize / num_components == bytes per component; 2 means 16-bit */
      nir_push_if(b, nir_ieq_imm(b, nir_udiv_imm(b, sd->blocksize, num_components), 2)); {
         /* this is a single high/low swap per component */
         nir_ssa_def *components[4];
         for (unsigned i = 0; i < 4; i++)
            components[i] = swap2(b, nir_channel(b, pixel, i));
         nir_ssa_def *v = nir_vec(b, components, 4);
         grab_components(b, v, buffer_offset, sd, weird_packed);
      } nir_push_else(b, NULL); {
         /* this is a pair of high/low swaps for each half of the component */
         nir_ssa_def *components[4];
         for (unsigned i = 0; i < 4; i++)
            components[i] = swap4(b, nir_channel(b, pixel, i));
         nir_ssa_def *v = nir_vec(b, components, 4);
         grab_components(b, v, buffer_offset, sd, weird_packed);
      } nir_pop_if(b, NULL);
   } nir_push_else(b, NULL); {
      /* swap disabled */
      grab_components(b, pixel, buffer_offset, sd, weird_packed);
   } nir_pop_if(b, NULL);
}

/* runtime equivalent of fill_pbo_data()'s weird_packed test for one channel:
 * true if channel 'component' exists and differs in size from channel 0 or
 * isn't byte-aligned
 */
static nir_ssa_def *
check_for_weird_packing(nir_builder *b, struct pbo_shader_data *sd, unsigned component)
{
   nir_ssa_def *c = nir_channel(b, sd->bits, component - 1);

   return nir_bcsel(b,
                    nir_ige(b, sd->channels, nir_imm_int(b, component)),
                    nir_ior(b,
                            nir_ine(b, c, sd->bits1),
                            nir_ine(b, nir_imod(b, c, nir_imm_int(b, 8)), nir_imm_int(b, 0))),
                    nir_imm_bool(b, 0));
}

/* convenience function for clamping signed integers */
static inline nir_ssa_def *
nir_imin_imax(nir_builder
*build, nir_ssa_def *src, nir_ssa_def *clamp_to_min, nir_ssa_def *clamp_to_max) 470{ 471 return nir_imax(build, nir_imin(build, src, clamp_to_min), clamp_to_max); 472} 473 474static inline nir_ssa_def * 475nir_format_float_to_unorm_with_factor(nir_builder *b, nir_ssa_def *f, nir_ssa_def *factor) 476{ 477 /* Clamp to the range [0, 1] */ 478 f = nir_fsat(b, f); 479 480 return nir_f2u32(b, nir_fround_even(b, nir_fmul(b, f, factor))); 481} 482 483static inline nir_ssa_def * 484nir_format_float_to_snorm_with_factor(nir_builder *b, nir_ssa_def *f, nir_ssa_def *factor) 485{ 486 /* Clamp to the range [-1, 1] */ 487 f = nir_fmin(b, nir_fmax(b, f, nir_imm_float(b, -1)), nir_imm_float(b, 1)); 488 489 return nir_f2i32(b, nir_fround_even(b, nir_fmul(b, f, factor))); 490} 491 492static nir_ssa_def * 493clamp_and_mask(nir_builder *b, nir_ssa_def *src, nir_ssa_def *channels) 494{ 495 nir_ssa_def *one = nir_imm_ivec4(b, 1, 0, 0, 0); 496 nir_ssa_def *two = nir_imm_ivec4(b, 1, 1, 0, 0); 497 nir_ssa_def *three = nir_imm_ivec4(b, 1, 1, 1, 0); 498 nir_ssa_def *four = nir_imm_ivec4(b, 1, 1, 1, 1); 499 /* avoid underflow by clamping to channel count */ 500 src = nir_bcsel(b, 501 nir_ieq(b, channels, one), 502 nir_isub(b, src, one), 503 nir_bcsel(b, 504 nir_ieq_imm(b, channels, 2), 505 nir_isub(b, src, two), 506 nir_bcsel(b, 507 nir_ieq_imm(b, channels, 3), 508 nir_isub(b, src, three), 509 nir_isub(b, src, four)))); 510 511 return nir_mask(b, src, 32); 512} 513 514static void 515convert_swap_write(nir_builder *b, nir_ssa_def *pixel, nir_ssa_def *buffer_offset, 516 unsigned num_components, 517 struct pbo_shader_data *sd) 518{ 519 520 nir_ssa_def *weird_packed = nir_ior(b, 521 nir_ior(b, 522 check_for_weird_packing(b, sd, 4), 523 check_for_weird_packing(b, sd, 3)), 524 check_for_weird_packing(b, sd, 2)); 525 if (num_components == 1) { 526 nir_push_if(b, weird_packed); 527 nir_push_if(b, sd->r11g11b10_or_sint); 528 handle_swap(b, nir_pad_vec4(b, nir_format_pack_11f11f10f(b, pixel)), 
buffer_offset, sd, 1, true);
         nir_push_else(b, NULL);
            nir_push_if(b, sd->r9g9b9e5);
               handle_swap(b, nir_pad_vec4(b, nir_format_pack_r9g9b9e5(b, pixel)), buffer_offset, sd, 1, true);
            nir_push_else(b, NULL);
               nir_push_if(b, nir_ieq_imm(b, sd->bits1, 32)); { //PIPE_FORMAT_Z32_FLOAT_S8X24_UINT
                  /* pack Z into dword 0 and S8X24 into dword 1 */
                  nir_ssa_def *pack[2];
                  pack[0] = nir_format_pack_uint_unmasked_ssa(b, nir_channel(b, pixel, 0), nir_channel(b, sd->bits, 0));
                  pack[1] = nir_format_pack_uint_unmasked_ssa(b, nir_channels(b, pixel, 6), nir_channels(b, sd->bits, 6));
                  handle_swap(b, nir_pad_vec4(b, nir_vec2(b, pack[0], pack[1])), buffer_offset, sd, 2, true);
               } nir_push_else(b, NULL);
                  /* generic bit-packing driven by the per-channel sizes */
                  handle_swap(b, nir_pad_vec4(b, nir_format_pack_uint_unmasked_ssa(b, pixel, sd->bits)), buffer_offset, sd, 1, true);
               nir_pop_if(b, NULL);
            nir_pop_if(b, NULL);
         nir_pop_if(b, NULL);
      nir_push_else(b, NULL);
         handle_swap(b, pixel, buffer_offset, sd, num_components, false);
      nir_pop_if(b, NULL);
   } else {
      nir_push_if(b, weird_packed);
         handle_swap(b, pixel, buffer_offset, sd, num_components, true);
      nir_push_else(b, NULL);
         handle_swap(b, pixel, buffer_offset, sd, num_components, false);
      nir_pop_if(b, NULL);
   }
}

/* top-level per-texel conversion: branch on the runtime format flags
 * (normalized / integer / half / float) to convert the sampled float pixel,
 * then clamp, pack, swap and store it
 */
static void
do_shader_conversion(nir_builder *b, nir_ssa_def *pixel,
                     unsigned num_components,
                     nir_ssa_def *coord, struct pbo_shader_data *sd)
{
   nir_ssa_def *buffer_offset = get_buffer_offset(b, coord, sd);

   nir_ssa_def *signed_bit_mask = clamp_and_mask(b, sd->bits, sd->channels);

#define CONVERT_SWAP_WRITE(PIXEL) \
   convert_swap_write(b, PIXEL, buffer_offset, num_components, sd);
   nir_push_if(b, sd->normalized);
      nir_push_if(b, sd->clamp_uint); //unorm
         CONVERT_SWAP_WRITE(nir_format_float_to_unorm_with_factor(b, pixel, nir_u2f32(b, nir_mask(b, sd->bits, 32))));
      nir_push_else(b, NULL);
         CONVERT_SWAP_WRITE(nir_format_float_to_snorm_with_factor(b, pixel, nir_u2f32(b, signed_bit_mask)));
nir_pop_if(b, NULL); 572 nir_push_else(b, NULL); 573 nir_push_if(b, sd->integer); 574 nir_push_if(b, sd->r11g11b10_or_sint); //sint 575 nir_push_if(b, sd->clamp_uint); //uint -> sint 576 CONVERT_SWAP_WRITE(nir_umin(b, pixel, signed_bit_mask)); 577 nir_push_else(b, NULL); 578 CONVERT_SWAP_WRITE(nir_imin_imax(b, pixel, signed_bit_mask, nir_isub(b, nir_ineg(b, signed_bit_mask), nir_imm_int(b, 1)))); 579 nir_pop_if(b, NULL); 580 nir_push_else(b, NULL); 581 nir_push_if(b, sd->clamp_uint); //uint 582 /* nir_format_clamp_uint */ 583 CONVERT_SWAP_WRITE(nir_umin(b, pixel, nir_mask(b, sd->bits, 32))); 584 nir_pop_if(b, NULL); 585 nir_pop_if(b, NULL); 586 nir_push_else(b, NULL); 587 nir_push_if(b, nir_ieq_imm(b, sd->bits1, 16)); //half 588 CONVERT_SWAP_WRITE(nir_format_float_to_half(b, pixel)); 589 nir_push_else(b, NULL); 590 CONVERT_SWAP_WRITE(pixel); 591 nir_pop_if(b, NULL); 592 nir_pop_if(b, NULL); 593} 594 595static void * 596create_conversion_shader(struct st_context *st, enum pipe_texture_target target, unsigned num_components) 597{ 598 const nir_shader_compiler_options *options = st_get_nir_compiler_options(st, MESA_SHADER_COMPUTE); 599 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options, "%s", "convert"); 600 b.shader->info.workgroup_size[0] = target != PIPE_TEXTURE_1D ? 8 : 64; 601 b.shader->info.workgroup_size[1] = target != PIPE_TEXTURE_1D ? 
8 : 1;

   b.shader->info.workgroup_size[2] = 1;
   b.shader->info.textures_used[0] = 1;
   b.shader->info.num_ssbos = 1;
   b.shader->num_uniforms = 2;
   nir_variable_create(b.shader, nir_var_mem_ssbo, glsl_array_type(glsl_float_type(), 0, 4), "ssbo");
   nir_variable *sampler = nir_variable_create(b.shader, nir_var_uniform, st_pbo_sampler_type_for_target(target, ST_PBO_CONVERT_FLOAT), "sampler");
   unsigned coord_components = glsl_get_sampler_coordinate_components(sampler->type);
   sampler->data.explicit_binding = 1;

   struct pbo_shader_data sd;
   init_pbo_shader_data(&b, &sd);

   /* global invocation id = workgroup_id * workgroup_size + local id */
   nir_ssa_def *bsize = nir_imm_ivec4(&b,
                                      b.shader->info.workgroup_size[0],
                                      b.shader->info.workgroup_size[1],
                                      b.shader->info.workgroup_size[2],
                                      0);
   nir_ssa_def *wid = nir_load_workgroup_id(&b, 32);
   nir_ssa_def *iid = nir_load_local_invocation_id(&b);
   nir_ssa_def *tile = nir_imul(&b, wid, bsize);
   nir_ssa_def *global_id = nir_iadd(&b, tile, iid);
   /* texel coordinate = invocation id + the region's x/y offset */
   nir_ssa_def *start = nir_iadd(&b, nir_trim_vector(&b, global_id, 2), sd.offset);

   nir_ssa_def *coord;
   if (coord_components < 3)
      coord = start;
   else {
      /* pad offset vec with global_id to get correct z offset */
      assert(coord_components == 3);
      coord = nir_vec3(&b, nir_channel(&b, start, 0),
                           nir_channel(&b, start, 1),
                           nir_channel(&b, global_id, 2));
   }
   coord = nir_trim_vector(&b, coord, coord_components);
   nir_ssa_def *offset = coord_components > 2 ?
nir_pad_vector_imm_int(&b, sd.offset, 0, 3) :
      nir_trim_vector(&b, sd.offset, coord_components);
   nir_ssa_def *range = nir_trim_vector(&b, sd.range, coord_components);
   nir_ssa_def *max = nir_iadd(&b, offset, range);
   /* skip invocations that fall outside the requested region (the grid is
    * rounded up to whole workgroups)
    */
   nir_push_if(&b, nir_ball(&b, nir_ilt(&b, coord, max)));
   /* fetch the texel with txf: coord + lod 0 + texture deref */
   nir_tex_instr *txf = nir_tex_instr_create(b.shader, 3);
   txf->is_array = glsl_sampler_type_is_array(sampler->type);
   txf->op = nir_texop_txf;
   txf->sampler_dim = glsl_get_sampler_dim(sampler->type);
   txf->dest_type = nir_type_float32;
   txf->coord_components = coord_components;
   txf->texture_index = 0;
   txf->sampler_index = 0;
   txf->src[0].src_type = nir_tex_src_coord;
   txf->src[0].src = nir_src_for_ssa(coord);
   txf->src[1].src_type = nir_tex_src_lod;
   txf->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
   txf->src[2].src_type = nir_tex_src_texture_deref;
   nir_deref_instr *sampler_deref = nir_build_deref_var(&b, sampler);
   txf->src[2].src = nir_src_for_ssa(&sampler_deref->dest.ssa);

   nir_ssa_dest_init(&txf->instr, &txf->dest, 4, 32, NULL);
   nir_builder_instr_insert(&b, &txf->instr);

   /* pass the grid offset as the coord to get the zero-indexed buffer offset */
   do_shader_conversion(&b, &txf->dest.ssa, num_components, global_id, &sd);

   nir_pop_if(&b, NULL);

   nir_validate_shader(b.shader, NULL);
   gl_nir_opts(b.shader);
   return st_nir_finish_builtin_shader(st, b.shader);
}

/* Build the inverse of a format swizzle so sampling returns components in
 * the order the packed destination expects.
 */
static void
invert_swizzle(uint8_t *out, const uint8_t *in)
{
   /* First, default to all zeroes to prevent uninitialized junk */
   for (unsigned c = 0; c < 4; ++c)
      out[c] = PIPE_SWIZZLE_0;

   /* Now "do" what the swizzle says */
   for (unsigned c = 0; c < 4; ++c) {
      unsigned char i = in[c];

      /* Who cares?
*/ 684 assert(PIPE_SWIZZLE_X == 0); 685 if (i > PIPE_SWIZZLE_W) 686 continue; 687 /* Invert */ 688 unsigned idx = i - PIPE_SWIZZLE_X; 689 out[idx] = PIPE_SWIZZLE_X + c; 690 } 691} 692 693static uint32_t 694compute_shader_key(enum pipe_texture_target target, unsigned num_components) 695{ 696 uint8_t key_target[] = { 697 [PIPE_BUFFER] = UINT8_MAX, 698 [PIPE_TEXTURE_1D] = 1, 699 [PIPE_TEXTURE_2D] = 2, 700 [PIPE_TEXTURE_3D] = 3, 701 [PIPE_TEXTURE_CUBE] = 4, 702 [PIPE_TEXTURE_RECT] = UINT8_MAX, 703 [PIPE_TEXTURE_1D_ARRAY] = 5, 704 [PIPE_TEXTURE_2D_ARRAY] = 6, 705 [PIPE_TEXTURE_CUBE_ARRAY] = UINT8_MAX, 706 }; 707 assert(target < ARRAY_SIZE(key_target)); 708 assert(key_target[target] != UINT8_MAX); 709 return key_target[target] | (num_components << 3); 710} 711 712static unsigned 713get_dim_from_target(enum pipe_texture_target target) 714{ 715 switch (target) { 716 case PIPE_TEXTURE_1D: 717 return 1; 718 case PIPE_TEXTURE_2D_ARRAY: 719 case PIPE_TEXTURE_3D: 720 return 3; 721 default: 722 return 2; 723 } 724} 725 726static enum pipe_texture_target 727get_target_from_texture(struct pipe_resource *src) 728{ 729 enum pipe_texture_target view_target; 730 switch (src->target) { 731 case PIPE_TEXTURE_RECT: 732 view_target = PIPE_TEXTURE_2D; 733 break; 734 case PIPE_TEXTURE_CUBE: 735 case PIPE_TEXTURE_CUBE_ARRAY: 736 view_target = PIPE_TEXTURE_2D_ARRAY; 737 break; 738 default: 739 view_target = src->target; 740 break; 741 } 742 return view_target; 743} 744 745/* force swizzling behavior for sampling */ 746enum swizzle_clamp { 747 /* force component selection for named format */ 748 SWIZZLE_CLAMP_LUMINANCE = 1, 749 SWIZZLE_CLAMP_ALPHA = 2, 750 SWIZZLE_CLAMP_LUMINANCE_ALPHA = 3, 751 SWIZZLE_CLAMP_INTENSITY = 4, 752 SWIZZLE_CLAMP_RGBX = 5, 753 754 /* select only 1 component */ 755 SWIZZLE_CLAMP_GREEN = 8, 756 SWIZZLE_CLAMP_BLUE = 16, 757 758 /* reverse ordering for format emulation */ 759 SWIZZLE_CLAMP_BGRA = 32, 760}; 761 762static bool 763can_copy_direct(const struct 
gl_pixelstore_attrib *pack)
{
   return !(pack->RowLength ||
            pack->SkipPixels ||
            pack->SkipRows ||
            pack->ImageHeight ||
            pack->SkipImages);
}

/* Download a texture region through the compute conversion shader:
 * upload pbo_data constants, bind the conversion CS, sampler view, and
 * destination buffer, then launch the grid.
 */
static struct pipe_resource *
download_texture_compute(struct st_context *st,
                         const struct gl_pixelstore_attrib *pack,
                         GLint xoffset, GLint yoffset, GLint zoffset,
                         GLsizei width, GLsizei height, GLint depth,
                         unsigned level, unsigned layer,
                         GLenum format, GLenum type,
                         enum pipe_format src_format,
                         enum pipe_texture_target view_target,
                         struct pipe_resource *src,
                         enum pipe_format dst_format,
                         enum swizzle_clamp swizzle_clamp)
{
   struct pipe_context *pipe = st->pipe;
   struct pipe_screen *screen = st->screen;
   struct pipe_resource *dst = NULL;
   unsigned dim = get_dim_from_target(view_target);

   /* clamp 3d offsets based on slice */
   if (view_target == PIPE_TEXTURE_3D)
      zoffset += layer;

   unsigned num_components = 0;
   /* Upload constants */
   {
      struct pipe_constant_buffer cb;
      /* 1D arrays address layers through yoffset, not zoffset */
      assert(view_target != PIPE_TEXTURE_1D_ARRAY || !zoffset);
      struct pbo_data pd = {
         .x = xoffset,
         .y = view_target == PIPE_TEXTURE_1D_ARRAY ?
0 : yoffset, 802 .width = width, .height = height, .depth = depth, 803 .invert = pack->Invert, 804 .blocksize = util_format_get_blocksize(dst_format) - 1, 805 .alignment = ffs(MAX2(pack->Alignment, 1)) - 1, 806 }; 807 num_components = fill_pbo_data(&pd, src_format, dst_format, pack->SwapBytes == 1); 808 809 cb.buffer = NULL; 810 cb.user_buffer = &pd; 811 cb.buffer_offset = 0; 812 cb.buffer_size = sizeof(pd); 813 814 pipe->set_constant_buffer(pipe, PIPE_SHADER_COMPUTE, 0, false, &cb); 815 } 816 817 uint32_t hash_key = compute_shader_key(view_target, num_components); 818 assert(hash_key != 0); 819 820 struct hash_entry *he = _mesa_hash_table_search(st->pbo.shaders, (void*)(uintptr_t)hash_key); 821 void *cs; 822 if (!he) { 823 cs = create_conversion_shader(st, view_target, num_components); 824 he = _mesa_hash_table_insert(st->pbo.shaders, (void*)(uintptr_t)hash_key, cs); 825 } 826 cs = he->data; 827 assert(cs); 828 struct cso_context *cso = st->cso_context; 829 830 cso_save_compute_state(cso, CSO_BIT_COMPUTE_SHADER | CSO_BIT_COMPUTE_SAMPLERS); 831 cso_set_compute_shader_handle(cso, cs); 832 833 /* Set up the sampler_view */ 834 { 835 struct pipe_sampler_view templ; 836 struct pipe_sampler_view *sampler_view; 837 struct pipe_sampler_state sampler = {0}; 838 sampler.normalized_coords = true; 839 const struct pipe_sampler_state *samplers[1] = {&sampler}; 840 const struct util_format_description *desc = util_format_description(dst_format); 841 842 u_sampler_view_default_template(&templ, src, src_format); 843 if (util_format_is_depth_or_stencil(dst_format)) { 844 templ.swizzle_r = PIPE_SWIZZLE_X; 845 templ.swizzle_g = PIPE_SWIZZLE_X; 846 templ.swizzle_b = PIPE_SWIZZLE_X; 847 templ.swizzle_a = PIPE_SWIZZLE_X; 848 } else { 849 uint8_t invswizzle[4]; 850 const uint8_t *swizzle; 851 852 /* these swizzle output bits require explicit component selection/ordering */ 853 if (swizzle_clamp & SWIZZLE_CLAMP_GREEN) { 854 for (unsigned i = 0; i < 4; i++) 855 invswizzle[i] = 
               PIPE_SWIZZLE_Y;
         } else if (swizzle_clamp & SWIZZLE_CLAMP_BLUE) {
            /* BLUE selection: route every output component from source Z */
            for (unsigned i = 0; i < 4; i++)
               invswizzle[i] = PIPE_SWIZZLE_Z;
         } else {
            if (swizzle_clamp & SWIZZLE_CLAMP_BGRA) {
               /* borrow the BGR(A) component ordering from a canonical BGRA format */
               if (util_format_get_nr_components(dst_format) == 3)
                  swizzle = util_format_description(PIPE_FORMAT_B8G8R8_UNORM)->swizzle;
               else
                  swizzle = util_format_description(PIPE_FORMAT_B8G8R8A8_UNORM)->swizzle;
            } else {
               swizzle = desc->swizzle;
            }
            /* invert_swizzle presumably computes the inverse component mapping;
             * defined earlier in this file (not visible in this chunk) — TODO confirm
             */
            invert_swizzle(invswizzle, swizzle);
         }
         /* BGRA/GREEN/BLUE were consumed above; strip them so the switch below
          * only ever sees one of the clamp modes.
          */
         swizzle_clamp &= ~(SWIZZLE_CLAMP_BGRA | SWIZZLE_CLAMP_GREEN | SWIZZLE_CLAMP_BLUE);

         /* these swizzle input modes clamp unused components to 0 and (sometimes) alpha to 1 */
         switch (swizzle_clamp) {
         case SWIZZLE_CLAMP_LUMINANCE:
            /* a true luminance dst needs no fixup */
            if (util_format_is_luminance(dst_format))
               break;
            for (unsigned i = 0; i < 4; i++) {
               if (invswizzle[i] != PIPE_SWIZZLE_X)
                  invswizzle[i] = invswizzle[i] == PIPE_SWIZZLE_W ? PIPE_SWIZZLE_1 : PIPE_SWIZZLE_0;
            }
            break;
         case SWIZZLE_CLAMP_ALPHA:
            /* keep only alpha; everything else reads 0 */
            for (unsigned i = 0; i < 4; i++) {
               if (invswizzle[i] != PIPE_SWIZZLE_W)
                  invswizzle[i] = PIPE_SWIZZLE_0;
            }
            break;
         case SWIZZLE_CLAMP_LUMINANCE_ALPHA:
            if (util_format_is_luminance_alpha(dst_format))
               break;
            /* keep luminance (X) and alpha (W); zero the rest */
            for (unsigned i = 0; i < 4; i++) {
               if (invswizzle[i] != PIPE_SWIZZLE_X && invswizzle[i] != PIPE_SWIZZLE_W)
                  invswizzle[i] = PIPE_SWIZZLE_0;
            }
            break;
         case SWIZZLE_CLAMP_INTENSITY:
            /* intensity: X everywhere, alpha forced to 1, others 0 */
            for (unsigned i = 0; i < 4; i++) {
               if (invswizzle[i] == PIPE_SWIZZLE_W)
                  invswizzle[i] = PIPE_SWIZZLE_1;
               else if (invswizzle[i] != PIPE_SWIZZLE_X)
                  invswizzle[i] = PIPE_SWIZZLE_0;
            }
            break;
         case SWIZZLE_CLAMP_RGBX:
            /* RGB with implicit opaque alpha */
            for (unsigned i = 0; i < 4; i++) {
               if (invswizzle[i] == PIPE_SWIZZLE_W)
                  invswizzle[i] = PIPE_SWIZZLE_1;
            }
            break;
         default: break;
         }
         templ.swizzle_r = invswizzle[0];
         templ.swizzle_g = invswizzle[1];
         templ.swizzle_b = invswizzle[2];
         templ.swizzle_a = invswizzle[3];
      }
      templ.target = view_target;
      /* sample only the requested mip level */
      templ.u.tex.first_level = level;
      templ.u.tex.last_level = level;

      /* array textures expect to have array index provided */
      if (view_target != PIPE_TEXTURE_3D && src->array_size) {
         templ.u.tex.first_layer = layer;
         if (view_target == PIPE_TEXTURE_1D_ARRAY) {
            /* for 1D arrays the Y offset selects the layer */
            templ.u.tex.first_layer += yoffset;
            templ.u.tex.last_layer = templ.u.tex.first_layer + height - 1;
         } else {
            templ.u.tex.first_layer += zoffset;
            templ.u.tex.last_layer = templ.u.tex.first_layer + depth - 1;
         }
      }

      sampler_view = pipe->create_sampler_view(pipe, src, &templ);
      if (sampler_view == NULL)
         goto fail;

      pipe->set_sampler_views(pipe, PIPE_SHADER_COMPUTE, 0, 1, 0, false,
                              &sampler_view);
      st->state.num_sampler_views[PIPE_SHADER_COMPUTE] =
         MAX2(st->state.num_sampler_views[PIPE_SHADER_COMPUTE], 1);

      /* take_ownership was false above, so the driver holds its own reference;
       * drop ours now that the view is bound
       */
      pipe_sampler_view_reference(&sampler_view, NULL);

      cso_set_samplers(cso, PIPE_SHADER_COMPUTE, 1, samplers);
   }

   /* Set up destination buffer */
   unsigned img_stride = src->target == PIPE_TEXTURE_3D ||
                         src->target == PIPE_TEXTURE_2D_ARRAY ||
                         src->target == PIPE_TEXTURE_CUBE_ARRAY ?
                         /* only use image stride for 3d images to avoid pulling in IMAGE_HEIGHT pixelstore */
                         _mesa_image_image_stride(pack, width, height, format, type) :
                         _mesa_image_row_stride(pack, width, format, type) * height;
   /* SkipImages only applies to true 3D downloads */
   unsigned buffer_size = (depth + (dim == 3 ? pack->SkipImages : 0)) * img_stride;
   {
      struct pipe_shader_buffer buffer;
      memset(&buffer, 0, sizeof(buffer));
      if (can_copy_direct(pack) && pack->BufferObj) {
         /* packing allows writing straight into the bound PBO; no staging copy */
         dst = pack->BufferObj->buffer;
         assert(pack->BufferObj->Size >= buffer_size);
      } else {
         /* staging buffer; caller maps it and repacks on the CPU */
         dst = pipe_buffer_create(screen, PIPE_BIND_SHADER_BUFFER, PIPE_USAGE_STAGING, buffer_size);
         if (!dst)
            goto fail;
      }
      buffer.buffer = dst;
      buffer.buffer_size = buffer_size;

      pipe->set_shader_buffers(pipe, PIPE_SHADER_COMPUTE, 0, 1, &buffer, 0x1);
   }

   struct pipe_grid_info info = { 0 };
   /* 8x8 workgroups for 2D/3D targets, 64x1 for 1D */
   info.block[0] = src->target != PIPE_TEXTURE_1D ? 8 : 64;
   info.block[1] = src->target != PIPE_TEXTURE_1D ? 8 : 1;
   /* partial workgroups for sizes that aren't block-aligned */
   info.last_block[0] = width % info.block[0];
   info.last_block[1] = height % info.block[1];
   info.block[2] = 1;
   info.grid[0] = DIV_ROUND_UP(width, info.block[0]);
   info.grid[1] = DIV_ROUND_UP(height, info.block[1]);
   /* one grid slice per image layer/depth slice */
   info.grid[2] = depth;

   pipe->launch_grid(pipe, &info);

fail:
   /* restore the previously-bound compute state saved by cso_save earlier */
   cso_restore_compute_state(cso);

   /* Unbind all because st/mesa won't do it if the current shader doesn't
    * use them.
    */
   pipe->set_sampler_views(pipe, PIPE_SHADER_COMPUTE, 0, 0,
                           st->state.num_sampler_views[PIPE_SHADER_COMPUTE],
                           false, NULL);
   st->state.num_sampler_views[PIPE_SHADER_COMPUTE] = 0;
   pipe->set_shader_buffers(pipe, PIPE_SHADER_COMPUTE, 0, 1, NULL, 0);

   /* flag the state we clobbered so st/mesa revalidates it on next draw/dispatch */
   st->dirty |= ST_NEW_CS_CONSTANTS |
                ST_NEW_CS_SSBOS |
                ST_NEW_CS_SAMPLER_VIEWS;

   return dst;
}

/* Map the compute-converted staging buffer and repack its contents into the
 * user's destination (client memory or mapped PBO), honoring the pixelstore
 * packing attribs the compute shader deliberately ignored.
 */
static void
copy_converted_buffer(struct gl_context * ctx,
                     struct gl_pixelstore_attrib *pack,
                     enum pipe_texture_target view_target,
                     struct pipe_resource *dst, enum pipe_format dst_format,
                     GLint xoffset, GLint yoffset, GLint zoffset,
                     GLsizei width, GLsizei height, GLint depth,
                     GLenum format, GLenum type, void *pixels)
{
   struct pipe_transfer *xfer;
   struct st_context *st = st_context(ctx);
   unsigned dim = get_dim_from_target(view_target);
   /* PIPE_MAP_ONCE: read-only, one-shot map of the staging buffer */
   uint8_t *map = pipe_buffer_map(st->pipe, dst, PIPE_MAP_READ | PIPE_MAP_ONCE, &xfer);
   if (!map)
      return;

   pixels = _mesa_map_pbo_dest(ctx, pack, pixels);
   /* compute shader doesn't handle these to cut down on uniform size */
   if (!can_copy_direct(pack)) {
      if (view_target == PIPE_TEXTURE_1D_ARRAY) {
         /* 1D arrays store layers along Y; fold them into the depth loop */
         depth = height;
         height = 1;
         zoffset = yoffset;
         yoffset = 0;
      }
      /* the GPU wrote tightly packed rows: zero every packing field from
       * RowLength up to (but not including) SwapBytes so _mesa_image_address
       * computes tightly-packed source addresses
       */
      struct gl_pixelstore_attrib packing = *pack;
      memset(&packing.RowLength, 0, offsetof(struct gl_pixelstore_attrib, SwapBytes) - offsetof(struct gl_pixelstore_attrib, RowLength));
      /* copy row by row: dst address uses the caller's packing, src address
       * uses the zeroed (tight) packing
       */
      for (unsigned z = 0; z < depth; z++) {
         for (unsigned y = 0; y < height; y++) {
            GLubyte *dst = _mesa_image_address(dim, pack, pixels,
                                               width, height, format, type,
                                               z, y, 0);
            GLubyte *srcpx = _mesa_image_address(dim, &packing, map,
                                                 width, height, format, type,
                                                 z, y, 0);
            util_streaming_load_memcpy(dst, srcpx, util_format_get_stride(dst_format, width));
         }
      }
   } else {
      /* direct copy for all other cases */
      util_streaming_load_memcpy(pixels, map, dst->width0);
   }

   _mesa_unmap_pbo_dest(ctx, pack);
   pipe_buffer_unmap(st->pipe, xfer);
}

/* Compute-shader fallback for glGetTexSubImage-style downloads that need a
 * format conversion. Returns true if the download was handled here, false if
 * the caller should fall back to another path (e.g. the memcpy fast path or
 * CPU conversion).
 */
bool
st_GetTexSubImage_shader(struct gl_context * ctx,
                         GLint xoffset, GLint yoffset, GLint zoffset,
                         GLsizei width, GLsizei height, GLint depth,
                         GLenum format, GLenum type, void * pixels,
                         struct gl_texture_image *texImage)
{
   struct st_context *st = st_context(ctx);
   struct pipe_screen *screen = st->screen;
   struct gl_texture_object *stObj = texImage->TexObject;
   struct pipe_resource *src = texImage->pt;
   struct pipe_resource *dst = NULL;
   enum pipe_format dst_format, src_format;
   /* account for texture views: a standalone image resource starts at level 0,
    * and MinLevel/MinLayer offset into the viewed resource
    */
   unsigned level = (texImage->pt != stObj->pt ? 0 : texImage->Level) + texImage->TexObject->Attrib.MinLevel;
   unsigned layer = texImage->Face + texImage->TexObject->Attrib.MinLayer;
   enum pipe_texture_target view_target;

   /* compressed formats decoded elsewhere must never reach this path */
   assert(!_mesa_is_format_etc2(texImage->TexFormat) &&
          !_mesa_is_format_astc_2d(texImage->TexFormat) &&
          texImage->TexFormat != MESA_FORMAT_ETC1_RGB8);

   /* See if the texture format already matches the format and type,
    * in which case the memcpy-based fast path will be used. */
   if (_mesa_format_matches_format_and_type(texImage->TexFormat, format,
                                            type, ctx->Pack.SwapBytes, NULL)) {
      return false;
   }
   enum swizzle_clamp swizzle_clamp = 0;
   src_format = st_pbo_get_src_format(screen, stObj->surface_based ? stObj->surface_format : src->format, src);
   if (src_format == PIPE_FORMAT_NONE)
      return false;

   if (texImage->_BaseFormat != _mesa_get_format_base_format(texImage->TexFormat)) {
      /* special handling for drivers that don't support these formats natively */
      if (texImage->_BaseFormat == GL_LUMINANCE)
         swizzle_clamp = SWIZZLE_CLAMP_LUMINANCE;
      else if (texImage->_BaseFormat == GL_LUMINANCE_ALPHA)
         swizzle_clamp = SWIZZLE_CLAMP_LUMINANCE_ALPHA;
      else if (texImage->_BaseFormat == GL_ALPHA)
         swizzle_clamp = SWIZZLE_CLAMP_ALPHA;
      else if (texImage->_BaseFormat == GL_INTENSITY)
         swizzle_clamp = SWIZZLE_CLAMP_INTENSITY;
      else if (texImage->_BaseFormat == GL_RGB)
         swizzle_clamp = SWIZZLE_CLAMP_RGBX;
   }

   dst_format = st_pbo_get_dst_format(ctx, PIPE_BUFFER, src_format, false, format, type, 0);

   if (dst_format == PIPE_FORMAT_NONE) {
      /* no direct dst format: try the conversion table and fix up component
       * selection with a swizzle instead
       */
      bool need_bgra_swizzle = false;
      dst_format = get_convert_format(ctx, src_format, format, type, &need_bgra_swizzle);
      if (dst_format == PIPE_FORMAT_NONE)
         return false;
      /* special swizzling for component selection */
      if (need_bgra_swizzle)
         swizzle_clamp |= SWIZZLE_CLAMP_BGRA;
      else if (format == GL_GREEN_INTEGER)
         swizzle_clamp |= SWIZZLE_CLAMP_GREEN;
      else if (format == GL_BLUE_INTEGER)
         swizzle_clamp |= SWIZZLE_CLAMP_BLUE;
   }

   /* check with the driver to see if memcpy is likely to be faster */
   if (!screen->is_compute_copy_faster(screen, src_format, dst_format, width, height, depth, true))
      return false;

   view_target = get_target_from_texture(src);
   /* Empirical workarounds: these src/dst format combinations produce wrong
    * results on every target except texture rectangles, for reasons that are
    * not fully understood (see the notes on each case).
    */
   if ((src->target != PIPE_TEXTURE_RECT &&
        /* this would need multiple samplerviews */
        ((util_format_is_depth_and_stencil(src_format) && util_format_is_depth_and_stencil(dst_format)) ||
         /* known-broken combination; root cause never identified */
         dst_format == PIPE_FORMAT_Z32_FLOAT)) ||
       /* non-float -> L32_FLOAT is likewise known-broken */
       (!util_format_is_float(src_format) && dst_format == PIPE_FORMAT_L32_FLOAT))
      return false;

   /* run the conversion on the GPU; returns the buffer the shader wrote
    * (the bound PBO for the direct path, or a fresh staging buffer)
    */
   dst = download_texture_compute(st, &ctx->Pack, xoffset, yoffset, zoffset, width, height, depth,
                                  level, layer, format, type, src_format, view_target, src, dst_format,
                                  swizzle_clamp);
   /* NOTE(review): download_texture_compute appears to return NULL on failure
    * (its fail path), but dst is passed to copy_converted_buffer unchecked
    * below — verify whether a NULL check is needed here.
    */

   if (!can_copy_direct(&ctx->Pack) || !ctx->Pack.BufferObj) {
      /* CPU repack into client memory (or a PBO needing pixelstore handling),
       * then release the staging buffer; the direct-to-PBO path took no extra
       * reference, so nothing to release there
       */
      copy_converted_buffer(ctx, &ctx->Pack, view_target, dst, dst_format, xoffset, yoffset, zoffset,
                            width, height, depth, format, type, pixels);

      pipe_resource_reference(&dst, NULL);
   }

   return true;
}