1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2021 Valve Corporation 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci * 23bf215546Sopenharmony_ci * Authors: 24bf215546Sopenharmony_ci * Mike Blumenkrantz <michael.blumenkrantz@gmail.com> 25bf215546Sopenharmony_ci */ 26bf215546Sopenharmony_ci 27bf215546Sopenharmony_ci#include <stdbool.h> 28bf215546Sopenharmony_ci#include "main/image.h" 29bf215546Sopenharmony_ci#include "main/pbo.h" 30bf215546Sopenharmony_ci 31bf215546Sopenharmony_ci#include "state_tracker/st_nir.h" 32bf215546Sopenharmony_ci#include "state_tracker/st_format.h" 33bf215546Sopenharmony_ci#include "state_tracker/st_pbo.h" 34bf215546Sopenharmony_ci#include "state_tracker/st_texture.h" 35bf215546Sopenharmony_ci#include "compiler/nir/nir_builder.h" 36bf215546Sopenharmony_ci#include "compiler/nir/nir_format_convert.h" 37bf215546Sopenharmony_ci#include "compiler/glsl/gl_nir.h" 38bf215546Sopenharmony_ci#include "compiler/glsl/gl_nir_linker.h" 39bf215546Sopenharmony_ci#include "util/u_sampler.h" 40bf215546Sopenharmony_ci#include "util/streaming-load-memcpy.h" 41bf215546Sopenharmony_ci 42bf215546Sopenharmony_ci#define BGR_FORMAT(NAME) \ 43bf215546Sopenharmony_ci {{ \ 44bf215546Sopenharmony_ci [0] = PIPE_FORMAT_##NAME##_SNORM, \ 45bf215546Sopenharmony_ci [1] = PIPE_FORMAT_##NAME##_SINT, \ 46bf215546Sopenharmony_ci }, \ 47bf215546Sopenharmony_ci { \ 48bf215546Sopenharmony_ci [0] = PIPE_FORMAT_##NAME##_UNORM, \ 49bf215546Sopenharmony_ci [1] = PIPE_FORMAT_##NAME##_UINT, \ 50bf215546Sopenharmony_ci }} 51bf215546Sopenharmony_ci 52bf215546Sopenharmony_ci#define FORMAT(NAME, NAME16, NAME32) \ 53bf215546Sopenharmony_ci {{ \ 54bf215546Sopenharmony_ci [1] = PIPE_FORMAT_##NAME##_SNORM, \ 55bf215546Sopenharmony_ci [2] = PIPE_FORMAT_##NAME16##_SNORM, \ 56bf215546Sopenharmony_ci [4] = PIPE_FORMAT_##NAME32##_SNORM, \ 57bf215546Sopenharmony_ci }, \ 58bf215546Sopenharmony_ci { \ 59bf215546Sopenharmony_ci [1] = PIPE_FORMAT_##NAME##_UNORM, \ 60bf215546Sopenharmony_ci [2] = PIPE_FORMAT_##NAME16##_UNORM, \ 61bf215546Sopenharmony_ci [4] = PIPE_FORMAT_##NAME32##_UNORM, \ 62bf215546Sopenharmony_ci }} 63bf215546Sopenharmony_ci 64bf215546Sopenharmony_ci/* don't try these at home */ 65bf215546Sopenharmony_cistatic enum pipe_format 66bf215546Sopenharmony_ciget_convert_format(struct gl_context *ctx, 67bf215546Sopenharmony_ci enum pipe_format src_format, 68bf215546Sopenharmony_ci GLenum format, GLenum type, 69bf215546Sopenharmony_ci bool *need_bgra_swizzle) 70bf215546Sopenharmony_ci{ 71bf215546Sopenharmony_ci struct st_context *st = st_context(ctx); 72bf215546Sopenharmony_ci GLint bpp = _mesa_bytes_per_pixel(format, type); 73bf215546Sopenharmony_ci if (_mesa_is_depth_format(format) || 74bf215546Sopenharmony_ci format == GL_GREEN_INTEGER || 75bf215546Sopenharmony_ci format == GL_BLUE_INTEGER) { 76bf215546Sopenharmony_ci switch (bpp) { 77bf215546Sopenharmony_ci case 1: 78bf215546Sopenharmony_ci return _mesa_is_type_unsigned(type) ? PIPE_FORMAT_R8_UINT : PIPE_FORMAT_R8_SINT; 79bf215546Sopenharmony_ci case 2: 80bf215546Sopenharmony_ci return _mesa_is_type_unsigned(type) ? PIPE_FORMAT_R16_UINT : PIPE_FORMAT_R16_SINT; 81bf215546Sopenharmony_ci case 4: 82bf215546Sopenharmony_ci return _mesa_is_type_unsigned(type) ? PIPE_FORMAT_R32_UINT : PIPE_FORMAT_R32_SINT; 83bf215546Sopenharmony_ci } 84bf215546Sopenharmony_ci } 85bf215546Sopenharmony_ci mesa_format mformat = _mesa_tex_format_from_format_and_type(ctx, format, type); 86bf215546Sopenharmony_ci enum pipe_format pformat = st_mesa_format_to_pipe_format(st, mformat); 87bf215546Sopenharmony_ci if (!pformat) { 88bf215546Sopenharmony_ci GLint dst_components = _mesa_components_in_format(format); 89bf215546Sopenharmony_ci bpp /= dst_components; 90bf215546Sopenharmony_ci if (format == GL_BGR || format == GL_BGRA) { 91bf215546Sopenharmony_ci pformat = st_pbo_get_dst_format(ctx, PIPE_TEXTURE_2D, src_format, false, format == GL_BGR ? GL_RGB : GL_RGBA, type, 0); 92bf215546Sopenharmony_ci if (!pformat) 93bf215546Sopenharmony_ci pformat = get_convert_format(ctx, src_format, format == GL_BGR ? GL_RGB : GL_RGBA, type, need_bgra_swizzle); 94bf215546Sopenharmony_ci assert(pformat); 95bf215546Sopenharmony_ci *need_bgra_swizzle = true; 96bf215546Sopenharmony_ci } else if (format == GL_BGR_INTEGER || format == GL_BGRA_INTEGER) { 97bf215546Sopenharmony_ci pformat = st_pbo_get_dst_format(ctx, PIPE_TEXTURE_2D, src_format, false, format == GL_BGR_INTEGER ? GL_RGB_INTEGER : GL_RGBA_INTEGER, type, 0); 98bf215546Sopenharmony_ci if (!pformat) 99bf215546Sopenharmony_ci pformat = get_convert_format(ctx, src_format, format == GL_BGR_INTEGER ? GL_RGB_INTEGER : GL_RGBA_INTEGER, type, need_bgra_swizzle); 100bf215546Sopenharmony_ci assert(pformat); 101bf215546Sopenharmony_ci *need_bgra_swizzle = true; 102bf215546Sopenharmony_ci } else { 103bf215546Sopenharmony_ci /* [signed,unsigned][bpp] */ 104bf215546Sopenharmony_ci enum pipe_format rgb[5][2][5] = { 105bf215546Sopenharmony_ci [1] = FORMAT(R8, R16, R32), 106bf215546Sopenharmony_ci [2] = FORMAT(R8G8, R16G16, R32G32), 107bf215546Sopenharmony_ci [3] = FORMAT(R8G8B8, R16G16B16, R32G32B32), 108bf215546Sopenharmony_ci [4] = FORMAT(R8G8B8A8, R16G16B16A16, R32G32B32A32), 109bf215546Sopenharmony_ci }; 110bf215546Sopenharmony_ci pformat = rgb[dst_components][_mesa_is_type_unsigned(type)][bpp]; 111bf215546Sopenharmony_ci } 112bf215546Sopenharmony_ci assert(util_format_get_nr_components(pformat) == dst_components); 113bf215546Sopenharmony_ci } 114bf215546Sopenharmony_ci assert(pformat); 115bf215546Sopenharmony_ci return pformat; 116bf215546Sopenharmony_ci} 117bf215546Sopenharmony_ci#undef BGR_FORMAT 118bf215546Sopenharmony_ci#undef FORMAT 119bf215546Sopenharmony_ci 120bf215546Sopenharmony_ci 121bf215546Sopenharmony_cistruct pbo_shader_data { 122bf215546Sopenharmony_ci nir_ssa_def *offset; 123bf215546Sopenharmony_ci nir_ssa_def *range; 124bf215546Sopenharmony_ci nir_ssa_def *invert; 125bf215546Sopenharmony_ci nir_ssa_def *blocksize; 126bf215546Sopenharmony_ci nir_ssa_def *alignment; 127bf215546Sopenharmony_ci nir_ssa_def *dst_bit_size; 128bf215546Sopenharmony_ci nir_ssa_def *channels; 129bf215546Sopenharmony_ci nir_ssa_def *normalized; 130bf215546Sopenharmony_ci nir_ssa_def *integer; 131bf215546Sopenharmony_ci nir_ssa_def *clamp_uint; 132bf215546Sopenharmony_ci nir_ssa_def *r11g11b10_or_sint; 133bf215546Sopenharmony_ci nir_ssa_def *r9g9b9e5; 134bf215546Sopenharmony_ci nir_ssa_def *bits1; 135bf215546Sopenharmony_ci nir_ssa_def *bits2; 136bf215546Sopenharmony_ci nir_ssa_def *bits3; 137bf215546Sopenharmony_ci nir_ssa_def *bits4; 138bf215546Sopenharmony_ci nir_ssa_def *swap; 139bf215546Sopenharmony_ci nir_ssa_def *bits; //vec4 140bf215546Sopenharmony_ci}; 141bf215546Sopenharmony_ci 142bf215546Sopenharmony_ci 143bf215546Sopenharmony_ci/* must be under 16bytes / sizeof(vec4) / 128 bits) */ 144bf215546Sopenharmony_cistruct pbo_data { 145bf215546Sopenharmony_ci union { 146bf215546Sopenharmony_ci struct { 147bf215546Sopenharmony_ci struct { 148bf215546Sopenharmony_ci uint16_t x, y; 149bf215546Sopenharmony_ci }; 150bf215546Sopenharmony_ci struct { 151bf215546Sopenharmony_ci uint16_t width, height; 152bf215546Sopenharmony_ci }; 153bf215546Sopenharmony_ci struct { 154bf215546Sopenharmony_ci uint16_t depth; 155bf215546Sopenharmony_ci uint8_t invert : 1; 156bf215546Sopenharmony_ci uint8_t blocksize : 7; 157bf215546Sopenharmony_ci 158bf215546Sopenharmony_ci uint8_t clamp_uint : 1; 159bf215546Sopenharmony_ci uint8_t r11g11b10_or_sint : 1; 160bf215546Sopenharmony_ci uint8_t r9g9b9e5 : 1; 161bf215546Sopenharmony_ci uint8_t swap : 1; 162bf215546Sopenharmony_ci uint16_t alignment : 2; 163bf215546Sopenharmony_ci uint8_t dst_bit_size : 2; //8, 16, 32, 64 164bf215546Sopenharmony_ci }; 165bf215546Sopenharmony_ci 166bf215546Sopenharmony_ci struct { 167bf215546Sopenharmony_ci uint8_t channels : 2; 168bf215546Sopenharmony_ci uint8_t bits1 : 6; 169bf215546Sopenharmony_ci uint8_t normalized : 1; 170bf215546Sopenharmony_ci uint8_t integer : 1; 171bf215546Sopenharmony_ci uint8_t bits2 : 6; 172bf215546Sopenharmony_ci uint8_t bits3 : 6; 173bf215546Sopenharmony_ci uint8_t pad1 : 2; 174bf215546Sopenharmony_ci uint8_t bits4 : 6; 175bf215546Sopenharmony_ci uint8_t pad2 : 2; 176bf215546Sopenharmony_ci }; 177bf215546Sopenharmony_ci }; 178bf215546Sopenharmony_ci float vec[4]; 179bf215546Sopenharmony_ci }; 180bf215546Sopenharmony_ci}; 181bf215546Sopenharmony_ci 182bf215546Sopenharmony_ci 183bf215546Sopenharmony_ci#define STRUCT_OFFSET(name) (offsetof(struct pbo_data, name) * 8) 184bf215546Sopenharmony_ci 185bf215546Sopenharmony_ci#define STRUCT_BLOCK(offset, ...) \ 186bf215546Sopenharmony_ci do { \ 187bf215546Sopenharmony_ci assert(offset % 8 == 0); \ 188bf215546Sopenharmony_ci nir_ssa_def *block##offset = nir_u2u32(b, nir_extract_bits(b, &ubo_load, 1, (offset), 1, 8)); \ 189bf215546Sopenharmony_ci __VA_ARGS__ \ 190bf215546Sopenharmony_ci } while (0) 191bf215546Sopenharmony_ci#define STRUCT_MEMBER(blockoffset, name, offset, size, op, clamp) \ 192bf215546Sopenharmony_ci do { \ 193bf215546Sopenharmony_ci assert(offset + size <= 8); \ 194bf215546Sopenharmony_ci nir_ssa_def *val = nir_iand_imm(b, block##blockoffset, u_bit_consecutive(offset, size)); \ 195bf215546Sopenharmony_ci if (offset) \ 196bf215546Sopenharmony_ci val = nir_ushr_imm(b, val, offset); \ 197bf215546Sopenharmony_ci sd->name = op; \ 198bf215546Sopenharmony_ci if (clamp) \ 199bf215546Sopenharmony_ci sd->name = nir_umin(b, sd->name, nir_imm_int(b, clamp)); \ 200bf215546Sopenharmony_ci } while (0) 201bf215546Sopenharmony_ci#define STRUCT_MEMBER_SHIFTED_2BIT(blockoffset, name, offset, shift, clamp) \ 202bf215546Sopenharmony_ci STRUCT_MEMBER(blockoffset, name, offset, 2, nir_ishl(b, nir_imm_int(b, shift), val), clamp) 203bf215546Sopenharmony_ci 204bf215546Sopenharmony_ci#define STRUCT_MEMBER_BOOL(blockoffset, name, offset) \ 205bf215546Sopenharmony_ci STRUCT_MEMBER(blockoffset, name, offset, 1, nir_ieq_imm(b, val, 1), 0) 206bf215546Sopenharmony_ci 207bf215546Sopenharmony_ci/* this function extracts the conversion data from pbo_data using the 208bf215546Sopenharmony_ci * size annotations for each grouping. data is compacted into bitfields, 209bf215546Sopenharmony_ci * so bitwise operations must be used to "unpact" everything 210bf215546Sopenharmony_ci */ 211bf215546Sopenharmony_cistatic void 212bf215546Sopenharmony_ciinit_pbo_shader_data(nir_builder *b, struct pbo_shader_data *sd) 213bf215546Sopenharmony_ci{ 214bf215546Sopenharmony_ci nir_variable *ubo = nir_variable_create(b->shader, nir_var_uniform, glsl_uvec4_type(), "offset"); 215bf215546Sopenharmony_ci nir_ssa_def *ubo_load = nir_load_var(b, ubo); 216bf215546Sopenharmony_ci 217bf215546Sopenharmony_ci sd->offset = nir_umin(b, nir_u2u32(b, nir_extract_bits(b, &ubo_load, 1, STRUCT_OFFSET(x), 2, 16)), nir_imm_int(b, 65535)); 218bf215546Sopenharmony_ci sd->range = nir_umin(b, nir_u2u32(b, nir_extract_bits(b, &ubo_load, 1, STRUCT_OFFSET(width), 3, 16)), nir_imm_int(b, 65535)); 219bf215546Sopenharmony_ci 220bf215546Sopenharmony_ci STRUCT_BLOCK(80, 221bf215546Sopenharmony_ci STRUCT_MEMBER_BOOL(80, invert, 0); 222bf215546Sopenharmony_ci STRUCT_MEMBER(80, blocksize, 1, 7, nir_iadd_imm(b, val, 1), 128); 223bf215546Sopenharmony_ci ); 224bf215546Sopenharmony_ci 225bf215546Sopenharmony_ci STRUCT_BLOCK(88, 226bf215546Sopenharmony_ci STRUCT_MEMBER_BOOL(88, clamp_uint, 0); 227bf215546Sopenharmony_ci STRUCT_MEMBER_BOOL(88, r11g11b10_or_sint, 1); 228bf215546Sopenharmony_ci STRUCT_MEMBER_BOOL(88, r9g9b9e5, 2); 229bf215546Sopenharmony_ci STRUCT_MEMBER_BOOL(88, swap, 3); 230bf215546Sopenharmony_ci STRUCT_MEMBER_SHIFTED_2BIT(88, alignment, 4, 1, 8); 231bf215546Sopenharmony_ci STRUCT_MEMBER_SHIFTED_2BIT(88, dst_bit_size, 6, 8, 64); 232bf215546Sopenharmony_ci ); 233bf215546Sopenharmony_ci 234bf215546Sopenharmony_ci STRUCT_BLOCK(96, 235bf215546Sopenharmony_ci STRUCT_MEMBER(96, channels, 0, 2, nir_iadd_imm(b, val, 1), 4); 236bf215546Sopenharmony_ci STRUCT_MEMBER(96, bits1, 2, 6, val, 32); 237bf215546Sopenharmony_ci ); 238bf215546Sopenharmony_ci 239bf215546Sopenharmony_ci STRUCT_BLOCK(104, 240bf215546Sopenharmony_ci STRUCT_MEMBER_BOOL(104, normalized, 0); 241bf215546Sopenharmony_ci STRUCT_MEMBER_BOOL(104, integer, 1); 242bf215546Sopenharmony_ci STRUCT_MEMBER(104, bits2, 2, 6, val, 32); 243bf215546Sopenharmony_ci ); 244bf215546Sopenharmony_ci 245bf215546Sopenharmony_ci 246bf215546Sopenharmony_ci STRUCT_BLOCK(112, 247bf215546Sopenharmony_ci STRUCT_MEMBER(112, bits3, 0, 6, val, 32); 248bf215546Sopenharmony_ci ); 249bf215546Sopenharmony_ci 250bf215546Sopenharmony_ci STRUCT_BLOCK(120, 251bf215546Sopenharmony_ci STRUCT_MEMBER(120, bits4, 0, 6, val, 32); 252bf215546Sopenharmony_ci ); 253bf215546Sopenharmony_ci sd->bits = nir_vec4(b, sd->bits1, sd->bits2, sd->bits3, sd->bits4); 254bf215546Sopenharmony_ci 255bf215546Sopenharmony_ci /* clamp swap in the shader to enable better optimizing */ 256bf215546Sopenharmony_ci /* TODO? 257bf215546Sopenharmony_ci sd->swap = nir_bcsel(b, nir_ior(b, 258bf215546Sopenharmony_ci nir_ieq_imm(b, sd->blocksize, 8), 259bf215546Sopenharmony_ci nir_bcsel(b, 260bf215546Sopenharmony_ci nir_ieq_imm(b, sd->bits1, 8), 261bf215546Sopenharmony_ci nir_bcsel(b, 262bf215546Sopenharmony_ci nir_uge(b, sd->channels, nir_imm_int(b, 2)), 263bf215546Sopenharmony_ci nir_bcsel(b, 264bf215546Sopenharmony_ci nir_uge(b, sd->channels, nir_imm_int(b, 3)), 265bf215546Sopenharmony_ci nir_bcsel(b, 266bf215546Sopenharmony_ci nir_ieq(b, sd->channels, nir_imm_int(b, 4)), 267bf215546Sopenharmony_ci nir_ball(b, nir_ieq(b, sd->bits, nir_imm_ivec4(b, 8, 8, 8, 8))), 268bf215546Sopenharmony_ci nir_ball(b, nir_ieq(b, nir_channels(b, sd->bits, 7), nir_imm_ivec3(b, 8, 8, 8)))), 269bf215546Sopenharmony_ci nir_ball(b, nir_ieq(b, nir_channels(b, sd->bits, 3), nir_imm_ivec2(b, 8, 8)))), 270bf215546Sopenharmony_ci nir_imm_bool(b, 0)), 271bf215546Sopenharmony_ci nir_imm_bool(b, 0))), 272bf215546Sopenharmony_ci nir_imm_bool(b, 0), 273bf215546Sopenharmony_ci sd->swap); 274bf215546Sopenharmony_ci */ 275bf215546Sopenharmony_ci} 276bf215546Sopenharmony_ci 277bf215546Sopenharmony_cistatic unsigned 278bf215546Sopenharmony_cifill_pbo_data(struct pbo_data *pd, enum pipe_format src_format, enum pipe_format dst_format, bool swap) 279bf215546Sopenharmony_ci{ 280bf215546Sopenharmony_ci unsigned bits[4] = {0}; 281bf215546Sopenharmony_ci bool weird_packed = false; 282bf215546Sopenharmony_ci const struct util_format_description *dst_desc = util_format_description(dst_format); 283bf215546Sopenharmony_ci bool is_8bit = true; 284bf215546Sopenharmony_ci 285bf215546Sopenharmony_ci for (unsigned c = 0; c < 4; c++) { 286bf215546Sopenharmony_ci bits[c] = dst_desc->channel[c].size; 287bf215546Sopenharmony_ci if (c < dst_desc->nr_channels) { 288bf215546Sopenharmony_ci weird_packed |= bits[c] != bits[0] || bits[c] % 8 != 0; 289bf215546Sopenharmony_ci if (bits[c] != 8) 290bf215546Sopenharmony_ci is_8bit = false; 291bf215546Sopenharmony_ci } 292bf215546Sopenharmony_ci } 293bf215546Sopenharmony_ci 294bf215546Sopenharmony_ci if (is_8bit || dst_desc->block.bits == 8) 295bf215546Sopenharmony_ci swap = false; 296bf215546Sopenharmony_ci 297bf215546Sopenharmony_ci unsigned dst_bit_size = 0; 298bf215546Sopenharmony_ci if (weird_packed) { 299bf215546Sopenharmony_ci dst_bit_size = dst_desc->block.bits; 300bf215546Sopenharmony_ci } else { 301bf215546Sopenharmony_ci dst_bit_size = dst_desc->block.bits / dst_desc->nr_channels; 302bf215546Sopenharmony_ci } 303bf215546Sopenharmony_ci assert(dst_bit_size); 304bf215546Sopenharmony_ci assert(dst_bit_size <= 64); 305bf215546Sopenharmony_ci 306bf215546Sopenharmony_ci pd->dst_bit_size = dst_bit_size >> 4; 307bf215546Sopenharmony_ci pd->channels = dst_desc->nr_channels - 1; 308bf215546Sopenharmony_ci pd->normalized = dst_desc->is_unorm || dst_desc->is_snorm; 309bf215546Sopenharmony_ci pd->clamp_uint = dst_desc->is_unorm || 310bf215546Sopenharmony_ci (util_format_is_pure_sint(dst_format) && 311bf215546Sopenharmony_ci !util_format_is_pure_sint(src_format) && 312bf215546Sopenharmony_ci !util_format_is_snorm(src_format)) || 313bf215546Sopenharmony_ci util_format_is_pure_uint(dst_format); 314bf215546Sopenharmony_ci pd->integer = util_format_is_pure_uint(dst_format) || util_format_is_pure_sint(dst_format); 315bf215546Sopenharmony_ci pd->r11g11b10_or_sint = dst_format == PIPE_FORMAT_R11G11B10_FLOAT || util_format_is_pure_sint(dst_format); 316bf215546Sopenharmony_ci pd->r9g9b9e5 = dst_format == PIPE_FORMAT_R9G9B9E5_FLOAT; 317bf215546Sopenharmony_ci pd->bits1 = bits[0]; 318bf215546Sopenharmony_ci pd->bits2 = bits[1]; 319bf215546Sopenharmony_ci pd->bits3 = bits[2]; 320bf215546Sopenharmony_ci pd->bits4 = bits[3]; 321bf215546Sopenharmony_ci pd->swap = swap; 322bf215546Sopenharmony_ci 323bf215546Sopenharmony_ci return weird_packed ? 1 : dst_desc->nr_channels; 324bf215546Sopenharmony_ci} 325bf215546Sopenharmony_ci 326bf215546Sopenharmony_cistatic nir_ssa_def * 327bf215546Sopenharmony_ciget_buffer_offset(nir_builder *b, nir_ssa_def *coord, struct pbo_shader_data *sd) 328bf215546Sopenharmony_ci{ 329bf215546Sopenharmony_ci/* from _mesa_image_offset(): 330bf215546Sopenharmony_ci offset = topOfImage 331bf215546Sopenharmony_ci + (skippixels + column) * bytes_per_pixel 332bf215546Sopenharmony_ci + (skiprows + row) * bytes_per_row 333bf215546Sopenharmony_ci + (skipimages + img) * bytes_per_image; 334bf215546Sopenharmony_ci */ 335bf215546Sopenharmony_ci nir_ssa_def *bytes_per_row = nir_imul(b, nir_channel(b, sd->range, 0), sd->blocksize); 336bf215546Sopenharmony_ci bytes_per_row = nir_bcsel(b, nir_ult(b, sd->alignment, nir_imm_int(b, 2)), 337bf215546Sopenharmony_ci bytes_per_row, 338bf215546Sopenharmony_ci nir_iand(b, 339bf215546Sopenharmony_ci nir_isub(b, nir_iadd(b, bytes_per_row, sd->alignment), nir_imm_int(b, 1)), 340bf215546Sopenharmony_ci nir_inot(b, nir_isub(b, sd->alignment, nir_imm_int(b, 1))))); 341bf215546Sopenharmony_ci nir_ssa_def *bytes_per_image = nir_imul(b, bytes_per_row, nir_channel(b, sd->range, 1)); 342bf215546Sopenharmony_ci bytes_per_row = nir_bcsel(b, sd->invert, 343bf215546Sopenharmony_ci nir_isub(b, nir_imm_int(b, 0), bytes_per_row), 344bf215546Sopenharmony_ci bytes_per_row); 345bf215546Sopenharmony_ci return nir_iadd(b, 346bf215546Sopenharmony_ci nir_imul(b, nir_channel(b, coord, 0), sd->blocksize), 347bf215546Sopenharmony_ci nir_iadd(b, 348bf215546Sopenharmony_ci nir_imul(b, nir_channel(b, coord, 1), bytes_per_row), 349bf215546Sopenharmony_ci nir_imul(b, nir_channel(b, coord, 2), bytes_per_image))); 350bf215546Sopenharmony_ci} 351bf215546Sopenharmony_ci 352bf215546Sopenharmony_cistatic inline void 353bf215546Sopenharmony_ciwrite_ssbo(nir_builder *b, nir_ssa_def *pixel, nir_ssa_def *buffer_offset) 354bf215546Sopenharmony_ci{ 355bf215546Sopenharmony_ci nir_store_ssbo(b, pixel, nir_imm_zero(b, 1, 32), buffer_offset, 356bf215546Sopenharmony_ci .align_mul = pixel->bit_size / 8, 357bf215546Sopenharmony_ci .write_mask = (1 << pixel->num_components) - 1); 358bf215546Sopenharmony_ci} 359bf215546Sopenharmony_ci 360bf215546Sopenharmony_cistatic void 361bf215546Sopenharmony_ciwrite_conversion(nir_builder *b, nir_ssa_def *pixel, nir_ssa_def *buffer_offset, struct pbo_shader_data *sd) 362bf215546Sopenharmony_ci{ 363bf215546Sopenharmony_ci nir_push_if(b, nir_ilt(b, sd->dst_bit_size, nir_imm_int(b, 32))); 364bf215546Sopenharmony_ci nir_push_if(b, nir_ieq_imm(b, sd->dst_bit_size, 16)); 365bf215546Sopenharmony_ci write_ssbo(b, nir_u2u16(b, pixel), buffer_offset); 366bf215546Sopenharmony_ci nir_push_else(b, NULL); 367bf215546Sopenharmony_ci write_ssbo(b, nir_u2u8(b, pixel), buffer_offset); 368bf215546Sopenharmony_ci nir_pop_if(b, NULL); 369bf215546Sopenharmony_ci nir_push_else(b, NULL); 370bf215546Sopenharmony_ci write_ssbo(b, pixel, buffer_offset); 371bf215546Sopenharmony_ci nir_pop_if(b, NULL); 372bf215546Sopenharmony_ci} 373bf215546Sopenharmony_ci 374bf215546Sopenharmony_cistatic nir_ssa_def * 375bf215546Sopenharmony_ciswap2(nir_builder *b, nir_ssa_def *src) 376bf215546Sopenharmony_ci{ 377bf215546Sopenharmony_ci /* dst[i] = (src[i] >> 8) | ((src[i] << 8) & 0xff00); */ 378bf215546Sopenharmony_ci return nir_ior(b, 379bf215546Sopenharmony_ci nir_ushr_imm(b, src, 8), 380bf215546Sopenharmony_ci nir_iand_imm(b, nir_ishl(b, src, nir_imm_int(b, 8)), 0xff00)); 381bf215546Sopenharmony_ci} 382bf215546Sopenharmony_ci 383bf215546Sopenharmony_cistatic nir_ssa_def * 384bf215546Sopenharmony_ciswap4(nir_builder *b, nir_ssa_def *src) 385bf215546Sopenharmony_ci{ 386bf215546Sopenharmony_ci /* a = (b >> 24) | ((b >> 8) & 0xff00) | ((b << 8) & 0xff0000) | ((b << 24) & 0xff000000); */ 387bf215546Sopenharmony_ci return nir_ior(b, 388bf215546Sopenharmony_ci /* (b >> 24) */ 389bf215546Sopenharmony_ci nir_ushr_imm(b, src, 24), 390bf215546Sopenharmony_ci nir_ior(b, 391bf215546Sopenharmony_ci /* ((b >> 8) & 0xff00) */ 392bf215546Sopenharmony_ci nir_iand(b, nir_ushr_imm(b, src, 8), nir_imm_int(b, 0xff00)), 393bf215546Sopenharmony_ci nir_ior(b, 394bf215546Sopenharmony_ci /* ((b << 8) & 0xff0000) */ 395bf215546Sopenharmony_ci nir_iand(b, nir_ishl(b, src, nir_imm_int(b, 8)), nir_imm_int(b, 0xff0000)), 396bf215546Sopenharmony_ci /* ((b << 24) & 0xff000000) */ 397bf215546Sopenharmony_ci nir_iand(b, nir_ishl(b, src, nir_imm_int(b, 24)), nir_imm_int(b, 0xff000000))))); 398bf215546Sopenharmony_ci} 399bf215546Sopenharmony_ci 400bf215546Sopenharmony_ci/* explode the cf to handle channel counts in the shader */ 401bf215546Sopenharmony_cistatic void 402bf215546Sopenharmony_cigrab_components(nir_builder *b, nir_ssa_def *pixel, nir_ssa_def *buffer_offset, struct pbo_shader_data *sd, bool weird_packed) 403bf215546Sopenharmony_ci{ 404bf215546Sopenharmony_ci if (weird_packed) { 405bf215546Sopenharmony_ci nir_push_if(b, nir_ieq_imm(b, sd->bits1, 32)); 406bf215546Sopenharmony_ci write_conversion(b, nir_channels(b, pixel, 3), buffer_offset, sd); 407bf215546Sopenharmony_ci nir_push_else(b, NULL); 408bf215546Sopenharmony_ci write_conversion(b, nir_channel(b, pixel, 0), buffer_offset, sd); 409bf215546Sopenharmony_ci nir_pop_if(b, NULL); 410bf215546Sopenharmony_ci } else { 411bf215546Sopenharmony_ci nir_push_if(b, nir_ieq_imm(b, sd->channels, 1)); 412bf215546Sopenharmony_ci write_conversion(b, nir_channel(b, pixel, 0), buffer_offset, sd); 413bf215546Sopenharmony_ci nir_push_else(b, NULL); 414bf215546Sopenharmony_ci nir_push_if(b, nir_ieq_imm(b, sd->channels, 2)); 415bf215546Sopenharmony_ci write_conversion(b, nir_channels(b, pixel, (1 << 2) - 1), buffer_offset, sd); 416bf215546Sopenharmony_ci nir_push_else(b, NULL); 417bf215546Sopenharmony_ci nir_push_if(b, nir_ieq_imm(b, sd->channels, 3)); 418bf215546Sopenharmony_ci write_conversion(b, nir_channels(b, pixel, (1 << 3) - 1), buffer_offset, sd); 419bf215546Sopenharmony_ci nir_push_else(b, NULL); 420bf215546Sopenharmony_ci write_conversion(b, nir_channels(b, pixel, (1 << 4) - 1), buffer_offset, sd); 421bf215546Sopenharmony_ci nir_pop_if(b, NULL); 422bf215546Sopenharmony_ci nir_pop_if(b, NULL); 423bf215546Sopenharmony_ci nir_pop_if(b, NULL); 424bf215546Sopenharmony_ci } 425bf215546Sopenharmony_ci} 426bf215546Sopenharmony_ci 427bf215546Sopenharmony_ci/* if byteswap is enabled, handle that and then write the components */ 428bf215546Sopenharmony_cistatic void 429bf215546Sopenharmony_cihandle_swap(nir_builder *b, nir_ssa_def *pixel, nir_ssa_def *buffer_offset, 430bf215546Sopenharmony_ci struct pbo_shader_data *sd, unsigned num_components, bool weird_packed) 431bf215546Sopenharmony_ci{ 432bf215546Sopenharmony_ci nir_push_if(b, sd->swap); { 433bf215546Sopenharmony_ci nir_push_if(b, nir_ieq_imm(b, nir_udiv_imm(b, sd->blocksize, num_components), 2)); { 434bf215546Sopenharmony_ci /* this is a single high/low swap per component */ 435bf215546Sopenharmony_ci nir_ssa_def *components[4]; 436bf215546Sopenharmony_ci for (unsigned i = 0; i < 4; i++) 437bf215546Sopenharmony_ci components[i] = swap2(b, nir_channel(b, pixel, i)); 438bf215546Sopenharmony_ci nir_ssa_def *v = nir_vec(b, components, 4); 439bf215546Sopenharmony_ci grab_components(b, v, buffer_offset, sd, weird_packed); 440bf215546Sopenharmony_ci } nir_push_else(b, NULL); { 441bf215546Sopenharmony_ci /* this is a pair of high/low swaps for each half of the component */ 442bf215546Sopenharmony_ci nir_ssa_def *components[4]; 443bf215546Sopenharmony_ci for (unsigned i = 0; i < 4; i++) 444bf215546Sopenharmony_ci components[i] = swap4(b, nir_channel(b, pixel, i)); 445bf215546Sopenharmony_ci nir_ssa_def *v = nir_vec(b, components, 4); 446bf215546Sopenharmony_ci grab_components(b, v, buffer_offset, sd, weird_packed); 447bf215546Sopenharmony_ci } nir_pop_if(b, NULL); 448bf215546Sopenharmony_ci } nir_push_else(b, NULL); { 449bf215546Sopenharmony_ci /* swap disabled */ 450bf215546Sopenharmony_ci grab_components(b, pixel, buffer_offset, sd, weird_packed); 451bf215546Sopenharmony_ci } nir_pop_if(b, NULL); 452bf215546Sopenharmony_ci} 453bf215546Sopenharmony_ci 454bf215546Sopenharmony_cistatic nir_ssa_def * 455bf215546Sopenharmony_cicheck_for_weird_packing(nir_builder *b, struct pbo_shader_data *sd, unsigned component) 456bf215546Sopenharmony_ci{ 457bf215546Sopenharmony_ci nir_ssa_def *c = nir_channel(b, sd->bits, component - 1); 458bf215546Sopenharmony_ci 459bf215546Sopenharmony_ci return nir_bcsel(b, 460bf215546Sopenharmony_ci nir_ige(b, sd->channels, nir_imm_int(b, component)), 461bf215546Sopenharmony_ci nir_ior(b, 462bf215546Sopenharmony_ci nir_ine(b, c, sd->bits1), 463bf215546Sopenharmony_ci nir_ine(b, nir_imod(b, c, nir_imm_int(b, 8)), nir_imm_int(b, 0))), 464bf215546Sopenharmony_ci nir_imm_bool(b, 0)); 465bf215546Sopenharmony_ci} 466bf215546Sopenharmony_ci 467bf215546Sopenharmony_ci/* convenience function for clamping signed integers */ 468bf215546Sopenharmony_cistatic inline nir_ssa_def * 469bf215546Sopenharmony_cinir_imin_imax(nir_builder *build, nir_ssa_def *src, nir_ssa_def *clamp_to_min, nir_ssa_def *clamp_to_max) 470bf215546Sopenharmony_ci{ 471bf215546Sopenharmony_ci return nir_imax(build, nir_imin(build, src, clamp_to_min), clamp_to_max); 472bf215546Sopenharmony_ci} 473bf215546Sopenharmony_ci 474bf215546Sopenharmony_cistatic inline nir_ssa_def * 475bf215546Sopenharmony_cinir_format_float_to_unorm_with_factor(nir_builder *b, nir_ssa_def *f, nir_ssa_def *factor) 476bf215546Sopenharmony_ci{ 477bf215546Sopenharmony_ci /* Clamp to the range [0, 1] */ 478bf215546Sopenharmony_ci f = nir_fsat(b, f); 479bf215546Sopenharmony_ci 480bf215546Sopenharmony_ci return nir_f2u32(b, nir_fround_even(b, nir_fmul(b, f, factor))); 481bf215546Sopenharmony_ci} 482bf215546Sopenharmony_ci 483bf215546Sopenharmony_cistatic inline nir_ssa_def * 484bf215546Sopenharmony_cinir_format_float_to_snorm_with_factor(nir_builder *b, nir_ssa_def *f, nir_ssa_def *factor) 485bf215546Sopenharmony_ci{ 486bf215546Sopenharmony_ci /* Clamp to the range [-1, 1] */ 487bf215546Sopenharmony_ci f = nir_fmin(b, nir_fmax(b, f, nir_imm_float(b, -1)), nir_imm_float(b, 1)); 488bf215546Sopenharmony_ci 489bf215546Sopenharmony_ci return nir_f2i32(b, nir_fround_even(b, nir_fmul(b, f, factor))); 490bf215546Sopenharmony_ci} 491bf215546Sopenharmony_ci 492bf215546Sopenharmony_cistatic nir_ssa_def * 493bf215546Sopenharmony_ciclamp_and_mask(nir_builder *b, nir_ssa_def *src, nir_ssa_def *channels) 494bf215546Sopenharmony_ci{ 495bf215546Sopenharmony_ci nir_ssa_def *one = nir_imm_ivec4(b, 1, 0, 0, 0); 496bf215546Sopenharmony_ci nir_ssa_def *two = nir_imm_ivec4(b, 1, 1, 0, 0); 497bf215546Sopenharmony_ci nir_ssa_def *three = nir_imm_ivec4(b, 1, 1, 1, 0); 498bf215546Sopenharmony_ci nir_ssa_def *four = nir_imm_ivec4(b, 1, 1, 1, 1); 499bf215546Sopenharmony_ci /* avoid underflow by clamping to channel count */ 500bf215546Sopenharmony_ci src = nir_bcsel(b, 501bf215546Sopenharmony_ci nir_ieq(b, channels, one), 502bf215546Sopenharmony_ci nir_isub(b, src, one), 503bf215546Sopenharmony_ci nir_bcsel(b, 504bf215546Sopenharmony_ci nir_ieq_imm(b, channels, 2), 505bf215546Sopenharmony_ci nir_isub(b, src, two), 506bf215546Sopenharmony_ci nir_bcsel(b, 507bf215546Sopenharmony_ci nir_ieq_imm(b, channels, 3), 508bf215546Sopenharmony_ci nir_isub(b, src, three), 509bf215546Sopenharmony_ci nir_isub(b, src, four)))); 510bf215546Sopenharmony_ci 511bf215546Sopenharmony_ci return nir_mask(b, src, 32); 512bf215546Sopenharmony_ci} 513bf215546Sopenharmony_ci 514bf215546Sopenharmony_cistatic void 515bf215546Sopenharmony_ciconvert_swap_write(nir_builder *b, nir_ssa_def *pixel, nir_ssa_def *buffer_offset, 516bf215546Sopenharmony_ci unsigned num_components, 517bf215546Sopenharmony_ci struct pbo_shader_data *sd) 518bf215546Sopenharmony_ci{ 519bf215546Sopenharmony_ci 520bf215546Sopenharmony_ci nir_ssa_def *weird_packed = nir_ior(b, 521bf215546Sopenharmony_ci nir_ior(b, 522bf215546Sopenharmony_ci check_for_weird_packing(b, sd, 4), 523bf215546Sopenharmony_ci check_for_weird_packing(b, sd, 3)), 524bf215546Sopenharmony_ci check_for_weird_packing(b, sd, 2)); 525bf215546Sopenharmony_ci if (num_components == 1) { 526bf215546Sopenharmony_ci nir_push_if(b, weird_packed); 527bf215546Sopenharmony_ci nir_push_if(b, sd->r11g11b10_or_sint); 528bf215546Sopenharmony_ci handle_swap(b, nir_pad_vec4(b, nir_format_pack_11f11f10f(b, pixel)), buffer_offset, sd, 1, true); 529bf215546Sopenharmony_ci nir_push_else(b, NULL); 530bf215546Sopenharmony_ci nir_push_if(b, sd->r9g9b9e5); 531bf215546Sopenharmony_ci handle_swap(b, nir_pad_vec4(b, nir_format_pack_r9g9b9e5(b, pixel)), buffer_offset, sd, 1, true); 532bf215546Sopenharmony_ci nir_push_else(b, NULL); 533bf215546Sopenharmony_ci nir_push_if(b, nir_ieq_imm(b, sd->bits1, 32)); { //PIPE_FORMAT_Z32_FLOAT_S8X24_UINT 534bf215546Sopenharmony_ci nir_ssa_def *pack[2]; 535bf215546Sopenharmony_ci pack[0] = nir_format_pack_uint_unmasked_ssa(b, nir_channel(b, pixel, 0), nir_channel(b, sd->bits, 0)); 536bf215546Sopenharmony_ci pack[1] = nir_format_pack_uint_unmasked_ssa(b, nir_channels(b, pixel, 6), nir_channels(b, sd->bits, 6)); 537bf215546Sopenharmony_ci handle_swap(b, nir_pad_vec4(b, nir_vec2(b, pack[0], pack[1])), buffer_offset, sd, 2, true); 538bf215546Sopenharmony_ci } nir_push_else(b, NULL); 539bf215546Sopenharmony_ci handle_swap(b, nir_pad_vec4(b, nir_format_pack_uint_unmasked_ssa(b, pixel, sd->bits)), buffer_offset, sd, 1, true); 540bf215546Sopenharmony_ci nir_pop_if(b, NULL); 541bf215546Sopenharmony_ci nir_pop_if(b, NULL); 542bf215546Sopenharmony_ci nir_pop_if(b, NULL); 543bf215546Sopenharmony_ci nir_push_else(b, NULL); 544bf215546Sopenharmony_ci handle_swap(b, pixel, buffer_offset, sd, num_components, false); 545bf215546Sopenharmony_ci nir_pop_if(b, NULL); 546bf215546Sopenharmony_ci } else { 547bf215546Sopenharmony_ci nir_push_if(b, weird_packed); 548bf215546Sopenharmony_ci handle_swap(b, pixel, buffer_offset, sd, num_components, true); 549bf215546Sopenharmony_ci nir_push_else(b, NULL); 550bf215546Sopenharmony_ci handle_swap(b, pixel, buffer_offset, sd, num_components, false); 551bf215546Sopenharmony_ci nir_pop_if(b, NULL); 552bf215546Sopenharmony_ci } 553bf215546Sopenharmony_ci} 554bf215546Sopenharmony_ci 555bf215546Sopenharmony_cistatic void 556bf215546Sopenharmony_cido_shader_conversion(nir_builder *b, nir_ssa_def *pixel, 557bf215546Sopenharmony_ci unsigned num_components, 558bf215546Sopenharmony_ci nir_ssa_def *coord, struct pbo_shader_data *sd) 559bf215546Sopenharmony_ci{ 560bf215546Sopenharmony_ci nir_ssa_def *buffer_offset = get_buffer_offset(b, coord, sd); 561bf215546Sopenharmony_ci 562bf215546Sopenharmony_ci nir_ssa_def *signed_bit_mask = clamp_and_mask(b, sd->bits, sd->channels); 563bf215546Sopenharmony_ci 564bf215546Sopenharmony_ci#define CONVERT_SWAP_WRITE(PIXEL) \ 565bf215546Sopenharmony_ci convert_swap_write(b, PIXEL, buffer_offset, num_components, sd); 566bf215546Sopenharmony_ci nir_push_if(b, sd->normalized); 567bf215546Sopenharmony_ci nir_push_if(b, sd->clamp_uint); //unorm 568bf215546Sopenharmony_ci CONVERT_SWAP_WRITE(nir_format_float_to_unorm_with_factor(b, pixel, nir_u2f32(b, nir_mask(b, sd->bits, 32)))); 569bf215546Sopenharmony_ci nir_push_else(b, NULL); 570bf215546Sopenharmony_ci CONVERT_SWAP_WRITE(nir_format_float_to_snorm_with_factor(b, pixel, nir_u2f32(b, signed_bit_mask))); 571bf215546Sopenharmony_ci nir_pop_if(b, NULL); 572bf215546Sopenharmony_ci nir_push_else(b, NULL); 573bf215546Sopenharmony_ci nir_push_if(b, sd->integer); 574bf215546Sopenharmony_ci nir_push_if(b, sd->r11g11b10_or_sint); //sint 575bf215546Sopenharmony_ci nir_push_if(b, sd->clamp_uint); //uint -> sint 576bf215546Sopenharmony_ci CONVERT_SWAP_WRITE(nir_umin(b, pixel, signed_bit_mask)); 577bf215546Sopenharmony_ci nir_push_else(b, NULL); 578bf215546Sopenharmony_ci CONVERT_SWAP_WRITE(nir_imin_imax(b, pixel, signed_bit_mask, nir_isub(b, nir_ineg(b, signed_bit_mask), nir_imm_int(b, 1)))); 579bf215546Sopenharmony_ci nir_pop_if(b, NULL); 580bf215546Sopenharmony_ci nir_push_else(b, NULL); 581bf215546Sopenharmony_ci nir_push_if(b, sd->clamp_uint); //uint 582bf215546Sopenharmony_ci /* nir_format_clamp_uint */ 583bf215546Sopenharmony_ci CONVERT_SWAP_WRITE(nir_umin(b, pixel, nir_mask(b, sd->bits, 32))); 584bf215546Sopenharmony_ci nir_pop_if(b, NULL); 585bf215546Sopenharmony_ci nir_pop_if(b, NULL); 586bf215546Sopenharmony_ci nir_push_else(b, NULL); 587bf215546Sopenharmony_ci nir_push_if(b, nir_ieq_imm(b, sd->bits1, 16)); //half 588bf215546Sopenharmony_ci CONVERT_SWAP_WRITE(nir_format_float_to_half(b, pixel)); 589bf215546Sopenharmony_ci nir_push_else(b, NULL); 590bf215546Sopenharmony_ci CONVERT_SWAP_WRITE(pixel); 591bf215546Sopenharmony_ci nir_pop_if(b, NULL); 592bf215546Sopenharmony_ci nir_pop_if(b, NULL); 593bf215546Sopenharmony_ci} 594bf215546Sopenharmony_ci 595bf215546Sopenharmony_cistatic void * 596bf215546Sopenharmony_cicreate_conversion_shader(struct st_context *st, enum pipe_texture_target target, unsigned num_components) 597bf215546Sopenharmony_ci{ 598bf215546Sopenharmony_ci const nir_shader_compiler_options *options = st_get_nir_compiler_options(st, MESA_SHADER_COMPUTE); 599bf215546Sopenharmony_ci nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, options, "%s", "convert"); 600bf215546Sopenharmony_ci b.shader->info.workgroup_size[0] = target != PIPE_TEXTURE_1D ? 8 : 64; 601bf215546Sopenharmony_ci b.shader->info.workgroup_size[1] = target != PIPE_TEXTURE_1D ? 8 : 1; 602bf215546Sopenharmony_ci 603bf215546Sopenharmony_ci b.shader->info.workgroup_size[2] = 1; 604bf215546Sopenharmony_ci b.shader->info.textures_used[0] = 1; 605bf215546Sopenharmony_ci b.shader->info.num_ssbos = 1; 606bf215546Sopenharmony_ci b.shader->num_uniforms = 2; 607bf215546Sopenharmony_ci nir_variable_create(b.shader, nir_var_mem_ssbo, glsl_array_type(glsl_float_type(), 0, 4), "ssbo"); 608bf215546Sopenharmony_ci nir_variable *sampler = nir_variable_create(b.shader, nir_var_uniform, st_pbo_sampler_type_for_target(target, ST_PBO_CONVERT_FLOAT), "sampler"); 609bf215546Sopenharmony_ci unsigned coord_components = glsl_get_sampler_coordinate_components(sampler->type); 610bf215546Sopenharmony_ci sampler->data.explicit_binding = 1; 611bf215546Sopenharmony_ci 612bf215546Sopenharmony_ci struct pbo_shader_data sd; 613bf215546Sopenharmony_ci init_pbo_shader_data(&b, &sd); 614bf215546Sopenharmony_ci 615bf215546Sopenharmony_ci nir_ssa_def *bsize = nir_imm_ivec4(&b, 616bf215546Sopenharmony_ci b.shader->info.workgroup_size[0], 617bf215546Sopenharmony_ci b.shader->info.workgroup_size[1], 618bf215546Sopenharmony_ci b.shader->info.workgroup_size[2], 619bf215546Sopenharmony_ci 0); 620bf215546Sopenharmony_ci nir_ssa_def *wid = nir_load_workgroup_id(&b, 32); 621bf215546Sopenharmony_ci nir_ssa_def *iid = nir_load_local_invocation_id(&b); 622bf215546Sopenharmony_ci nir_ssa_def *tile = nir_imul(&b, wid, bsize); 623bf215546Sopenharmony_ci nir_ssa_def *global_id = nir_iadd(&b, tile, iid); 624bf215546Sopenharmony_ci nir_ssa_def *start = nir_iadd(&b, nir_trim_vector(&b, global_id, 2), sd.offset); 625bf215546Sopenharmony_ci 626bf215546Sopenharmony_ci nir_ssa_def *coord; 627bf215546Sopenharmony_ci if (coord_components < 3) 628bf215546Sopenharmony_ci coord = start; 629bf215546Sopenharmony_ci else { 630bf215546Sopenharmony_ci /* pad offset vec with global_id to get correct z offset */ 631bf215546Sopenharmony_ci assert(coord_components == 3); 632bf215546Sopenharmony_ci coord = nir_vec3(&b, nir_channel(&b, start, 0), 633bf215546Sopenharmony_ci nir_channel(&b, start, 1), 634bf215546Sopenharmony_ci nir_channel(&b, global_id, 2)); 635bf215546Sopenharmony_ci } 636bf215546Sopenharmony_ci coord = nir_trim_vector(&b, coord, coord_components); 637bf215546Sopenharmony_ci nir_ssa_def *offset = coord_components > 2 ? 638bf215546Sopenharmony_ci nir_pad_vector_imm_int(&b, sd.offset, 0, 3) : 639bf215546Sopenharmony_ci nir_trim_vector(&b, sd.offset, coord_components); 640bf215546Sopenharmony_ci nir_ssa_def *range = nir_trim_vector(&b, sd.range, coord_components); 641bf215546Sopenharmony_ci nir_ssa_def *max = nir_iadd(&b, offset, range); 642bf215546Sopenharmony_ci nir_push_if(&b, nir_ball(&b, nir_ilt(&b, coord, max))); 643bf215546Sopenharmony_ci nir_tex_instr *txf = nir_tex_instr_create(b.shader, 3); 644bf215546Sopenharmony_ci txf->is_array = glsl_sampler_type_is_array(sampler->type); 645bf215546Sopenharmony_ci txf->op = nir_texop_txf; 646bf215546Sopenharmony_ci txf->sampler_dim = glsl_get_sampler_dim(sampler->type); 647bf215546Sopenharmony_ci txf->dest_type = nir_type_float32; 648bf215546Sopenharmony_ci txf->coord_components = coord_components; 649bf215546Sopenharmony_ci txf->texture_index = 0; 650bf215546Sopenharmony_ci txf->sampler_index = 0; 651bf215546Sopenharmony_ci txf->src[0].src_type = nir_tex_src_coord; 652bf215546Sopenharmony_ci txf->src[0].src = nir_src_for_ssa(coord); 653bf215546Sopenharmony_ci txf->src[1].src_type = nir_tex_src_lod; 654bf215546Sopenharmony_ci txf->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0)); 655bf215546Sopenharmony_ci txf->src[2].src_type = nir_tex_src_texture_deref; 656bf215546Sopenharmony_ci nir_deref_instr *sampler_deref = nir_build_deref_var(&b, sampler); 657bf215546Sopenharmony_ci txf->src[2].src = nir_src_for_ssa(&sampler_deref->dest.ssa); 658bf215546Sopenharmony_ci 659bf215546Sopenharmony_ci nir_ssa_dest_init(&txf->instr, &txf->dest, 4, 32, NULL); 660bf215546Sopenharmony_ci nir_builder_instr_insert(&b, &txf->instr); 661bf215546Sopenharmony_ci 662bf215546Sopenharmony_ci /* pass the grid offset as the coord to get the zero-indexed buffer offset */ 663bf215546Sopenharmony_ci do_shader_conversion(&b, &txf->dest.ssa, num_components, global_id, &sd); 664bf215546Sopenharmony_ci 665bf215546Sopenharmony_ci nir_pop_if(&b, NULL); 666bf215546Sopenharmony_ci 667bf215546Sopenharmony_ci nir_validate_shader(b.shader, NULL); 668bf215546Sopenharmony_ci gl_nir_opts(b.shader); 669bf215546Sopenharmony_ci return st_nir_finish_builtin_shader(st, b.shader); 670bf215546Sopenharmony_ci} 671bf215546Sopenharmony_ci 672bf215546Sopenharmony_cistatic void 673bf215546Sopenharmony_ciinvert_swizzle(uint8_t *out, const uint8_t *in) 674bf215546Sopenharmony_ci{ 675bf215546Sopenharmony_ci /* First, default to all zeroes to prevent uninitialized junk */ 676bf215546Sopenharmony_ci for (unsigned c = 0; c < 4; ++c) 677bf215546Sopenharmony_ci out[c] = PIPE_SWIZZLE_0; 678bf215546Sopenharmony_ci 679bf215546Sopenharmony_ci /* Now "do" what the swizzle says */ 680bf215546Sopenharmony_ci for (unsigned c = 0; c < 4; ++c) { 681bf215546Sopenharmony_ci unsigned char i = in[c]; 682bf215546Sopenharmony_ci 683bf215546Sopenharmony_ci /* Who cares? */ 684bf215546Sopenharmony_ci assert(PIPE_SWIZZLE_X == 0); 685bf215546Sopenharmony_ci if (i > PIPE_SWIZZLE_W) 686bf215546Sopenharmony_ci continue; 687bf215546Sopenharmony_ci /* Invert */ 688bf215546Sopenharmony_ci unsigned idx = i - PIPE_SWIZZLE_X; 689bf215546Sopenharmony_ci out[idx] = PIPE_SWIZZLE_X + c; 690bf215546Sopenharmony_ci } 691bf215546Sopenharmony_ci} 692bf215546Sopenharmony_ci 693bf215546Sopenharmony_cistatic uint32_t 694bf215546Sopenharmony_cicompute_shader_key(enum pipe_texture_target target, unsigned num_components) 695bf215546Sopenharmony_ci{ 696bf215546Sopenharmony_ci uint8_t key_target[] = { 697bf215546Sopenharmony_ci [PIPE_BUFFER] = UINT8_MAX, 698bf215546Sopenharmony_ci [PIPE_TEXTURE_1D] = 1, 699bf215546Sopenharmony_ci [PIPE_TEXTURE_2D] = 2, 700bf215546Sopenharmony_ci [PIPE_TEXTURE_3D] = 3, 701bf215546Sopenharmony_ci [PIPE_TEXTURE_CUBE] = 4, 702bf215546Sopenharmony_ci [PIPE_TEXTURE_RECT] = UINT8_MAX, 703bf215546Sopenharmony_ci [PIPE_TEXTURE_1D_ARRAY] = 5, 704bf215546Sopenharmony_ci [PIPE_TEXTURE_2D_ARRAY] = 6, 705bf215546Sopenharmony_ci [PIPE_TEXTURE_CUBE_ARRAY] = UINT8_MAX, 706bf215546Sopenharmony_ci }; 707bf215546Sopenharmony_ci assert(target < ARRAY_SIZE(key_target)); 708bf215546Sopenharmony_ci assert(key_target[target] != UINT8_MAX); 709bf215546Sopenharmony_ci return key_target[target] | (num_components << 3); 710bf215546Sopenharmony_ci} 711bf215546Sopenharmony_ci 712bf215546Sopenharmony_cistatic unsigned 713bf215546Sopenharmony_ciget_dim_from_target(enum pipe_texture_target target) 714bf215546Sopenharmony_ci{ 715bf215546Sopenharmony_ci switch (target) { 716bf215546Sopenharmony_ci case PIPE_TEXTURE_1D: 717bf215546Sopenharmony_ci return 1; 718bf215546Sopenharmony_ci case PIPE_TEXTURE_2D_ARRAY: 719bf215546Sopenharmony_ci case PIPE_TEXTURE_3D: 720bf215546Sopenharmony_ci return 3; 721bf215546Sopenharmony_ci default: 722bf215546Sopenharmony_ci return 2; 723bf215546Sopenharmony_ci } 724bf215546Sopenharmony_ci} 725bf215546Sopenharmony_ci 726bf215546Sopenharmony_cistatic enum pipe_texture_target 727bf215546Sopenharmony_ciget_target_from_texture(struct pipe_resource *src) 728bf215546Sopenharmony_ci{ 729bf215546Sopenharmony_ci enum pipe_texture_target view_target; 730bf215546Sopenharmony_ci switch (src->target) { 731bf215546Sopenharmony_ci case PIPE_TEXTURE_RECT: 732bf215546Sopenharmony_ci view_target = PIPE_TEXTURE_2D; 733bf215546Sopenharmony_ci break; 734bf215546Sopenharmony_ci case PIPE_TEXTURE_CUBE: 735bf215546Sopenharmony_ci case PIPE_TEXTURE_CUBE_ARRAY: 736bf215546Sopenharmony_ci view_target = PIPE_TEXTURE_2D_ARRAY; 737bf215546Sopenharmony_ci break; 738bf215546Sopenharmony_ci default: 739bf215546Sopenharmony_ci view_target = src->target; 740bf215546Sopenharmony_ci break; 741bf215546Sopenharmony_ci } 742bf215546Sopenharmony_ci return view_target; 743bf215546Sopenharmony_ci} 744bf215546Sopenharmony_ci 745bf215546Sopenharmony_ci/* force swizzling behavior for sampling */ 746bf215546Sopenharmony_cienum swizzle_clamp { 747bf215546Sopenharmony_ci /* force component selection for named format */ 748bf215546Sopenharmony_ci SWIZZLE_CLAMP_LUMINANCE = 1, 749bf215546Sopenharmony_ci SWIZZLE_CLAMP_ALPHA = 2, 750bf215546Sopenharmony_ci SWIZZLE_CLAMP_LUMINANCE_ALPHA = 3, 751bf215546Sopenharmony_ci SWIZZLE_CLAMP_INTENSITY = 4, 752bf215546Sopenharmony_ci SWIZZLE_CLAMP_RGBX = 5, 753bf215546Sopenharmony_ci 754bf215546Sopenharmony_ci /* select only 1 component */ 755bf215546Sopenharmony_ci SWIZZLE_CLAMP_GREEN = 8, 756bf215546Sopenharmony_ci SWIZZLE_CLAMP_BLUE = 16, 757bf215546Sopenharmony_ci 758bf215546Sopenharmony_ci /* reverse ordering for format emulation */ 759bf215546Sopenharmony_ci SWIZZLE_CLAMP_BGRA = 32, 760bf215546Sopenharmony_ci}; 761bf215546Sopenharmony_ci 762bf215546Sopenharmony_cistatic bool 763bf215546Sopenharmony_cican_copy_direct(const struct gl_pixelstore_attrib *pack) 764bf215546Sopenharmony_ci{ 765bf215546Sopenharmony_ci return !(pack->RowLength || 766bf215546Sopenharmony_ci pack->SkipPixels || 767bf215546Sopenharmony_ci pack->SkipRows || 768bf215546Sopenharmony_ci pack->ImageHeight || 769bf215546Sopenharmony_ci pack->SkipImages); 770bf215546Sopenharmony_ci} 771bf215546Sopenharmony_ci 772bf215546Sopenharmony_cistatic struct pipe_resource * 773bf215546Sopenharmony_cidownload_texture_compute(struct st_context *st, 774bf215546Sopenharmony_ci const struct gl_pixelstore_attrib *pack, 775bf215546Sopenharmony_ci GLint xoffset, GLint yoffset, GLint zoffset, 776bf215546Sopenharmony_ci GLsizei width, GLsizei height, GLint depth, 777bf215546Sopenharmony_ci unsigned level, unsigned layer, 778bf215546Sopenharmony_ci GLenum format, GLenum type, 779bf215546Sopenharmony_ci enum pipe_format src_format, 780bf215546Sopenharmony_ci enum pipe_texture_target view_target, 781bf215546Sopenharmony_ci struct pipe_resource *src, 782bf215546Sopenharmony_ci enum pipe_format dst_format, 783bf215546Sopenharmony_ci enum swizzle_clamp swizzle_clamp) 784bf215546Sopenharmony_ci{ 785bf215546Sopenharmony_ci struct pipe_context *pipe = st->pipe; 786bf215546Sopenharmony_ci struct pipe_screen *screen = st->screen; 787bf215546Sopenharmony_ci struct pipe_resource *dst = NULL; 788bf215546Sopenharmony_ci unsigned dim = get_dim_from_target(view_target); 789bf215546Sopenharmony_ci 790bf215546Sopenharmony_ci /* clamp 3d offsets based on slice */ 791bf215546Sopenharmony_ci if (view_target == PIPE_TEXTURE_3D) 792bf215546Sopenharmony_ci zoffset += layer; 793bf215546Sopenharmony_ci 794bf215546Sopenharmony_ci unsigned num_components = 0; 795bf215546Sopenharmony_ci /* Upload constants */ 796bf215546Sopenharmony_ci { 797bf215546Sopenharmony_ci struct pipe_constant_buffer cb; 798bf215546Sopenharmony_ci assert(view_target != PIPE_TEXTURE_1D_ARRAY || !zoffset); 799bf215546Sopenharmony_ci struct pbo_data pd = { 800bf215546Sopenharmony_ci .x = xoffset, 801bf215546Sopenharmony_ci .y = view_target == PIPE_TEXTURE_1D_ARRAY ? 0 : yoffset, 802bf215546Sopenharmony_ci .width = width, .height = height, .depth = depth, 803bf215546Sopenharmony_ci .invert = pack->Invert, 804bf215546Sopenharmony_ci .blocksize = util_format_get_blocksize(dst_format) - 1, 805bf215546Sopenharmony_ci .alignment = ffs(MAX2(pack->Alignment, 1)) - 1, 806bf215546Sopenharmony_ci }; 807bf215546Sopenharmony_ci num_components = fill_pbo_data(&pd, src_format, dst_format, pack->SwapBytes == 1); 808bf215546Sopenharmony_ci 809bf215546Sopenharmony_ci cb.buffer = NULL; 810bf215546Sopenharmony_ci cb.user_buffer = &pd; 811bf215546Sopenharmony_ci cb.buffer_offset = 0; 812bf215546Sopenharmony_ci cb.buffer_size = sizeof(pd); 813bf215546Sopenharmony_ci 814bf215546Sopenharmony_ci pipe->set_constant_buffer(pipe, PIPE_SHADER_COMPUTE, 0, false, &cb); 815bf215546Sopenharmony_ci } 816bf215546Sopenharmony_ci 817bf215546Sopenharmony_ci uint32_t hash_key = compute_shader_key(view_target, num_components); 818bf215546Sopenharmony_ci assert(hash_key != 0); 819bf215546Sopenharmony_ci 820bf215546Sopenharmony_ci struct hash_entry *he = _mesa_hash_table_search(st->pbo.shaders, (void*)(uintptr_t)hash_key); 821bf215546Sopenharmony_ci void *cs; 822bf215546Sopenharmony_ci if (!he) { 823bf215546Sopenharmony_ci cs = create_conversion_shader(st, view_target, num_components); 824bf215546Sopenharmony_ci he = _mesa_hash_table_insert(st->pbo.shaders, (void*)(uintptr_t)hash_key, cs); 825bf215546Sopenharmony_ci } 826bf215546Sopenharmony_ci cs = he->data; 827bf215546Sopenharmony_ci assert(cs); 828bf215546Sopenharmony_ci struct cso_context *cso = st->cso_context; 829bf215546Sopenharmony_ci 830bf215546Sopenharmony_ci cso_save_compute_state(cso, CSO_BIT_COMPUTE_SHADER | CSO_BIT_COMPUTE_SAMPLERS); 831bf215546Sopenharmony_ci cso_set_compute_shader_handle(cso, cs); 832bf215546Sopenharmony_ci 833bf215546Sopenharmony_ci /* Set up the sampler_view */ 834bf215546Sopenharmony_ci { 835bf215546Sopenharmony_ci struct pipe_sampler_view templ; 836bf215546Sopenharmony_ci struct pipe_sampler_view *sampler_view; 837bf215546Sopenharmony_ci struct pipe_sampler_state sampler = {0}; 838bf215546Sopenharmony_ci sampler.normalized_coords = true; 839bf215546Sopenharmony_ci const struct pipe_sampler_state *samplers[1] = {&sampler}; 840bf215546Sopenharmony_ci const struct util_format_description *desc = util_format_description(dst_format); 841bf215546Sopenharmony_ci 842bf215546Sopenharmony_ci u_sampler_view_default_template(&templ, src, src_format); 843bf215546Sopenharmony_ci if (util_format_is_depth_or_stencil(dst_format)) { 844bf215546Sopenharmony_ci templ.swizzle_r = PIPE_SWIZZLE_X; 845bf215546Sopenharmony_ci templ.swizzle_g = PIPE_SWIZZLE_X; 846bf215546Sopenharmony_ci templ.swizzle_b = PIPE_SWIZZLE_X; 847bf215546Sopenharmony_ci templ.swizzle_a = PIPE_SWIZZLE_X; 848bf215546Sopenharmony_ci } else { 849bf215546Sopenharmony_ci uint8_t invswizzle[4]; 850bf215546Sopenharmony_ci const uint8_t *swizzle; 851bf215546Sopenharmony_ci 852bf215546Sopenharmony_ci /* these swizzle output bits require explicit component selection/ordering */ 853bf215546Sopenharmony_ci if (swizzle_clamp & SWIZZLE_CLAMP_GREEN) { 854bf215546Sopenharmony_ci for (unsigned i = 0; i < 4; i++) 855bf215546Sopenharmony_ci invswizzle[i] = PIPE_SWIZZLE_Y; 856bf215546Sopenharmony_ci } else if (swizzle_clamp & SWIZZLE_CLAMP_BLUE) { 857bf215546Sopenharmony_ci for (unsigned i = 0; i < 4; i++) 858bf215546Sopenharmony_ci invswizzle[i] = PIPE_SWIZZLE_Z; 859bf215546Sopenharmony_ci } else { 860bf215546Sopenharmony_ci if (swizzle_clamp & SWIZZLE_CLAMP_BGRA) { 861bf215546Sopenharmony_ci if (util_format_get_nr_components(dst_format) == 3) 862bf215546Sopenharmony_ci swizzle = util_format_description(PIPE_FORMAT_B8G8R8_UNORM)->swizzle; 863bf215546Sopenharmony_ci else 864bf215546Sopenharmony_ci swizzle = util_format_description(PIPE_FORMAT_B8G8R8A8_UNORM)->swizzle; 865bf215546Sopenharmony_ci } else { 866bf215546Sopenharmony_ci swizzle = desc->swizzle; 867bf215546Sopenharmony_ci } 868bf215546Sopenharmony_ci invert_swizzle(invswizzle, swizzle); 869bf215546Sopenharmony_ci } 870bf215546Sopenharmony_ci swizzle_clamp &= ~(SWIZZLE_CLAMP_BGRA | SWIZZLE_CLAMP_GREEN | SWIZZLE_CLAMP_BLUE); 871bf215546Sopenharmony_ci 872bf215546Sopenharmony_ci /* these swizzle input modes clamp unused components to 0 and (sometimes) alpha to 1 */ 873bf215546Sopenharmony_ci switch (swizzle_clamp) { 874bf215546Sopenharmony_ci case SWIZZLE_CLAMP_LUMINANCE: 875bf215546Sopenharmony_ci if (util_format_is_luminance(dst_format)) 876bf215546Sopenharmony_ci break; 877bf215546Sopenharmony_ci for (unsigned i = 0; i < 4; i++) { 878bf215546Sopenharmony_ci if (invswizzle[i] != PIPE_SWIZZLE_X) 879bf215546Sopenharmony_ci invswizzle[i] = invswizzle[i] == PIPE_SWIZZLE_W ? PIPE_SWIZZLE_1 : PIPE_SWIZZLE_0; 880bf215546Sopenharmony_ci } 881bf215546Sopenharmony_ci break; 882bf215546Sopenharmony_ci case SWIZZLE_CLAMP_ALPHA: 883bf215546Sopenharmony_ci for (unsigned i = 0; i < 4; i++) { 884bf215546Sopenharmony_ci if (invswizzle[i] != PIPE_SWIZZLE_W) 885bf215546Sopenharmony_ci invswizzle[i] = PIPE_SWIZZLE_0; 886bf215546Sopenharmony_ci } 887bf215546Sopenharmony_ci break; 888bf215546Sopenharmony_ci case SWIZZLE_CLAMP_LUMINANCE_ALPHA: 889bf215546Sopenharmony_ci if (util_format_is_luminance_alpha(dst_format)) 890bf215546Sopenharmony_ci break; 891bf215546Sopenharmony_ci for (unsigned i = 0; i < 4; i++) { 892bf215546Sopenharmony_ci if (invswizzle[i] != PIPE_SWIZZLE_X && invswizzle[i] != PIPE_SWIZZLE_W) 893bf215546Sopenharmony_ci invswizzle[i] = PIPE_SWIZZLE_0; 894bf215546Sopenharmony_ci } 895bf215546Sopenharmony_ci break; 896bf215546Sopenharmony_ci case SWIZZLE_CLAMP_INTENSITY: 897bf215546Sopenharmony_ci for (unsigned i = 0; i < 4; i++) { 898bf215546Sopenharmony_ci if (invswizzle[i] == PIPE_SWIZZLE_W) 899bf215546Sopenharmony_ci invswizzle[i] = PIPE_SWIZZLE_1; 900bf215546Sopenharmony_ci else if (invswizzle[i] != PIPE_SWIZZLE_X) 901bf215546Sopenharmony_ci invswizzle[i] = PIPE_SWIZZLE_0; 902bf215546Sopenharmony_ci } 903bf215546Sopenharmony_ci break; 904bf215546Sopenharmony_ci case SWIZZLE_CLAMP_RGBX: 905bf215546Sopenharmony_ci for (unsigned i = 0; i < 4; i++) { 906bf215546Sopenharmony_ci if (invswizzle[i] == PIPE_SWIZZLE_W) 907bf215546Sopenharmony_ci invswizzle[i] = PIPE_SWIZZLE_1; 908bf215546Sopenharmony_ci } 909bf215546Sopenharmony_ci break; 910bf215546Sopenharmony_ci default: break; 911bf215546Sopenharmony_ci } 912bf215546Sopenharmony_ci templ.swizzle_r = invswizzle[0]; 913bf215546Sopenharmony_ci templ.swizzle_g = invswizzle[1]; 914bf215546Sopenharmony_ci templ.swizzle_b = invswizzle[2]; 915bf215546Sopenharmony_ci templ.swizzle_a = invswizzle[3]; 916bf215546Sopenharmony_ci } 917bf215546Sopenharmony_ci templ.target = view_target; 918bf215546Sopenharmony_ci templ.u.tex.first_level = level; 919bf215546Sopenharmony_ci templ.u.tex.last_level = level; 920bf215546Sopenharmony_ci 921bf215546Sopenharmony_ci /* array textures expect to have array index provided */ 922bf215546Sopenharmony_ci if (view_target != PIPE_TEXTURE_3D && src->array_size) { 923bf215546Sopenharmony_ci templ.u.tex.first_layer = layer; 924bf215546Sopenharmony_ci if (view_target == PIPE_TEXTURE_1D_ARRAY) { 925bf215546Sopenharmony_ci templ.u.tex.first_layer += yoffset; 926bf215546Sopenharmony_ci templ.u.tex.last_layer = templ.u.tex.first_layer + height - 1; 927bf215546Sopenharmony_ci } else { 928bf215546Sopenharmony_ci templ.u.tex.first_layer += zoffset; 929bf215546Sopenharmony_ci templ.u.tex.last_layer = templ.u.tex.first_layer + depth - 1; 930bf215546Sopenharmony_ci } 931bf215546Sopenharmony_ci } 932bf215546Sopenharmony_ci 933bf215546Sopenharmony_ci sampler_view = pipe->create_sampler_view(pipe, src, &templ); 934bf215546Sopenharmony_ci if (sampler_view == NULL) 935bf215546Sopenharmony_ci goto fail; 936bf215546Sopenharmony_ci 937bf215546Sopenharmony_ci pipe->set_sampler_views(pipe, PIPE_SHADER_COMPUTE, 0, 1, 0, false, 938bf215546Sopenharmony_ci &sampler_view); 939bf215546Sopenharmony_ci st->state.num_sampler_views[PIPE_SHADER_COMPUTE] = 940bf215546Sopenharmony_ci MAX2(st->state.num_sampler_views[PIPE_SHADER_COMPUTE], 1); 941bf215546Sopenharmony_ci 942bf215546Sopenharmony_ci pipe_sampler_view_reference(&sampler_view, NULL); 943bf215546Sopenharmony_ci 944bf215546Sopenharmony_ci cso_set_samplers(cso, PIPE_SHADER_COMPUTE, 1, samplers); 945bf215546Sopenharmony_ci } 946bf215546Sopenharmony_ci 947bf215546Sopenharmony_ci /* Set up destination buffer */ 948bf215546Sopenharmony_ci unsigned img_stride = src->target == PIPE_TEXTURE_3D || 949bf215546Sopenharmony_ci src->target == PIPE_TEXTURE_2D_ARRAY || 950bf215546Sopenharmony_ci src->target == PIPE_TEXTURE_CUBE_ARRAY ? 951bf215546Sopenharmony_ci /* only use image stride for 3d images to avoid pulling in IMAGE_HEIGHT pixelstore */ 952bf215546Sopenharmony_ci _mesa_image_image_stride(pack, width, height, format, type) : 953bf215546Sopenharmony_ci _mesa_image_row_stride(pack, width, format, type) * height; 954bf215546Sopenharmony_ci unsigned buffer_size = (depth + (dim == 3 ? pack->SkipImages : 0)) * img_stride; 955bf215546Sopenharmony_ci { 956bf215546Sopenharmony_ci struct pipe_shader_buffer buffer; 957bf215546Sopenharmony_ci memset(&buffer, 0, sizeof(buffer)); 958bf215546Sopenharmony_ci if (can_copy_direct(pack) && pack->BufferObj) { 959bf215546Sopenharmony_ci dst = pack->BufferObj->buffer; 960bf215546Sopenharmony_ci assert(pack->BufferObj->Size >= buffer_size); 961bf215546Sopenharmony_ci } else { 962bf215546Sopenharmony_ci dst = pipe_buffer_create(screen, PIPE_BIND_SHADER_BUFFER, PIPE_USAGE_STAGING, buffer_size); 963bf215546Sopenharmony_ci if (!dst) 964bf215546Sopenharmony_ci goto fail; 965bf215546Sopenharmony_ci } 966bf215546Sopenharmony_ci buffer.buffer = dst; 967bf215546Sopenharmony_ci buffer.buffer_size = buffer_size; 968bf215546Sopenharmony_ci 969bf215546Sopenharmony_ci pipe->set_shader_buffers(pipe, PIPE_SHADER_COMPUTE, 0, 1, &buffer, 0x1); 970bf215546Sopenharmony_ci } 971bf215546Sopenharmony_ci 972bf215546Sopenharmony_ci struct pipe_grid_info info = { 0 }; 973bf215546Sopenharmony_ci info.block[0] = src->target != PIPE_TEXTURE_1D ? 8 : 64; 974bf215546Sopenharmony_ci info.block[1] = src->target != PIPE_TEXTURE_1D ? 8 : 1; 975bf215546Sopenharmony_ci info.last_block[0] = width % info.block[0]; 976bf215546Sopenharmony_ci info.last_block[1] = height % info.block[1]; 977bf215546Sopenharmony_ci info.block[2] = 1; 978bf215546Sopenharmony_ci info.grid[0] = DIV_ROUND_UP(width, info.block[0]); 979bf215546Sopenharmony_ci info.grid[1] = DIV_ROUND_UP(height, info.block[1]); 980bf215546Sopenharmony_ci info.grid[2] = depth; 981bf215546Sopenharmony_ci 982bf215546Sopenharmony_ci pipe->launch_grid(pipe, &info); 983bf215546Sopenharmony_ci 984bf215546Sopenharmony_cifail: 985bf215546Sopenharmony_ci cso_restore_compute_state(cso); 986bf215546Sopenharmony_ci 987bf215546Sopenharmony_ci /* Unbind all because st/mesa won't do it if the current shader doesn't 988bf215546Sopenharmony_ci * use them. 989bf215546Sopenharmony_ci */ 990bf215546Sopenharmony_ci pipe->set_sampler_views(pipe, PIPE_SHADER_COMPUTE, 0, 0, 991bf215546Sopenharmony_ci st->state.num_sampler_views[PIPE_SHADER_COMPUTE], 992bf215546Sopenharmony_ci false, NULL); 993bf215546Sopenharmony_ci st->state.num_sampler_views[PIPE_SHADER_COMPUTE] = 0; 994bf215546Sopenharmony_ci pipe->set_shader_buffers(pipe, PIPE_SHADER_COMPUTE, 0, 1, NULL, 0); 995bf215546Sopenharmony_ci 996bf215546Sopenharmony_ci st->dirty |= ST_NEW_CS_CONSTANTS | 997bf215546Sopenharmony_ci ST_NEW_CS_SSBOS | 998bf215546Sopenharmony_ci ST_NEW_CS_SAMPLER_VIEWS; 999bf215546Sopenharmony_ci 1000bf215546Sopenharmony_ci return dst; 1001bf215546Sopenharmony_ci} 1002bf215546Sopenharmony_ci 1003bf215546Sopenharmony_cistatic void 1004bf215546Sopenharmony_cicopy_converted_buffer(struct gl_context * ctx, 1005bf215546Sopenharmony_ci struct gl_pixelstore_attrib *pack, 1006bf215546Sopenharmony_ci enum pipe_texture_target view_target, 1007bf215546Sopenharmony_ci struct pipe_resource *dst, enum pipe_format dst_format, 1008bf215546Sopenharmony_ci GLint xoffset, GLint yoffset, GLint zoffset, 1009bf215546Sopenharmony_ci GLsizei width, GLsizei height, GLint depth, 1010bf215546Sopenharmony_ci GLenum format, GLenum type, void *pixels) 1011bf215546Sopenharmony_ci{ 1012bf215546Sopenharmony_ci struct pipe_transfer *xfer; 1013bf215546Sopenharmony_ci struct st_context *st = st_context(ctx); 1014bf215546Sopenharmony_ci unsigned dim = get_dim_from_target(view_target); 1015bf215546Sopenharmony_ci uint8_t *map = pipe_buffer_map(st->pipe, dst, PIPE_MAP_READ | PIPE_MAP_ONCE, &xfer); 1016bf215546Sopenharmony_ci if (!map) 1017bf215546Sopenharmony_ci return; 1018bf215546Sopenharmony_ci 1019bf215546Sopenharmony_ci pixels = _mesa_map_pbo_dest(ctx, pack, pixels); 1020bf215546Sopenharmony_ci /* compute shader doesn't handle these to cut down on uniform size */ 1021bf215546Sopenharmony_ci if (!can_copy_direct(pack)) { 1022bf215546Sopenharmony_ci if (view_target == PIPE_TEXTURE_1D_ARRAY) { 1023bf215546Sopenharmony_ci depth = height; 1024bf215546Sopenharmony_ci height = 1; 1025bf215546Sopenharmony_ci zoffset = yoffset; 1026bf215546Sopenharmony_ci yoffset = 0; 1027bf215546Sopenharmony_ci } 1028bf215546Sopenharmony_ci struct gl_pixelstore_attrib packing = *pack; 1029bf215546Sopenharmony_ci memset(&packing.RowLength, 0, offsetof(struct gl_pixelstore_attrib, SwapBytes) - offsetof(struct gl_pixelstore_attrib, RowLength)); 1030bf215546Sopenharmony_ci for (unsigned z = 0; z < depth; z++) { 1031bf215546Sopenharmony_ci for (unsigned y = 0; y < height; y++) { 1032bf215546Sopenharmony_ci GLubyte *dst = _mesa_image_address(dim, pack, pixels, 1033bf215546Sopenharmony_ci width, height, format, type, 1034bf215546Sopenharmony_ci z, y, 0); 1035bf215546Sopenharmony_ci GLubyte *srcpx = _mesa_image_address(dim, &packing, map, 1036bf215546Sopenharmony_ci width, height, format, type, 1037bf215546Sopenharmony_ci z, y, 0); 1038bf215546Sopenharmony_ci util_streaming_load_memcpy(dst, srcpx, util_format_get_stride(dst_format, width)); 1039bf215546Sopenharmony_ci } 1040bf215546Sopenharmony_ci } 1041bf215546Sopenharmony_ci } else { 1042bf215546Sopenharmony_ci /* direct copy for all other cases */ 1043bf215546Sopenharmony_ci util_streaming_load_memcpy(pixels, map, dst->width0); 1044bf215546Sopenharmony_ci } 1045bf215546Sopenharmony_ci 1046bf215546Sopenharmony_ci _mesa_unmap_pbo_dest(ctx, pack); 1047bf215546Sopenharmony_ci pipe_buffer_unmap(st->pipe, xfer); 1048bf215546Sopenharmony_ci} 1049bf215546Sopenharmony_ci 1050bf215546Sopenharmony_cibool 1051bf215546Sopenharmony_cist_GetTexSubImage_shader(struct gl_context * ctx, 1052bf215546Sopenharmony_ci GLint xoffset, GLint yoffset, GLint zoffset, 1053bf215546Sopenharmony_ci GLsizei width, GLsizei height, GLint depth, 1054bf215546Sopenharmony_ci GLenum format, GLenum type, void * pixels, 1055bf215546Sopenharmony_ci struct gl_texture_image *texImage) 1056bf215546Sopenharmony_ci{ 1057bf215546Sopenharmony_ci struct st_context *st = st_context(ctx); 1058bf215546Sopenharmony_ci struct pipe_screen *screen = st->screen; 1059bf215546Sopenharmony_ci struct gl_texture_object *stObj = texImage->TexObject; 1060bf215546Sopenharmony_ci struct pipe_resource *src = texImage->pt; 1061bf215546Sopenharmony_ci struct pipe_resource *dst = NULL; 1062bf215546Sopenharmony_ci enum pipe_format dst_format, src_format; 1063bf215546Sopenharmony_ci unsigned level = (texImage->pt != stObj->pt ? 0 : texImage->Level) + texImage->TexObject->Attrib.MinLevel; 1064bf215546Sopenharmony_ci unsigned layer = texImage->Face + texImage->TexObject->Attrib.MinLayer; 1065bf215546Sopenharmony_ci enum pipe_texture_target view_target; 1066bf215546Sopenharmony_ci 1067bf215546Sopenharmony_ci assert(!_mesa_is_format_etc2(texImage->TexFormat) && 1068bf215546Sopenharmony_ci !_mesa_is_format_astc_2d(texImage->TexFormat) && 1069bf215546Sopenharmony_ci texImage->TexFormat != MESA_FORMAT_ETC1_RGB8); 1070bf215546Sopenharmony_ci 1071bf215546Sopenharmony_ci /* See if the texture format already matches the format and type, 1072bf215546Sopenharmony_ci * in which case the memcpy-based fast path will be used. */ 1073bf215546Sopenharmony_ci if (_mesa_format_matches_format_and_type(texImage->TexFormat, format, 1074bf215546Sopenharmony_ci type, ctx->Pack.SwapBytes, NULL)) { 1075bf215546Sopenharmony_ci return false; 1076bf215546Sopenharmony_ci } 1077bf215546Sopenharmony_ci enum swizzle_clamp swizzle_clamp = 0; 1078bf215546Sopenharmony_ci src_format = st_pbo_get_src_format(screen, stObj->surface_based ? stObj->surface_format : src->format, src); 1079bf215546Sopenharmony_ci if (src_format == PIPE_FORMAT_NONE) 1080bf215546Sopenharmony_ci return false; 1081bf215546Sopenharmony_ci 1082bf215546Sopenharmony_ci if (texImage->_BaseFormat != _mesa_get_format_base_format(texImage->TexFormat)) { 1083bf215546Sopenharmony_ci /* special handling for drivers that don't support these formats natively */ 1084bf215546Sopenharmony_ci if (texImage->_BaseFormat == GL_LUMINANCE) 1085bf215546Sopenharmony_ci swizzle_clamp = SWIZZLE_CLAMP_LUMINANCE; 1086bf215546Sopenharmony_ci else if (texImage->_BaseFormat == GL_LUMINANCE_ALPHA) 1087bf215546Sopenharmony_ci swizzle_clamp = SWIZZLE_CLAMP_LUMINANCE_ALPHA; 1088bf215546Sopenharmony_ci else if (texImage->_BaseFormat == GL_ALPHA) 1089bf215546Sopenharmony_ci swizzle_clamp = SWIZZLE_CLAMP_ALPHA; 1090bf215546Sopenharmony_ci else if (texImage->_BaseFormat == GL_INTENSITY) 1091bf215546Sopenharmony_ci swizzle_clamp = SWIZZLE_CLAMP_INTENSITY; 1092bf215546Sopenharmony_ci else if (texImage->_BaseFormat == GL_RGB) 1093bf215546Sopenharmony_ci swizzle_clamp = SWIZZLE_CLAMP_RGBX; 1094bf215546Sopenharmony_ci } 1095bf215546Sopenharmony_ci 1096bf215546Sopenharmony_ci dst_format = st_pbo_get_dst_format(ctx, PIPE_BUFFER, src_format, false, format, type, 0); 1097bf215546Sopenharmony_ci 1098bf215546Sopenharmony_ci if (dst_format == PIPE_FORMAT_NONE) { 1099bf215546Sopenharmony_ci bool need_bgra_swizzle = false; 1100bf215546Sopenharmony_ci dst_format = get_convert_format(ctx, src_format, format, type, &need_bgra_swizzle); 1101bf215546Sopenharmony_ci if (dst_format == PIPE_FORMAT_NONE) 1102bf215546Sopenharmony_ci return false; 1103bf215546Sopenharmony_ci /* special swizzling for component selection */ 1104bf215546Sopenharmony_ci if (need_bgra_swizzle) 1105bf215546Sopenharmony_ci swizzle_clamp |= SWIZZLE_CLAMP_BGRA; 1106bf215546Sopenharmony_ci else if (format == GL_GREEN_INTEGER) 1107bf215546Sopenharmony_ci swizzle_clamp |= SWIZZLE_CLAMP_GREEN; 1108bf215546Sopenharmony_ci else if (format == GL_BLUE_INTEGER) 1109bf215546Sopenharmony_ci swizzle_clamp |= SWIZZLE_CLAMP_BLUE; 1110bf215546Sopenharmony_ci } 1111bf215546Sopenharmony_ci 1112bf215546Sopenharmony_ci /* check with the driver to see if memcpy is likely to be faster */ 1113bf215546Sopenharmony_ci if (!screen->is_compute_copy_faster(screen, src_format, dst_format, width, height, depth, true)) 1114bf215546Sopenharmony_ci return false; 1115bf215546Sopenharmony_ci 1116bf215546Sopenharmony_ci view_target = get_target_from_texture(src); 1117bf215546Sopenharmony_ci /* I don't know why this works 1118bf215546Sopenharmony_ci * only for the texture rects 1119bf215546Sopenharmony_ci * but that's how it is 1120bf215546Sopenharmony_ci */ 1121bf215546Sopenharmony_ci if ((src->target != PIPE_TEXTURE_RECT && 1122bf215546Sopenharmony_ci /* this would need multiple samplerviews */ 1123bf215546Sopenharmony_ci ((util_format_is_depth_and_stencil(src_format) && util_format_is_depth_and_stencil(dst_format)) || 1124bf215546Sopenharmony_ci /* these format just doesn't work and science can't explain why */ 1125bf215546Sopenharmony_ci dst_format == PIPE_FORMAT_Z32_FLOAT)) || 1126bf215546Sopenharmony_ci /* L8 -> L32_FLOAT is another thinker */ 1127bf215546Sopenharmony_ci (!util_format_is_float(src_format) && dst_format == PIPE_FORMAT_L32_FLOAT)) 1128bf215546Sopenharmony_ci return false; 1129bf215546Sopenharmony_ci 1130bf215546Sopenharmony_ci dst = download_texture_compute(st, &ctx->Pack, xoffset, yoffset, zoffset, width, height, depth, 1131bf215546Sopenharmony_ci level, layer, format, type, src_format, view_target, src, dst_format, 1132bf215546Sopenharmony_ci swizzle_clamp); 1133bf215546Sopenharmony_ci 1134bf215546Sopenharmony_ci if (!can_copy_direct(&ctx->Pack) || !ctx->Pack.BufferObj) { 1135bf215546Sopenharmony_ci copy_converted_buffer(ctx, &ctx->Pack, view_target, dst, dst_format, xoffset, yoffset, zoffset, 1136bf215546Sopenharmony_ci width, height, depth, format, type, pixels); 1137bf215546Sopenharmony_ci 1138bf215546Sopenharmony_ci pipe_resource_reference(&dst, NULL); 1139bf215546Sopenharmony_ci } 1140bf215546Sopenharmony_ci 1141bf215546Sopenharmony_ci return true; 1142bf215546Sopenharmony_ci} 1143bf215546Sopenharmony_ci 1144