1/* 2 * Copyright © 2021 Google 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include <assert.h> 25#include <stdbool.h> 26 27#include "nir/nir_builder.h" 28#include "radv_meta.h" 29#include "radv_private.h" 30#include "sid.h" 31#include "vk_format.h" 32 33/* Based on 34 * https://github.com/Themaister/Granite/blob/master/assets/shaders/decode/etc2.comp 35 * https://github.com/Themaister/Granite/blob/master/assets/shaders/decode/eac.comp 36 * 37 * With some differences: 38 * - Use the vk format to do all the settings. 39 * - Combine the ETC2 and EAC shaders. 40 * - Since we combined the above, reuse the function for the ETC2 A8 component. 41 * - the EAC shader doesn't do SNORM correctly, so this has that fixed. 42 */ 43 44static nir_ssa_def * 45flip_endian(nir_builder *b, nir_ssa_def *src, unsigned cnt) 46{ 47 nir_ssa_def *v[2]; 48 for (unsigned i = 0; i < cnt; ++i) { 49 nir_ssa_def *intermediate[4]; 50 nir_ssa_def *chan = cnt == 1 ? src : nir_channel(b, src, i); 51 for (unsigned j = 0; j < 4; ++j) 52 intermediate[j] = nir_ubfe_imm(b, chan, 8 * j, 8); 53 v[i] = nir_ior( 54 b, nir_ior(b, nir_ishl_imm(b, intermediate[0], 24), nir_ishl_imm(b, intermediate[1], 16)), 55 nir_ior(b, nir_ishl_imm(b, intermediate[2], 8), nir_ishl_imm(b, intermediate[3], 0))); 56 } 57 return cnt == 1 ? v[0] : nir_vec(b, v, cnt); 58} 59 60static nir_ssa_def * 61etc1_color_modifier_lookup(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) 62{ 63 const unsigned table[8][2] = {{2, 8}, {5, 17}, {9, 29}, {13, 42}, 64 {18, 60}, {24, 80}, {33, 106}, {47, 183}}; 65 nir_ssa_def *upper = nir_ieq_imm(b, y, 1); 66 nir_ssa_def *result = NULL; 67 for (unsigned i = 0; i < 8; ++i) { 68 nir_ssa_def *tmp = 69 nir_bcsel(b, upper, nir_imm_int(b, table[i][1]), nir_imm_int(b, table[i][0])); 70 if (result) 71 result = nir_bcsel(b, nir_ieq_imm(b, x, i), tmp, result); 72 else 73 result = tmp; 74 } 75 return result; 76} 77 78static nir_ssa_def * 79etc2_distance_lookup(nir_builder *b, nir_ssa_def *x) 80{ 81 const unsigned table[8] = {3, 6, 11, 16, 23, 32, 41, 64}; 82 nir_ssa_def *result = NULL; 83 for (unsigned i = 0; i < 8; ++i) { 84 if (result) 85 result = nir_bcsel(b, nir_ieq_imm(b, x, i), nir_imm_int(b, table[i]), result); 86 else 87 result = nir_imm_int(b, table[i]); 88 } 89 return result; 90} 91 92static nir_ssa_def * 93etc1_alpha_modifier_lookup(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y) 94{ 95 const unsigned table[16] = {0xe852, 0xc962, 0xc741, 0xc531, 0xb752, 0xa862, 0xa763, 0xa742, 96 0x9751, 0x9741, 0x9731, 0x9641, 0x9632, 0x9210, 0x8753, 0x8642}; 97 nir_ssa_def *result = NULL; 98 for (unsigned i = 0; i < 16; ++i) { 99 nir_ssa_def *tmp = nir_imm_int(b, table[i]); 100 if (result) 101 result = nir_bcsel(b, nir_ieq_imm(b, x, i), tmp, result); 102 else 103 result = tmp; 104 } 105 return nir_ubfe(b, result, nir_imul_imm(b, y, 4), nir_imm_int(b, 4)); 106} 107 108static nir_ssa_def * 109etc_extend(nir_builder *b, nir_ssa_def *v, int bits) 110{ 111 if (bits == 4) 112 return nir_imul_imm(b, v, 0x11); 113 return nir_ior(b, nir_ishl_imm(b, v, 8 - bits), nir_ushr_imm(b, v, bits - (8 - bits))); 114} 115 116static nir_ssa_def * 117decode_etc2_alpha(struct nir_builder *b, nir_ssa_def *alpha_payload, nir_ssa_def *linear_pixel, 118 bool eac, nir_ssa_def *is_signed) 119{ 120 alpha_payload = flip_endian(b, alpha_payload, 2); 121 nir_ssa_def *alpha_x = nir_channel(b, alpha_payload, 1); 122 nir_ssa_def *alpha_y = nir_channel(b, alpha_payload, 0); 123 nir_ssa_def *bit_offset = nir_isub_imm(b, 45, nir_imul_imm(b, linear_pixel, 3)); 124 nir_ssa_def *base = nir_ubfe_imm(b, alpha_y, 24, 8); 125 nir_ssa_def *multiplier = nir_ubfe_imm(b, alpha_y, 20, 4); 126 nir_ssa_def *table = nir_ubfe_imm(b, alpha_y, 16, 4); 127 128 if (eac) { 129 nir_ssa_def *signed_base = nir_ibfe_imm(b, alpha_y, 24, 8); 130 signed_base = nir_imul_imm(b, signed_base, 8); 131 base = nir_iadd_imm(b, nir_imul_imm(b, base, 8), 4); 132 base = nir_bcsel(b, is_signed, signed_base, base); 133 multiplier = nir_imax(b, nir_imul_imm(b, multiplier, 8), nir_imm_int(b, 1)); 134 } 135 136 nir_ssa_def *lsb_index = 137 nir_ubfe(b, nir_bcsel(b, nir_uge(b, bit_offset, nir_imm_int(b, 32)), alpha_y, alpha_x), 138 nir_iand_imm(b, bit_offset, 31), nir_imm_int(b, 2)); 139 bit_offset = nir_iadd_imm(b, bit_offset, 2); 140 nir_ssa_def *msb = 141 nir_ubfe(b, nir_bcsel(b, nir_uge(b, bit_offset, nir_imm_int(b, 32)), alpha_y, alpha_x), 142 nir_iand_imm(b, bit_offset, 31), nir_imm_int(b, 1)); 143 nir_ssa_def *mod = 144 nir_ixor(b, etc1_alpha_modifier_lookup(b, table, lsb_index), nir_iadd_imm(b, msb, -1)); 145 nir_ssa_def *a = nir_iadd(b, base, nir_imul(b, mod, multiplier)); 146 147 nir_ssa_def *low_bound = nir_imm_int(b, 0); 148 nir_ssa_def *high_bound = nir_imm_int(b, 255); 149 nir_ssa_def *final_mult = nir_imm_float(b, 1 / 255.0); 150 if (eac) { 151 low_bound = nir_bcsel(b, is_signed, nir_imm_int(b, -1023), low_bound); 152 high_bound = nir_bcsel(b, is_signed, nir_imm_int(b, 1023), nir_imm_int(b, 2047)); 153 final_mult = 154 nir_bcsel(b, is_signed, nir_imm_float(b, 1 / 1023.0), nir_imm_float(b, 1 / 2047.0)); 155 } 156 157 return nir_fmul(b, nir_i2f32(b, nir_iclamp(b, a, low_bound, high_bound)), final_mult); 158} 159 160static nir_shader * 161build_shader(struct radv_device *dev) 162{ 163 const struct glsl_type *sampler_type_2d = 164 glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, true, GLSL_TYPE_FLOAT); 165 const struct glsl_type *sampler_type_3d = 166 glsl_sampler_type(GLSL_SAMPLER_DIM_3D, false, false, GLSL_TYPE_FLOAT); 167 const struct glsl_type *img_type_2d = 168 glsl_image_type(GLSL_SAMPLER_DIM_2D, true, GLSL_TYPE_FLOAT); 169 const struct glsl_type *img_type_3d = 170 glsl_image_type(GLSL_SAMPLER_DIM_3D, false, GLSL_TYPE_FLOAT); 171 nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_decode_etc"); 172 b.shader->info.workgroup_size[0] = 8; 173 b.shader->info.workgroup_size[1] = 8; 174 175 nir_variable *input_img_2d = 176 nir_variable_create(b.shader, nir_var_uniform, sampler_type_2d, "s_tex_2d"); 177 input_img_2d->data.descriptor_set = 0; 178 input_img_2d->data.binding = 0; 179 180 nir_variable *input_img_3d = 181 nir_variable_create(b.shader, nir_var_uniform, sampler_type_3d, "s_tex_3d"); 182 input_img_2d->data.descriptor_set = 0; 183 input_img_2d->data.binding = 0; 184 185 nir_variable *output_img_2d = 186 nir_variable_create(b.shader, nir_var_image, img_type_2d, "out_img_2d"); 187 output_img_2d->data.descriptor_set = 0; 188 output_img_2d->data.binding = 1; 189 190 nir_variable *output_img_3d = 191 nir_variable_create(b.shader, nir_var_image, img_type_3d, "out_img_3d"); 192 output_img_3d->data.descriptor_set = 0; 193 output_img_3d->data.binding = 1; 194 195 nir_ssa_def *global_id = get_global_ids(&b, 3); 196 197 nir_ssa_def *consts = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = 16); 198 nir_ssa_def *consts2 = 199 nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 0, .range = 4); 200 nir_ssa_def *offset = nir_channels(&b, consts, 7); 201 nir_ssa_def *format = nir_channel(&b, consts, 3); 202 nir_ssa_def *image_type = nir_channel(&b, consts2, 0); 203 nir_ssa_def *is_3d = nir_ieq_imm(&b, image_type, VK_IMAGE_TYPE_3D); 204 nir_ssa_def *coord = nir_iadd(&b, global_id, offset); 205 nir_ssa_def *src_coord = 206 nir_vec3(&b, nir_ushr_imm(&b, nir_channel(&b, coord, 0), 2), 207 nir_ushr_imm(&b, nir_channel(&b, coord, 1), 2), nir_channel(&b, coord, 2)); 208 209 nir_variable *payload_var = 210 nir_variable_create(b.shader, nir_var_shader_temp, glsl_vec4_type(), "payload"); 211 nir_push_if(&b, is_3d); 212 { 213 nir_ssa_def *tex_deref = &nir_build_deref_var(&b, input_img_3d)->dest.ssa; 214 215 nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3); 216 tex->sampler_dim = GLSL_SAMPLER_DIM_3D; 217 tex->op = nir_texop_txf; 218 tex->src[0].src_type = nir_tex_src_coord; 219 tex->src[0].src = nir_src_for_ssa(src_coord); 220 tex->src[1].src_type = nir_tex_src_lod; 221 tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0)); 222 tex->src[2].src_type = nir_tex_src_texture_deref; 223 tex->src[2].src = nir_src_for_ssa(tex_deref); 224 tex->dest_type = nir_type_uint32; 225 tex->is_array = false; 226 tex->coord_components = 3; 227 228 nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex"); 229 nir_builder_instr_insert(&b, &tex->instr); 230 nir_store_var(&b, payload_var, &tex->dest.ssa, 0xf); 231 } 232 nir_push_else(&b, NULL); 233 { 234 nir_ssa_def *tex_deref = &nir_build_deref_var(&b, input_img_2d)->dest.ssa; 235 236 nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3); 237 tex->sampler_dim = GLSL_SAMPLER_DIM_2D; 238 tex->op = nir_texop_txf; 239 tex->src[0].src_type = nir_tex_src_coord; 240 tex->src[0].src = nir_src_for_ssa(src_coord); 241 tex->src[1].src_type = nir_tex_src_lod; 242 tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0)); 243 tex->src[2].src_type = nir_tex_src_texture_deref; 244 tex->src[2].src = nir_src_for_ssa(tex_deref); 245 tex->dest_type = nir_type_uint32; 246 tex->is_array = true; 247 tex->coord_components = 3; 248 249 nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex"); 250 nir_builder_instr_insert(&b, &tex->instr); 251 nir_store_var(&b, payload_var, &tex->dest.ssa, 0xf); 252 } 253 nir_pop_if(&b, NULL); 254 255 nir_ssa_def *pixel_coord = nir_iand_imm(&b, nir_channels(&b, coord, 3), 3); 256 nir_ssa_def *linear_pixel = nir_iadd(&b, nir_imul_imm(&b, nir_channel(&b, pixel_coord, 0), 4), 257 nir_channel(&b, pixel_coord, 1)); 258 259 nir_ssa_def *payload = nir_load_var(&b, payload_var); 260 nir_variable *color = 261 nir_variable_create(b.shader, nir_var_shader_temp, glsl_vec4_type(), "color"); 262 nir_store_var(&b, color, nir_imm_vec4(&b, 1.0, 0.0, 0.0, 1.0), 0xf); 263 nir_push_if(&b, nir_ilt(&b, format, nir_imm_int(&b, VK_FORMAT_EAC_R11_UNORM_BLOCK))); 264 { 265 nir_ssa_def *alpha_bits_8 = 266 nir_ige(&b, format, nir_imm_int(&b, VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK)); 267 nir_ssa_def *alpha_bits_1 = 268 nir_iand(&b, nir_ige(&b, format, nir_imm_int(&b, VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK)), 269 nir_ilt(&b, format, nir_imm_int(&b, VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK))); 270 271 nir_ssa_def *color_payload = 272 nir_bcsel(&b, alpha_bits_8, nir_channels(&b, payload, 0xC), nir_channels(&b, payload, 3)); 273 color_payload = flip_endian(&b, color_payload, 2); 274 nir_ssa_def *color_y = nir_channel(&b, color_payload, 0); 275 nir_ssa_def *color_x = nir_channel(&b, color_payload, 1); 276 nir_ssa_def *flip = nir_test_mask(&b, color_y, 1); 277 nir_ssa_def *subblock = nir_ushr_imm( 278 &b, nir_bcsel(&b, flip, nir_channel(&b, pixel_coord, 1), nir_channel(&b, pixel_coord, 0)), 279 1); 280 281 nir_variable *punchthrough = 282 nir_variable_create(b.shader, nir_var_shader_temp, glsl_bool_type(), "punchthrough"); 283 nir_ssa_def *punchthrough_init = 284 nir_iand(&b, alpha_bits_1, nir_inot(&b, nir_test_mask(&b, color_y, 2))); 285 nir_store_var(&b, punchthrough, punchthrough_init, 0x1); 286 287 nir_variable *etc1_compat = 288 nir_variable_create(b.shader, nir_var_shader_temp, glsl_bool_type(), "etc1_compat"); 289 nir_store_var(&b, etc1_compat, nir_imm_bool(&b, false), 0x1); 290 291 nir_variable *alpha_result = 292 nir_variable_create(b.shader, nir_var_shader_temp, glsl_float_type(), "alpha_result"); 293 nir_push_if(&b, alpha_bits_8); 294 { 295 nir_store_var( 296 &b, alpha_result, 297 decode_etc2_alpha(&b, nir_channels(&b, payload, 3), linear_pixel, false, NULL), 1); 298 } 299 nir_push_else(&b, NULL); 300 { 301 nir_store_var(&b, alpha_result, nir_imm_float(&b, 1.0), 1); 302 } 303 nir_pop_if(&b, NULL); 304 305 const struct glsl_type *uvec3_type = glsl_vector_type(GLSL_TYPE_UINT, 3); 306 nir_variable *rgb_result = 307 nir_variable_create(b.shader, nir_var_shader_temp, uvec3_type, "rgb_result"); 308 nir_variable *base_rgb = 309 nir_variable_create(b.shader, nir_var_shader_temp, uvec3_type, "base_rgb"); 310 nir_store_var(&b, rgb_result, nir_imm_ivec3(&b, 255, 0, 0), 0x7); 311 312 nir_ssa_def *msb = 313 nir_iand_imm(&b, nir_ushr(&b, color_x, nir_iadd_imm(&b, linear_pixel, 15)), 2); 314 nir_ssa_def *lsb = nir_iand_imm(&b, nir_ushr(&b, color_x, linear_pixel), 1); 315 316 nir_push_if( 317 &b, nir_iand(&b, nir_inot(&b, alpha_bits_1), nir_inot(&b, nir_test_mask(&b, color_y, 2)))); 318 { 319 nir_store_var(&b, etc1_compat, nir_imm_bool(&b, true), 1); 320 nir_ssa_def *tmp[3]; 321 for (unsigned i = 0; i < 3; ++i) 322 tmp[i] = etc_extend( 323 &b, 324 nir_iand_imm(&b, 325 nir_ushr(&b, color_y, 326 nir_isub_imm(&b, 28 - 8 * i, nir_imul_imm(&b, subblock, 4))), 327 0xf), 328 4); 329 nir_store_var(&b, base_rgb, nir_vec(&b, tmp, 3), 0x7); 330 } 331 nir_push_else(&b, NULL); 332 { 333 nir_ssa_def *rb = nir_ubfe_imm(&b, color_y, 27, 5); 334 nir_ssa_def *rd = nir_ibfe_imm(&b, color_y, 24, 3); 335 nir_ssa_def *gb = nir_ubfe_imm(&b, color_y, 19, 5); 336 nir_ssa_def *gd = nir_ibfe_imm(&b, color_y, 16, 3); 337 nir_ssa_def *bb = nir_ubfe_imm(&b, color_y, 11, 5); 338 nir_ssa_def *bd = nir_ibfe_imm(&b, color_y, 8, 3); 339 nir_ssa_def *r1 = nir_iadd(&b, rb, rd); 340 nir_ssa_def *g1 = nir_iadd(&b, gb, gd); 341 nir_ssa_def *b1 = nir_iadd(&b, bb, bd); 342 343 nir_push_if(&b, nir_ult(&b, nir_imm_int(&b, 31), r1)); 344 { 345 nir_ssa_def *r0 = nir_ior(&b, nir_ubfe_imm(&b, color_y, 24, 2), 346 nir_ishl_imm(&b, nir_ubfe_imm(&b, color_y, 27, 2), 2)); 347 nir_ssa_def *g0 = nir_ubfe_imm(&b, color_y, 20, 4); 348 nir_ssa_def *b0 = nir_ubfe_imm(&b, color_y, 16, 4); 349 nir_ssa_def *r2 = nir_ubfe_imm(&b, color_y, 12, 4); 350 nir_ssa_def *g2 = nir_ubfe_imm(&b, color_y, 8, 4); 351 nir_ssa_def *b2 = nir_ubfe_imm(&b, color_y, 4, 4); 352 nir_ssa_def *da = nir_ior(&b, nir_ishl_imm(&b, nir_ubfe_imm(&b, color_y, 2, 2), 1), 353 nir_iand_imm(&b, color_y, 1)); 354 nir_ssa_def *dist = etc2_distance_lookup(&b, da); 355 nir_ssa_def *index = nir_ior(&b, lsb, msb); 356 357 nir_store_var(&b, punchthrough, 358 nir_iand(&b, nir_load_var(&b, punchthrough), 359 nir_ieq_imm(&b, nir_iadd(&b, lsb, msb), 2)), 360 0x1); 361 nir_push_if(&b, nir_ieq_imm(&b, index, 0)); 362 { 363 nir_store_var(&b, rgb_result, etc_extend(&b, nir_vec3(&b, r0, g0, b0), 4), 0x7); 364 } 365 nir_push_else(&b, NULL); 366 { 367 368 nir_ssa_def *tmp = nir_iadd(&b, etc_extend(&b, nir_vec3(&b, r2, g2, b2), 4), 369 nir_imul(&b, dist, nir_isub_imm(&b, 2, index))); 370 nir_store_var(&b, rgb_result, tmp, 0x7); 371 } 372 nir_pop_if(&b, NULL); 373 } 374 nir_push_else(&b, NULL); 375 nir_push_if(&b, nir_ult(&b, nir_imm_int(&b, 31), g1)); 376 { 377 nir_ssa_def *r0 = nir_ubfe_imm(&b, color_y, 27, 4); 378 nir_ssa_def *g0 = nir_ior(&b, nir_ishl_imm(&b, nir_ubfe_imm(&b, color_y, 24, 3), 1), 379 nir_iand_imm(&b, nir_ushr_imm(&b, color_y, 20), 1)); 380 nir_ssa_def *b0 = nir_ior(&b, nir_ubfe_imm(&b, color_y, 15, 3), 381 nir_iand_imm(&b, nir_ushr_imm(&b, color_y, 16), 8)); 382 nir_ssa_def *r2 = nir_ubfe_imm(&b, color_y, 11, 4); 383 nir_ssa_def *g2 = nir_ubfe_imm(&b, color_y, 7, 4); 384 nir_ssa_def *b2 = nir_ubfe_imm(&b, color_y, 3, 4); 385 nir_ssa_def *da = nir_iand_imm(&b, color_y, 4); 386 nir_ssa_def *db = nir_iand_imm(&b, color_y, 1); 387 nir_ssa_def *d = nir_iadd(&b, da, nir_imul_imm(&b, db, 2)); 388 nir_ssa_def *d0 = 389 nir_iadd(&b, nir_ishl_imm(&b, r0, 16), nir_iadd(&b, nir_ishl_imm(&b, g0, 8), b0)); 390 nir_ssa_def *d2 = 391 nir_iadd(&b, nir_ishl_imm(&b, r2, 16), nir_iadd(&b, nir_ishl_imm(&b, g2, 8), b2)); 392 d = nir_bcsel(&b, nir_uge(&b, d0, d2), nir_iadd_imm(&b, d, 1), d); 393 nir_ssa_def *dist = etc2_distance_lookup(&b, d); 394 nir_ssa_def *base = nir_bcsel(&b, nir_ine_imm(&b, msb, 0), nir_vec3(&b, r2, g2, b2), 395 nir_vec3(&b, r0, g0, b0)); 396 base = etc_extend(&b, base, 4); 397 base = nir_iadd(&b, base, 398 nir_imul(&b, dist, nir_isub_imm(&b, 1, nir_imul_imm(&b, lsb, 2)))); 399 nir_store_var(&b, rgb_result, base, 0x7); 400 nir_store_var(&b, punchthrough, 401 nir_iand(&b, nir_load_var(&b, punchthrough), 402 nir_ieq_imm(&b, nir_iadd(&b, lsb, msb), 2)), 403 0x1); 404 } 405 nir_push_else(&b, NULL); 406 nir_push_if(&b, nir_ult(&b, nir_imm_int(&b, 31), b1)); 407 { 408 nir_ssa_def *r0 = nir_ubfe_imm(&b, color_y, 25, 6); 409 nir_ssa_def *g0 = nir_ior(&b, nir_ubfe_imm(&b, color_y, 17, 6), 410 nir_iand_imm(&b, nir_ushr_imm(&b, color_y, 18), 0x40)); 411 nir_ssa_def *b0 = 412 nir_ior(&b, nir_ishl_imm(&b, nir_ubfe_imm(&b, color_y, 11, 2), 3), 413 nir_ior(&b, nir_iand_imm(&b, nir_ushr_imm(&b, color_y, 11), 0x20), 414 nir_ubfe_imm(&b, color_y, 7, 3))); 415 nir_ssa_def *rh = nir_ior(&b, nir_iand_imm(&b, color_y, 1), 416 nir_ishl_imm(&b, nir_ubfe_imm(&b, color_y, 2, 5), 1)); 417 nir_ssa_def *rv = nir_ubfe_imm(&b, color_x, 13, 6); 418 nir_ssa_def *gh = nir_ubfe_imm(&b, color_x, 25, 7); 419 nir_ssa_def *gv = nir_ubfe_imm(&b, color_x, 6, 7); 420 nir_ssa_def *bh = nir_ubfe_imm(&b, color_x, 19, 6); 421 nir_ssa_def *bv = nir_ubfe_imm(&b, color_x, 0, 6); 422 423 r0 = etc_extend(&b, r0, 6); 424 g0 = etc_extend(&b, g0, 7); 425 b0 = etc_extend(&b, b0, 6); 426 rh = etc_extend(&b, rh, 6); 427 rv = etc_extend(&b, rv, 6); 428 gh = etc_extend(&b, gh, 7); 429 gv = etc_extend(&b, gv, 7); 430 bh = etc_extend(&b, bh, 6); 431 bv = etc_extend(&b, bv, 6); 432 433 nir_ssa_def *rgb = nir_vec3(&b, r0, g0, b0); 434 nir_ssa_def *dx = nir_imul(&b, nir_isub(&b, nir_vec3(&b, rh, gh, bh), rgb), 435 nir_channel(&b, pixel_coord, 0)); 436 nir_ssa_def *dy = nir_imul(&b, nir_isub(&b, nir_vec3(&b, rv, gv, bv), rgb), 437 nir_channel(&b, pixel_coord, 1)); 438 rgb = nir_iadd(&b, rgb, nir_ishr_imm(&b, nir_iadd_imm(&b, nir_iadd(&b, dx, dy), 2), 2)); 439 nir_store_var(&b, rgb_result, rgb, 0x7); 440 nir_store_var(&b, punchthrough, nir_imm_bool(&b, false), 0x1); 441 } 442 nir_push_else(&b, NULL); 443 { 444 nir_store_var(&b, etc1_compat, nir_imm_bool(&b, true), 1); 445 nir_ssa_def *subblock_b = nir_ine_imm(&b, subblock, 0); 446 nir_ssa_def *tmp[] = { 447 nir_bcsel(&b, subblock_b, r1, rb), 448 nir_bcsel(&b, subblock_b, g1, gb), 449 nir_bcsel(&b, subblock_b, b1, bb), 450 }; 451 nir_store_var(&b, base_rgb, etc_extend(&b, nir_vec(&b, tmp, 3), 5), 0x7); 452 } 453 nir_pop_if(&b, NULL); 454 nir_pop_if(&b, NULL); 455 nir_pop_if(&b, NULL); 456 } 457 nir_pop_if(&b, NULL); 458 nir_push_if(&b, nir_load_var(&b, etc1_compat)); 459 { 460 nir_ssa_def *etc1_table_index = nir_ubfe( 461 &b, color_y, nir_isub_imm(&b, 5, nir_imul_imm(&b, subblock, 3)), nir_imm_int(&b, 3)); 462 nir_ssa_def *sgn = nir_isub_imm(&b, 1, msb); 463 sgn = nir_bcsel(&b, nir_load_var(&b, punchthrough), nir_imul(&b, sgn, lsb), sgn); 464 nir_store_var(&b, punchthrough, 465 nir_iand(&b, nir_load_var(&b, punchthrough), 466 nir_ieq_imm(&b, nir_iadd(&b, lsb, msb), 2)), 467 0x1); 468 nir_ssa_def *off = 469 nir_imul(&b, etc1_color_modifier_lookup(&b, etc1_table_index, lsb), sgn); 470 nir_ssa_def *result = nir_iadd(&b, nir_load_var(&b, base_rgb), off); 471 nir_store_var(&b, rgb_result, result, 0x7); 472 } 473 nir_pop_if(&b, NULL); 474 nir_push_if(&b, nir_load_var(&b, punchthrough)); 475 { 476 nir_store_var(&b, alpha_result, nir_imm_float(&b, 0), 0x1); 477 nir_store_var(&b, rgb_result, nir_imm_ivec3(&b, 0, 0, 0), 0x7); 478 } 479 nir_pop_if(&b, NULL); 480 nir_ssa_def *col[4]; 481 for (unsigned i = 0; i < 3; ++i) 482 col[i] = nir_fdiv(&b, nir_i2f32(&b, nir_channel(&b, nir_load_var(&b, rgb_result), i)), 483 nir_imm_float(&b, 255.0)); 484 col[3] = nir_load_var(&b, alpha_result); 485 nir_store_var(&b, color, nir_vec(&b, col, 4), 0xf); 486 } 487 nir_push_else(&b, NULL); 488 { /* EAC */ 489 nir_ssa_def *is_signed = nir_ior(&b, nir_ieq_imm(&b, format, VK_FORMAT_EAC_R11_SNORM_BLOCK), 490 nir_ieq_imm(&b, format, VK_FORMAT_EAC_R11G11_SNORM_BLOCK)); 491 nir_ssa_def *val[4]; 492 for (int i = 0; i < 2; ++i) { 493 val[i] = decode_etc2_alpha(&b, nir_channels(&b, payload, 3 << (2 * i)), linear_pixel, true, 494 is_signed); 495 } 496 val[2] = nir_imm_float(&b, 0.0); 497 val[3] = nir_imm_float(&b, 1.0); 498 nir_store_var(&b, color, nir_vec(&b, val, 4), 0xf); 499 } 500 nir_pop_if(&b, NULL); 501 502 nir_ssa_def *outval = nir_load_var(&b, color); 503 nir_ssa_def *img_coord = nir_vec4(&b, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), 504 nir_channel(&b, coord, 2), nir_ssa_undef(&b, 1, 32)); 505 506 nir_push_if(&b, is_3d); 507 { 508 nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img_3d)->dest.ssa, img_coord, 509 nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0), 510 .image_dim = GLSL_SAMPLER_DIM_3D); 511 } 512 nir_push_else(&b, NULL); 513 { 514 nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img_2d)->dest.ssa, img_coord, 515 nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0), 516 .image_dim = GLSL_SAMPLER_DIM_2D, .image_array = true); 517 } 518 nir_pop_if(&b, NULL); 519 return b.shader; 520} 521 522static VkResult 523create_layout(struct radv_device *device) 524{ 525 VkResult result; 526 VkDescriptorSetLayoutCreateInfo ds_create_info = { 527 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, 528 .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR, 529 .bindingCount = 2, 530 .pBindings = (VkDescriptorSetLayoutBinding[]){ 531 {.binding = 0, 532 .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 533 .descriptorCount = 1, 534 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 535 .pImmutableSamplers = NULL}, 536 {.binding = 1, 537 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 538 .descriptorCount = 1, 539 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 540 .pImmutableSamplers = NULL}, 541 }}; 542 543 result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, 544 &device->meta_state.alloc, 545 &device->meta_state.etc_decode.ds_layout); 546 if (result != VK_SUCCESS) 547 goto fail; 548 549 VkPipelineLayoutCreateInfo pl_create_info = { 550 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, 551 .setLayoutCount = 1, 552 .pSetLayouts = &device->meta_state.etc_decode.ds_layout, 553 .pushConstantRangeCount = 1, 554 .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 20}, 555 }; 556 557 result = 558 radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, 559 &device->meta_state.alloc, &device->meta_state.etc_decode.p_layout); 560 if (result != VK_SUCCESS) 561 goto fail; 562 return VK_SUCCESS; 563fail: 564 return result; 565} 566 567static VkResult 568create_decode_pipeline(struct radv_device *device, VkPipeline *pipeline) 569{ 570 VkResult result; 571 572 mtx_lock(&device->meta_state.mtx); 573 if (*pipeline) { 574 mtx_unlock(&device->meta_state.mtx); 575 return VK_SUCCESS; 576 } 577 578 nir_shader *cs = build_shader(device); 579 580 /* compute shader */ 581 582 VkPipelineShaderStageCreateInfo pipeline_shader_stage = { 583 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, 584 .stage = VK_SHADER_STAGE_COMPUTE_BIT, 585 .module = vk_shader_module_handle_from_nir(cs), 586 .pName = "main", 587 .pSpecializationInfo = NULL, 588 }; 589 590 VkComputePipelineCreateInfo vk_pipeline_info = { 591 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, 592 .stage = pipeline_shader_stage, 593 .flags = 0, 594 .layout = device->meta_state.resolve_compute.p_layout, 595 }; 596 597 result = radv_CreateComputePipelines(radv_device_to_handle(device), 598 radv_pipeline_cache_to_handle(&device->meta_state.cache), 1, 599 &vk_pipeline_info, NULL, pipeline); 600 if (result != VK_SUCCESS) 601 goto fail; 602 603 ralloc_free(cs); 604 mtx_unlock(&device->meta_state.mtx); 605 return VK_SUCCESS; 606fail: 607 ralloc_free(cs); 608 mtx_unlock(&device->meta_state.mtx); 609 return result; 610} 611 612VkResult 613radv_device_init_meta_etc_decode_state(struct radv_device *device, bool on_demand) 614{ 615 struct radv_meta_state *state = &device->meta_state; 616 VkResult res; 617 618 if (!device->physical_device->emulate_etc2) 619 return VK_SUCCESS; 620 621 res = create_layout(device); 622 if (res != VK_SUCCESS) 623 return res; 624 625 if (on_demand) 626 return VK_SUCCESS; 627 628 return create_decode_pipeline(device, &state->etc_decode.pipeline); 629} 630 631void 632radv_device_finish_meta_etc_decode_state(struct radv_device *device) 633{ 634 struct radv_meta_state *state = &device->meta_state; 635 radv_DestroyPipeline(radv_device_to_handle(device), state->etc_decode.pipeline, &state->alloc); 636 radv_DestroyPipelineLayout(radv_device_to_handle(device), state->etc_decode.p_layout, 637 &state->alloc); 638 device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), 639 state->etc_decode.ds_layout, &state->alloc); 640} 641 642static VkPipeline 643radv_get_etc_decode_pipeline(struct radv_cmd_buffer *cmd_buffer) 644{ 645 struct radv_device *device = cmd_buffer->device; 646 VkPipeline *pipeline = &device->meta_state.etc_decode.pipeline; 647 648 if (!*pipeline) { 649 VkResult ret; 650 651 ret = create_decode_pipeline(device, pipeline); 652 if (ret != VK_SUCCESS) { 653 cmd_buffer->record_result = ret; 654 return VK_NULL_HANDLE; 655 } 656 } 657 658 return *pipeline; 659} 660 661static void 662decode_etc(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview, 663 struct radv_image_view *dest_iview, const VkOffset3D *offset, const VkExtent3D *extent) 664{ 665 struct radv_device *device = cmd_buffer->device; 666 667 radv_meta_push_descriptor_set( 668 cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.resolve_compute.p_layout, 669 0, /* set */ 670 2, /* descriptorWriteCount */ 671 (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, 672 .dstBinding = 0, 673 .dstArrayElement = 0, 674 .descriptorCount = 1, 675 .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 676 .pImageInfo = 677 (VkDescriptorImageInfo[]){ 678 {.sampler = VK_NULL_HANDLE, 679 .imageView = radv_image_view_to_handle(src_iview), 680 .imageLayout = VK_IMAGE_LAYOUT_GENERAL}, 681 }}, 682 {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, 683 .dstBinding = 1, 684 .dstArrayElement = 0, 685 .descriptorCount = 1, 686 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 687 .pImageInfo = (VkDescriptorImageInfo[]){ 688 { 689 .sampler = VK_NULL_HANDLE, 690 .imageView = radv_image_view_to_handle(dest_iview), 691 .imageLayout = VK_IMAGE_LAYOUT_GENERAL, 692 }, 693 }}}); 694 695 VkPipeline pipeline = radv_get_etc_decode_pipeline(cmd_buffer); 696 697 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, 698 pipeline); 699 700 unsigned push_constants[5] = { 701 offset->x, offset->y, offset->z, src_iview->image->vk.format, src_iview->image->vk.image_type, 702 }; 703 704 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), 705 device->meta_state.resolve_compute.p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 706 0, 20, push_constants); 707 radv_unaligned_dispatch(cmd_buffer, extent->width, extent->height, extent->depth); 708} 709 710void 711radv_meta_decode_etc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, 712 VkImageLayout layout, const VkImageSubresourceLayers *subresource, 713 VkOffset3D offset, VkExtent3D extent) 714{ 715 struct radv_meta_saved_state saved_state; 716 radv_meta_save(&saved_state, cmd_buffer, 717 RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | 718 RADV_META_SAVE_DESCRIPTORS | RADV_META_SUSPEND_PREDICATING); 719 720 uint32_t base_slice = radv_meta_get_iview_layer(image, subresource, &offset); 721 uint32_t slice_count = image->vk.image_type == VK_IMAGE_TYPE_3D ? extent.depth : subresource->layerCount; 722 723 extent = vk_image_sanitize_extent(&image->vk, extent); 724 offset = vk_image_sanitize_offset(&image->vk, offset); 725 726 VkFormat load_format = vk_format_get_blocksize(image->vk.format) == 16 727 ? VK_FORMAT_R32G32B32A32_UINT 728 : VK_FORMAT_R32G32_UINT; 729 struct radv_image_view src_iview; 730 radv_image_view_init( 731 &src_iview, cmd_buffer->device, 732 &(VkImageViewCreateInfo){ 733 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, 734 .image = radv_image_to_handle(image), 735 .viewType = radv_meta_get_view_type(image), 736 .format = load_format, 737 .subresourceRange = 738 { 739 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, 740 .baseMipLevel = subresource->mipLevel, 741 .levelCount = 1, 742 .baseArrayLayer = 0, 743 .layerCount = subresource->baseArrayLayer + subresource->layerCount, 744 }, 745 }, 746 0, NULL); 747 748 VkFormat store_format; 749 switch (image->vk.format) { 750 case VK_FORMAT_EAC_R11_UNORM_BLOCK: 751 store_format = VK_FORMAT_R16_UNORM; 752 break; 753 case VK_FORMAT_EAC_R11_SNORM_BLOCK: 754 store_format = VK_FORMAT_R16_SNORM; 755 break; 756 case VK_FORMAT_EAC_R11G11_UNORM_BLOCK: 757 store_format = VK_FORMAT_R16G16_UNORM; 758 break; 759 case VK_FORMAT_EAC_R11G11_SNORM_BLOCK: 760 store_format = VK_FORMAT_R16G16_SNORM; 761 break; 762 default: 763 store_format = VK_FORMAT_R8G8B8A8_UNORM; 764 } 765 struct radv_image_view dest_iview; 766 radv_image_view_init( 767 &dest_iview, cmd_buffer->device, 768 &(VkImageViewCreateInfo){ 769 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, 770 .image = radv_image_to_handle(image), 771 .viewType = radv_meta_get_view_type(image), 772 .format = store_format, 773 .subresourceRange = 774 { 775 .aspectMask = VK_IMAGE_ASPECT_PLANE_1_BIT, 776 .baseMipLevel = subresource->mipLevel, 777 .levelCount = 1, 778 .baseArrayLayer = 0, 779 .layerCount = subresource->baseArrayLayer + subresource->layerCount, 780 }, 781 }, 782 0, NULL); 783 784 decode_etc(cmd_buffer, &src_iview, &dest_iview, &(VkOffset3D){offset.x, offset.y, base_slice}, 785 &(VkExtent3D){extent.width, extent.height, slice_count}); 786 787 radv_image_view_finish(&src_iview); 788 radv_image_view_finish(&dest_iview); 789 790 radv_meta_restore(&saved_state, cmd_buffer); 791} 792