1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright (C) 2020 Collabora, Ltd. 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci * 23bf215546Sopenharmony_ci * Authors (Collabora): 24bf215546Sopenharmony_ci * Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com> 25bf215546Sopenharmony_ci */ 26bf215546Sopenharmony_ci 27bf215546Sopenharmony_ci/** 28bf215546Sopenharmony_ci * Implements framebuffer format conversions in software for Midgard/Bifrost 29bf215546Sopenharmony_ci * blend shaders. This pass is designed for a single render target; Midgard 30bf215546Sopenharmony_ci * duplicates blend shaders for MRT to simplify everything. A particular 31bf215546Sopenharmony_ci * framebuffer format may be categorized as 1) typed load available, 2) typed 32bf215546Sopenharmony_ci * unpack available, or 3) software unpack only, and likewise for stores. The 33bf215546Sopenharmony_ci * first two types are handled in the compiler backend directly, so this module 34bf215546Sopenharmony_ci * is responsible for identifying type 3 formats (hardware dependent) and 35bf215546Sopenharmony_ci * inserting appropriate ALU code to perform the conversion from the packed 36bf215546Sopenharmony_ci * type to a designated unpacked type, and vice versa. 37bf215546Sopenharmony_ci * 38bf215546Sopenharmony_ci * The unpacked type depends on the format: 39bf215546Sopenharmony_ci * 40bf215546Sopenharmony_ci * - For 32-bit float formats or >8-bit UNORM, 32-bit floats. 41bf215546Sopenharmony_ci * - For other floats, 16-bit floats. 42bf215546Sopenharmony_ci * - For 32-bit ints, 32-bit ints. 43bf215546Sopenharmony_ci * - For 8-bit ints, 8-bit ints. 44bf215546Sopenharmony_ci * - For other ints, 16-bit ints. 45bf215546Sopenharmony_ci * 46bf215546Sopenharmony_ci * The rationale is to optimize blending and logic op instructions by using the 47bf215546Sopenharmony_ci * smallest precision necessary to store the pixel losslessly. 48bf215546Sopenharmony_ci */ 49bf215546Sopenharmony_ci 50bf215546Sopenharmony_ci#include "compiler/nir/nir.h" 51bf215546Sopenharmony_ci#include "compiler/nir/nir_builder.h" 52bf215546Sopenharmony_ci#include "compiler/nir/nir_format_convert.h" 53bf215546Sopenharmony_ci#include "util/format/u_format.h" 54bf215546Sopenharmony_ci#include "pan_lower_framebuffer.h" 55bf215546Sopenharmony_ci 56bf215546Sopenharmony_ci/* Determines the unpacked type best suiting a given format, so the rest of the 57bf215546Sopenharmony_ci * pipeline may be adjusted accordingly */ 58bf215546Sopenharmony_ci 59bf215546Sopenharmony_cinir_alu_type 60bf215546Sopenharmony_cipan_unpacked_type_for_format(const struct util_format_description *desc) 61bf215546Sopenharmony_ci{ 62bf215546Sopenharmony_ci int c = util_format_get_first_non_void_channel(desc->format); 63bf215546Sopenharmony_ci 64bf215546Sopenharmony_ci if (c == -1) 65bf215546Sopenharmony_ci unreachable("Void format not renderable"); 66bf215546Sopenharmony_ci 67bf215546Sopenharmony_ci bool large = (desc->channel[c].size > 16); 68bf215546Sopenharmony_ci bool large_norm = (desc->channel[c].size > 8); 69bf215546Sopenharmony_ci bool bit8 = (desc->channel[c].size == 8); 70bf215546Sopenharmony_ci assert(desc->channel[c].size <= 32); 71bf215546Sopenharmony_ci 72bf215546Sopenharmony_ci if (desc->channel[c].normalized) 73bf215546Sopenharmony_ci return large_norm ? nir_type_float32 : nir_type_float16; 74bf215546Sopenharmony_ci 75bf215546Sopenharmony_ci switch (desc->channel[c].type) { 76bf215546Sopenharmony_ci case UTIL_FORMAT_TYPE_UNSIGNED: 77bf215546Sopenharmony_ci return bit8 ? nir_type_uint8 : 78bf215546Sopenharmony_ci large ? nir_type_uint32 : nir_type_uint16; 79bf215546Sopenharmony_ci case UTIL_FORMAT_TYPE_SIGNED: 80bf215546Sopenharmony_ci return bit8 ? nir_type_int8 : 81bf215546Sopenharmony_ci large ? nir_type_int32 : nir_type_int16; 82bf215546Sopenharmony_ci case UTIL_FORMAT_TYPE_FLOAT: 83bf215546Sopenharmony_ci return large ? nir_type_float32 : nir_type_float16; 84bf215546Sopenharmony_ci default: 85bf215546Sopenharmony_ci unreachable("Format not renderable"); 86bf215546Sopenharmony_ci } 87bf215546Sopenharmony_ci} 88bf215546Sopenharmony_ci 89bf215546Sopenharmony_cistatic bool 90bf215546Sopenharmony_cipan_is_format_native(const struct util_format_description *desc, bool broken_ld_special, bool is_store) 91bf215546Sopenharmony_ci{ 92bf215546Sopenharmony_ci if (is_store || broken_ld_special) 93bf215546Sopenharmony_ci return false; 94bf215546Sopenharmony_ci 95bf215546Sopenharmony_ci if (util_format_is_pure_integer(desc->format) || util_format_is_float(desc->format)) 96bf215546Sopenharmony_ci return false; 97bf215546Sopenharmony_ci 98bf215546Sopenharmony_ci /* Some formats are missing as typed but have unpacks */ 99bf215546Sopenharmony_ci if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT) 100bf215546Sopenharmony_ci return false; 101bf215546Sopenharmony_ci 102bf215546Sopenharmony_ci return true; 103bf215546Sopenharmony_ci} 104bf215546Sopenharmony_ci 105bf215546Sopenharmony_ci/* Software packs/unpacks, by format class. Packs take in the pixel value typed 106bf215546Sopenharmony_ci * as `pan_unpacked_type_for_format` of the format and return an i32vec4 107bf215546Sopenharmony_ci * suitable for storing (with components replicated to fill). Unpacks do the 108bf215546Sopenharmony_ci * reverse but cannot rely on replication. */ 109bf215546Sopenharmony_ci 110bf215546Sopenharmony_cistatic nir_ssa_def * 111bf215546Sopenharmony_cipan_replicate(nir_builder *b, nir_ssa_def *v, unsigned num_components) 112bf215546Sopenharmony_ci{ 113bf215546Sopenharmony_ci nir_ssa_def *replicated[4]; 114bf215546Sopenharmony_ci 115bf215546Sopenharmony_ci for (unsigned i = 0; i < 4; ++i) 116bf215546Sopenharmony_ci replicated[i] = nir_channel(b, v, i % num_components); 117bf215546Sopenharmony_ci 118bf215546Sopenharmony_ci return nir_vec(b, replicated, 4); 119bf215546Sopenharmony_ci} 120bf215546Sopenharmony_ci 121bf215546Sopenharmony_cistatic nir_ssa_def * 122bf215546Sopenharmony_cipan_unpack_pure_32(nir_builder *b, nir_ssa_def *pack, unsigned num_components) 123bf215546Sopenharmony_ci{ 124bf215546Sopenharmony_ci return nir_channels(b, pack, (1 << num_components) - 1); 125bf215546Sopenharmony_ci} 126bf215546Sopenharmony_ci 127bf215546Sopenharmony_ci/* Pure x16 formats are x16 unpacked, so it's similar, but we need to pack 128bf215546Sopenharmony_ci * upper/lower halves of course */ 129bf215546Sopenharmony_ci 130bf215546Sopenharmony_cistatic nir_ssa_def * 131bf215546Sopenharmony_cipan_pack_pure_16(nir_builder *b, nir_ssa_def *v, unsigned num_components) 132bf215546Sopenharmony_ci{ 133bf215546Sopenharmony_ci nir_ssa_def *v4 = pan_replicate(b, v, num_components); 134bf215546Sopenharmony_ci 135bf215546Sopenharmony_ci nir_ssa_def *lo = nir_pack_32_2x16(b, nir_channels(b, v4, 0x3 << 0)); 136bf215546Sopenharmony_ci nir_ssa_def *hi = nir_pack_32_2x16(b, nir_channels(b, v4, 0x3 << 2)); 137bf215546Sopenharmony_ci 138bf215546Sopenharmony_ci return nir_vec4(b, lo, hi, lo, hi); 139bf215546Sopenharmony_ci} 140bf215546Sopenharmony_ci 141bf215546Sopenharmony_cistatic nir_ssa_def * 142bf215546Sopenharmony_cipan_unpack_pure_16(nir_builder *b, nir_ssa_def *pack, unsigned num_components) 143bf215546Sopenharmony_ci{ 144bf215546Sopenharmony_ci nir_ssa_def *unpacked[4]; 145bf215546Sopenharmony_ci 146bf215546Sopenharmony_ci assert(num_components <= 4); 147bf215546Sopenharmony_ci 148bf215546Sopenharmony_ci for (unsigned i = 0; i < num_components; i += 2) { 149bf215546Sopenharmony_ci nir_ssa_def *halves = 150bf215546Sopenharmony_ci nir_unpack_32_2x16(b, nir_channel(b, pack, i >> 1)); 151bf215546Sopenharmony_ci 152bf215546Sopenharmony_ci unpacked[i + 0] = nir_channel(b, halves, 0); 153bf215546Sopenharmony_ci unpacked[i + 1] = nir_channel(b, halves, 1); 154bf215546Sopenharmony_ci } 155bf215546Sopenharmony_ci 156bf215546Sopenharmony_ci return nir_pad_vec4(b, nir_vec(b, unpacked, num_components)); 157bf215546Sopenharmony_ci} 158bf215546Sopenharmony_ci 159bf215546Sopenharmony_cistatic nir_ssa_def * 160bf215546Sopenharmony_cipan_pack_reorder(nir_builder *b, 161bf215546Sopenharmony_ci const struct util_format_description *desc, 162bf215546Sopenharmony_ci nir_ssa_def *v) 163bf215546Sopenharmony_ci{ 164bf215546Sopenharmony_ci unsigned swizzle[4] = { 0, 1, 2, 3 }; 165bf215546Sopenharmony_ci 166bf215546Sopenharmony_ci for (unsigned i = 0; i < v->num_components; i++) { 167bf215546Sopenharmony_ci if (desc->swizzle[i] <= PIPE_SWIZZLE_W) 168bf215546Sopenharmony_ci swizzle[i] = desc->swizzle[i]; 169bf215546Sopenharmony_ci } 170bf215546Sopenharmony_ci 171bf215546Sopenharmony_ci return nir_swizzle(b, v, swizzle, v->num_components); 172bf215546Sopenharmony_ci} 173bf215546Sopenharmony_ci 174bf215546Sopenharmony_cistatic nir_ssa_def * 175bf215546Sopenharmony_cipan_unpack_reorder(nir_builder *b, 176bf215546Sopenharmony_ci const struct util_format_description *desc, 177bf215546Sopenharmony_ci nir_ssa_def *v) 178bf215546Sopenharmony_ci{ 179bf215546Sopenharmony_ci unsigned swizzle[4] = { 0, 1, 2, 3 }; 180bf215546Sopenharmony_ci 181bf215546Sopenharmony_ci for (unsigned i = 0; i < v->num_components; i++) { 182bf215546Sopenharmony_ci if (desc->swizzle[i] <= PIPE_SWIZZLE_W) 183bf215546Sopenharmony_ci swizzle[desc->swizzle[i]] = i; 184bf215546Sopenharmony_ci } 185bf215546Sopenharmony_ci 186bf215546Sopenharmony_ci return nir_swizzle(b, v, swizzle, v->num_components); 187bf215546Sopenharmony_ci} 188bf215546Sopenharmony_ci 189bf215546Sopenharmony_cistatic nir_ssa_def * 190bf215546Sopenharmony_cipan_replicate_4(nir_builder *b, nir_ssa_def *v) 191bf215546Sopenharmony_ci{ 192bf215546Sopenharmony_ci return nir_vec4(b, v, v, v, v); 193bf215546Sopenharmony_ci} 194bf215546Sopenharmony_ci 195bf215546Sopenharmony_cistatic nir_ssa_def * 196bf215546Sopenharmony_cipan_pack_pure_8(nir_builder *b, nir_ssa_def *v, unsigned num_components) 197bf215546Sopenharmony_ci{ 198bf215546Sopenharmony_ci return pan_replicate_4(b, nir_pack_32_4x8(b, pan_replicate(b, v, num_components))); 199bf215546Sopenharmony_ci} 200bf215546Sopenharmony_ci 201bf215546Sopenharmony_cistatic nir_ssa_def * 202bf215546Sopenharmony_cipan_unpack_pure_8(nir_builder *b, nir_ssa_def *pack, unsigned num_components) 203bf215546Sopenharmony_ci{ 204bf215546Sopenharmony_ci nir_ssa_def *unpacked = nir_unpack_32_4x8(b, nir_channel(b, pack, 0)); 205bf215546Sopenharmony_ci return nir_channels(b, unpacked, (1 << num_components) - 1); 206bf215546Sopenharmony_ci} 207bf215546Sopenharmony_ci 208bf215546Sopenharmony_ci/* For <= 8-bits per channel, [U,S]NORM formats are packed like [U,S]NORM 8, 209bf215546Sopenharmony_ci * with zeroes spacing out each component as needed */ 210bf215546Sopenharmony_ci 211bf215546Sopenharmony_cistatic nir_ssa_def * 212bf215546Sopenharmony_cipan_pack_norm(nir_builder *b, nir_ssa_def *v, 213bf215546Sopenharmony_ci unsigned x, unsigned y, unsigned z, unsigned w, 214bf215546Sopenharmony_ci bool is_signed) 215bf215546Sopenharmony_ci{ 216bf215546Sopenharmony_ci /* If a channel has N bits, 1.0 is encoded as 2^N - 1 for UNORMs and 217bf215546Sopenharmony_ci * 2^(N-1) - 1 for SNORMs */ 218bf215546Sopenharmony_ci nir_ssa_def *scales = 219bf215546Sopenharmony_ci is_signed ? 220bf215546Sopenharmony_ci nir_imm_vec4_16(b, 221bf215546Sopenharmony_ci (1 << (x - 1)) - 1, (1 << (y - 1)) - 1, 222bf215546Sopenharmony_ci (1 << (z - 1)) - 1, (1 << (w - 1)) - 1) : 223bf215546Sopenharmony_ci nir_imm_vec4_16(b, 224bf215546Sopenharmony_ci (1 << x) - 1, (1 << y) - 1, 225bf215546Sopenharmony_ci (1 << z) - 1, (1 << w) - 1); 226bf215546Sopenharmony_ci 227bf215546Sopenharmony_ci /* If a channel has N bits, we pad out to the byte by (8 - N) bits */ 228bf215546Sopenharmony_ci nir_ssa_def *shifts = nir_imm_ivec4(b, 8 - x, 8 - y, 8 - z, 8 - w); 229bf215546Sopenharmony_ci 230bf215546Sopenharmony_ci nir_ssa_def *clamped = 231bf215546Sopenharmony_ci is_signed ? 232bf215546Sopenharmony_ci nir_fsat_signed_mali(b, nir_pad_vec4(b, v)) : 233bf215546Sopenharmony_ci nir_fsat(b, nir_pad_vec4(b, v)); 234bf215546Sopenharmony_ci 235bf215546Sopenharmony_ci nir_ssa_def *f = nir_fmul(b, clamped, scales); 236bf215546Sopenharmony_ci nir_ssa_def *u8 = nir_f2u8(b, nir_fround_even(b, f)); 237bf215546Sopenharmony_ci nir_ssa_def *s = nir_ishl(b, u8, shifts); 238bf215546Sopenharmony_ci nir_ssa_def *repl = nir_pack_32_4x8(b, s); 239bf215546Sopenharmony_ci 240bf215546Sopenharmony_ci return pan_replicate_4(b, repl); 241bf215546Sopenharmony_ci} 242bf215546Sopenharmony_ci 243bf215546Sopenharmony_cistatic nir_ssa_def * 244bf215546Sopenharmony_cipan_pack_unorm(nir_builder *b, nir_ssa_def *v, 245bf215546Sopenharmony_ci unsigned x, unsigned y, unsigned z, unsigned w) 246bf215546Sopenharmony_ci{ 247bf215546Sopenharmony_ci return pan_pack_norm(b, v, x, y, z, w, false); 248bf215546Sopenharmony_ci} 249bf215546Sopenharmony_ci 250bf215546Sopenharmony_cistatic nir_ssa_def * 251bf215546Sopenharmony_cipan_pack_snorm(nir_builder *b, nir_ssa_def *v, 252bf215546Sopenharmony_ci unsigned x, unsigned y, unsigned z, unsigned w) 253bf215546Sopenharmony_ci{ 254bf215546Sopenharmony_ci return pan_pack_norm(b, v, x, y, z, w, true); 255bf215546Sopenharmony_ci} 256bf215546Sopenharmony_ci 257bf215546Sopenharmony_ci/* RGB10_A2 is packed in the tilebuffer as the bottom 3 bytes being the top 258bf215546Sopenharmony_ci * 8-bits of RGB and the top byte being RGBA as 2-bits packed. As imirkin 259bf215546Sopenharmony_ci * pointed out, this means free conversion to RGBX8 */ 260bf215546Sopenharmony_ci 261bf215546Sopenharmony_cistatic nir_ssa_def * 262bf215546Sopenharmony_cipan_pack_unorm_1010102(nir_builder *b, nir_ssa_def *v) 263bf215546Sopenharmony_ci{ 264bf215546Sopenharmony_ci nir_ssa_def *scale = nir_imm_vec4(b, 1023.0, 1023.0, 1023.0, 3.0); 265bf215546Sopenharmony_ci nir_ssa_def *s = nir_f2u32(b, nir_fround_even(b, nir_fmul(b, nir_fsat(b, v), scale))); 266bf215546Sopenharmony_ci 267bf215546Sopenharmony_ci nir_ssa_def *top8 = nir_ushr(b, s, nir_imm_ivec4(b, 0x2, 0x2, 0x2, 0x2)); 268bf215546Sopenharmony_ci nir_ssa_def *top8_rgb = nir_pack_32_4x8(b, nir_u2u8(b, top8)); 269bf215546Sopenharmony_ci 270bf215546Sopenharmony_ci nir_ssa_def *bottom2 = nir_iand(b, s, nir_imm_ivec4(b, 0x3, 0x3, 0x3, 0x3)); 271bf215546Sopenharmony_ci 272bf215546Sopenharmony_ci nir_ssa_def *top = 273bf215546Sopenharmony_ci nir_ior(b, 274bf215546Sopenharmony_ci nir_ior(b, 275bf215546Sopenharmony_ci nir_ishl(b, nir_channel(b, bottom2, 0), nir_imm_int(b, 24 + 0)), 276bf215546Sopenharmony_ci nir_ishl(b, nir_channel(b, bottom2, 1), nir_imm_int(b, 24 + 2))), 277bf215546Sopenharmony_ci nir_ior(b, 278bf215546Sopenharmony_ci nir_ishl(b, nir_channel(b, bottom2, 2), nir_imm_int(b, 24 + 4)), 279bf215546Sopenharmony_ci nir_ishl(b, nir_channel(b, bottom2, 3), nir_imm_int(b, 24 + 6)))); 280bf215546Sopenharmony_ci 281bf215546Sopenharmony_ci nir_ssa_def *p = nir_ior(b, top, top8_rgb); 282bf215546Sopenharmony_ci return pan_replicate_4(b, p); 283bf215546Sopenharmony_ci} 284bf215546Sopenharmony_ci 285bf215546Sopenharmony_ci/* On the other hand, the pure int RGB10_A2 is identical to the spec */ 286bf215546Sopenharmony_ci 287bf215546Sopenharmony_cistatic nir_ssa_def * 288bf215546Sopenharmony_cipan_pack_int_1010102(nir_builder *b, nir_ssa_def *v, bool is_signed) 289bf215546Sopenharmony_ci{ 290bf215546Sopenharmony_ci v = nir_u2u32(b, v); 291bf215546Sopenharmony_ci 292bf215546Sopenharmony_ci /* Clamp the values */ 293bf215546Sopenharmony_ci if (is_signed) { 294bf215546Sopenharmony_ci v = nir_imin(b, v, nir_imm_ivec4(b, 511, 511, 511, 1)); 295bf215546Sopenharmony_ci v = nir_imax(b, v, nir_imm_ivec4(b, -512, -512, -512, -2)); 296bf215546Sopenharmony_ci } else { 297bf215546Sopenharmony_ci v = nir_umin(b, v, nir_imm_ivec4(b, 1023, 1023, 1023, 3)); 298bf215546Sopenharmony_ci } 299bf215546Sopenharmony_ci 300bf215546Sopenharmony_ci v = nir_ishl(b, v, nir_imm_ivec4(b, 0, 10, 20, 30)); 301bf215546Sopenharmony_ci v = nir_ior(b, 302bf215546Sopenharmony_ci nir_ior(b, nir_channel(b, v, 0), nir_channel(b, v, 1)), 303bf215546Sopenharmony_ci nir_ior(b, nir_channel(b, v, 2), nir_channel(b, v, 3))); 304bf215546Sopenharmony_ci 305bf215546Sopenharmony_ci return pan_replicate_4(b, v); 306bf215546Sopenharmony_ci} 307bf215546Sopenharmony_ci 308bf215546Sopenharmony_cistatic nir_ssa_def * 309bf215546Sopenharmony_cipan_unpack_int_1010102(nir_builder *b, nir_ssa_def *packed, bool is_signed) 310bf215546Sopenharmony_ci{ 311bf215546Sopenharmony_ci nir_ssa_def *v = pan_replicate_4(b, nir_channel(b, packed, 0)); 312bf215546Sopenharmony_ci 313bf215546Sopenharmony_ci /* Left shift all components so the sign bit is on the MSB, and 314bf215546Sopenharmony_ci * can be extended by ishr(). The ishl()+[u,i]shr() combination 315bf215546Sopenharmony_ci * sets all unused bits to 0 without requiring a mask. 316bf215546Sopenharmony_ci */ 317bf215546Sopenharmony_ci v = nir_ishl(b, v, nir_imm_ivec4(b, 22, 12, 2, 0)); 318bf215546Sopenharmony_ci 319bf215546Sopenharmony_ci if (is_signed) 320bf215546Sopenharmony_ci v = nir_ishr(b, v, nir_imm_ivec4(b, 22, 22, 22, 30)); 321bf215546Sopenharmony_ci else 322bf215546Sopenharmony_ci v = nir_ushr(b, v, nir_imm_ivec4(b, 22, 22, 22, 30)); 323bf215546Sopenharmony_ci 324bf215546Sopenharmony_ci return nir_i2i16(b, v); 325bf215546Sopenharmony_ci} 326bf215546Sopenharmony_ci 327bf215546Sopenharmony_ci/* NIR means we can *finally* catch a break */ 328bf215546Sopenharmony_ci 329bf215546Sopenharmony_cistatic nir_ssa_def * 330bf215546Sopenharmony_cipan_pack_r11g11b10(nir_builder *b, nir_ssa_def *v) 331bf215546Sopenharmony_ci{ 332bf215546Sopenharmony_ci return pan_replicate_4(b, nir_format_pack_11f11f10f(b, 333bf215546Sopenharmony_ci nir_f2f32(b, v))); 334bf215546Sopenharmony_ci} 335bf215546Sopenharmony_ci 336bf215546Sopenharmony_cistatic nir_ssa_def * 337bf215546Sopenharmony_cipan_unpack_r11g11b10(nir_builder *b, nir_ssa_def *v) 338bf215546Sopenharmony_ci{ 339bf215546Sopenharmony_ci nir_ssa_def *f32 = nir_format_unpack_11f11f10f(b, nir_channel(b, v, 0)); 340bf215546Sopenharmony_ci nir_ssa_def *f16 = nir_f2fmp(b, f32); 341bf215546Sopenharmony_ci 342bf215546Sopenharmony_ci /* Extend to vec4 with alpha */ 343bf215546Sopenharmony_ci nir_ssa_def *components[4] = { 344bf215546Sopenharmony_ci nir_channel(b, f16, 0), 345bf215546Sopenharmony_ci nir_channel(b, f16, 1), 346bf215546Sopenharmony_ci nir_channel(b, f16, 2), 347bf215546Sopenharmony_ci nir_imm_float16(b, 1.0) 348bf215546Sopenharmony_ci }; 349bf215546Sopenharmony_ci 350bf215546Sopenharmony_ci return nir_vec(b, components, 4); 351bf215546Sopenharmony_ci} 352bf215546Sopenharmony_ci 353bf215546Sopenharmony_ci/* Wrapper around sRGB conversion */ 354bf215546Sopenharmony_ci 355bf215546Sopenharmony_cistatic nir_ssa_def * 356bf215546Sopenharmony_cipan_linear_to_srgb(nir_builder *b, nir_ssa_def *linear) 357bf215546Sopenharmony_ci{ 358bf215546Sopenharmony_ci nir_ssa_def *rgb = nir_channels(b, linear, 0x7); 359bf215546Sopenharmony_ci 360bf215546Sopenharmony_ci /* TODO: fp16 native conversion */ 361bf215546Sopenharmony_ci nir_ssa_def *srgb = nir_f2fmp(b, 362bf215546Sopenharmony_ci nir_format_linear_to_srgb(b, nir_f2f32(b, rgb))); 363bf215546Sopenharmony_ci 364bf215546Sopenharmony_ci nir_ssa_def *comp[4] = { 365bf215546Sopenharmony_ci nir_channel(b, srgb, 0), 366bf215546Sopenharmony_ci nir_channel(b, srgb, 1), 367bf215546Sopenharmony_ci nir_channel(b, srgb, 2), 368bf215546Sopenharmony_ci nir_channel(b, linear, 3), 369bf215546Sopenharmony_ci }; 370bf215546Sopenharmony_ci 371bf215546Sopenharmony_ci return nir_vec(b, comp, 4); 372bf215546Sopenharmony_ci} 373bf215546Sopenharmony_ci 374bf215546Sopenharmony_ci/* Generic dispatches for un/pack regardless of format */ 375bf215546Sopenharmony_ci 376bf215546Sopenharmony_cistatic nir_ssa_def * 377bf215546Sopenharmony_cipan_unpack(nir_builder *b, 378bf215546Sopenharmony_ci const struct util_format_description *desc, 379bf215546Sopenharmony_ci nir_ssa_def *packed) 380bf215546Sopenharmony_ci{ 381bf215546Sopenharmony_ci if (desc->is_array) { 382bf215546Sopenharmony_ci int c = util_format_get_first_non_void_channel(desc->format); 383bf215546Sopenharmony_ci assert(c >= 0); 384bf215546Sopenharmony_ci struct util_format_channel_description d = desc->channel[c]; 385bf215546Sopenharmony_ci 386bf215546Sopenharmony_ci if (d.size == 32 || d.size == 16) { 387bf215546Sopenharmony_ci assert(!d.normalized); 388bf215546Sopenharmony_ci assert(d.type == UTIL_FORMAT_TYPE_FLOAT || d.pure_integer); 389bf215546Sopenharmony_ci 390bf215546Sopenharmony_ci return d.size == 32 ? pan_unpack_pure_32(b, packed, desc->nr_channels) : 391bf215546Sopenharmony_ci pan_unpack_pure_16(b, packed, desc->nr_channels); 392bf215546Sopenharmony_ci } else if (d.size == 8) { 393bf215546Sopenharmony_ci assert(d.pure_integer); 394bf215546Sopenharmony_ci return pan_unpack_pure_8(b, packed, desc->nr_channels); 395bf215546Sopenharmony_ci } else { 396bf215546Sopenharmony_ci unreachable("Unrenderable size"); 397bf215546Sopenharmony_ci } 398bf215546Sopenharmony_ci } 399bf215546Sopenharmony_ci 400bf215546Sopenharmony_ci switch (desc->format) { 401bf215546Sopenharmony_ci case PIPE_FORMAT_R10G10B10A2_UINT: 402bf215546Sopenharmony_ci case PIPE_FORMAT_B10G10R10A2_UINT: 403bf215546Sopenharmony_ci return pan_unpack_int_1010102(b, packed, false); 404bf215546Sopenharmony_ci case PIPE_FORMAT_R10G10B10A2_SINT: 405bf215546Sopenharmony_ci case PIPE_FORMAT_B10G10R10A2_SINT: 406bf215546Sopenharmony_ci return pan_unpack_int_1010102(b, packed, true); 407bf215546Sopenharmony_ci case PIPE_FORMAT_R11G11B10_FLOAT: 408bf215546Sopenharmony_ci return pan_unpack_r11g11b10(b, packed); 409bf215546Sopenharmony_ci default: 410bf215546Sopenharmony_ci break; 411bf215546Sopenharmony_ci } 412bf215546Sopenharmony_ci 413bf215546Sopenharmony_ci fprintf(stderr, "%s\n", desc->name); 414bf215546Sopenharmony_ci unreachable("Unknown format"); 415bf215546Sopenharmony_ci} 416bf215546Sopenharmony_ci 417bf215546Sopenharmony_cistatic nir_ssa_def * 418bf215546Sopenharmony_cipan_pack(nir_builder *b, 419bf215546Sopenharmony_ci const struct util_format_description *desc, 420bf215546Sopenharmony_ci nir_ssa_def *unpacked) 421bf215546Sopenharmony_ci{ 422bf215546Sopenharmony_ci if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) 423bf215546Sopenharmony_ci unpacked = pan_linear_to_srgb(b, unpacked); 424bf215546Sopenharmony_ci 425bf215546Sopenharmony_ci if (util_format_is_unorm8(desc)) 426bf215546Sopenharmony_ci return pan_pack_unorm(b, unpacked, 8, 8, 8, 8); 427bf215546Sopenharmony_ci 428bf215546Sopenharmony_ci if (util_format_is_snorm8(desc->format)) 429bf215546Sopenharmony_ci return pan_pack_snorm(b, unpacked, 8, 8, 8, 8); 430bf215546Sopenharmony_ci 431bf215546Sopenharmony_ci if (desc->is_array) { 432bf215546Sopenharmony_ci int c = util_format_get_first_non_void_channel(desc->format); 433bf215546Sopenharmony_ci assert(c >= 0); 434bf215546Sopenharmony_ci struct util_format_channel_description d = desc->channel[c]; 435bf215546Sopenharmony_ci 436bf215546Sopenharmony_ci if (d.size == 32 || d.size == 16) { 437bf215546Sopenharmony_ci assert(!d.normalized); 438bf215546Sopenharmony_ci assert(d.type == UTIL_FORMAT_TYPE_FLOAT || d.pure_integer); 439bf215546Sopenharmony_ci 440bf215546Sopenharmony_ci return d.size == 32 ? 441bf215546Sopenharmony_ci pan_replicate(b, unpacked, desc->nr_channels) : 442bf215546Sopenharmony_ci pan_pack_pure_16(b, unpacked, desc->nr_channels); 443bf215546Sopenharmony_ci } else if (d.size == 8) { 444bf215546Sopenharmony_ci assert(d.pure_integer); 445bf215546Sopenharmony_ci return pan_pack_pure_8(b, unpacked, desc->nr_channels); 446bf215546Sopenharmony_ci } else { 447bf215546Sopenharmony_ci unreachable("Unrenderable size"); 448bf215546Sopenharmony_ci } 449bf215546Sopenharmony_ci } 450bf215546Sopenharmony_ci 451bf215546Sopenharmony_ci switch (desc->format) { 452bf215546Sopenharmony_ci case PIPE_FORMAT_B4G4R4A4_UNORM: 453bf215546Sopenharmony_ci case PIPE_FORMAT_B4G4R4X4_UNORM: 454bf215546Sopenharmony_ci case PIPE_FORMAT_A4R4_UNORM: 455bf215546Sopenharmony_ci case PIPE_FORMAT_R4A4_UNORM: 456bf215546Sopenharmony_ci case PIPE_FORMAT_A4B4G4R4_UNORM: 457bf215546Sopenharmony_ci case PIPE_FORMAT_R4G4B4A4_UNORM: 458bf215546Sopenharmony_ci return pan_pack_unorm(b, unpacked, 4, 4, 4, 4); 459bf215546Sopenharmony_ci case PIPE_FORMAT_B5G5R5A1_UNORM: 460bf215546Sopenharmony_ci case PIPE_FORMAT_R5G5B5A1_UNORM: 461bf215546Sopenharmony_ci return pan_pack_unorm(b, unpacked, 5, 6, 5, 1); 462bf215546Sopenharmony_ci case PIPE_FORMAT_R5G6B5_UNORM: 463bf215546Sopenharmony_ci case PIPE_FORMAT_B5G6R5_UNORM: 464bf215546Sopenharmony_ci return pan_pack_unorm(b, unpacked, 5, 6, 5, 0); 465bf215546Sopenharmony_ci case PIPE_FORMAT_R10G10B10A2_UNORM: 466bf215546Sopenharmony_ci case PIPE_FORMAT_B10G10R10A2_UNORM: 467bf215546Sopenharmony_ci return pan_pack_unorm_1010102(b, unpacked); 468bf215546Sopenharmony_ci case PIPE_FORMAT_R10G10B10A2_UINT: 469bf215546Sopenharmony_ci case PIPE_FORMAT_B10G10R10A2_UINT: 470bf215546Sopenharmony_ci return pan_pack_int_1010102(b, unpacked, false); 471bf215546Sopenharmony_ci case PIPE_FORMAT_R10G10B10A2_SINT: 472bf215546Sopenharmony_ci case PIPE_FORMAT_B10G10R10A2_SINT: 473bf215546Sopenharmony_ci return pan_pack_int_1010102(b, unpacked, true); 474bf215546Sopenharmony_ci case PIPE_FORMAT_R11G11B10_FLOAT: 475bf215546Sopenharmony_ci return pan_pack_r11g11b10(b, unpacked); 476bf215546Sopenharmony_ci default: 477bf215546Sopenharmony_ci break; 478bf215546Sopenharmony_ci } 479bf215546Sopenharmony_ci 480bf215546Sopenharmony_ci fprintf(stderr, "%s\n", desc->name); 481bf215546Sopenharmony_ci unreachable("Unknown format"); 482bf215546Sopenharmony_ci} 483bf215546Sopenharmony_ci 484bf215546Sopenharmony_cistatic void 485bf215546Sopenharmony_cipan_lower_fb_store(nir_shader *shader, 486bf215546Sopenharmony_ci nir_builder *b, 487bf215546Sopenharmony_ci nir_intrinsic_instr *intr, 488bf215546Sopenharmony_ci const struct util_format_description *desc, 489bf215546Sopenharmony_ci bool reorder_comps) 490bf215546Sopenharmony_ci{ 491bf215546Sopenharmony_ci /* For stores, add conversion before */ 492bf215546Sopenharmony_ci nir_ssa_def *unpacked = 493bf215546Sopenharmony_ci nir_ssa_for_src(b, intr->src[1], intr->num_components); 494bf215546Sopenharmony_ci unpacked = nir_pad_vec4(b, unpacked); 495bf215546Sopenharmony_ci 496bf215546Sopenharmony_ci /* Re-order the components */ 497bf215546Sopenharmony_ci if (reorder_comps) 498bf215546Sopenharmony_ci unpacked = pan_pack_reorder(b, desc, unpacked); 499bf215546Sopenharmony_ci 500bf215546Sopenharmony_ci nir_ssa_def *packed = pan_pack(b, desc, unpacked); 501bf215546Sopenharmony_ci 502bf215546Sopenharmony_ci nir_store_raw_output_pan(b, packed); 503bf215546Sopenharmony_ci} 504bf215546Sopenharmony_ci 505bf215546Sopenharmony_cistatic nir_ssa_def * 506bf215546Sopenharmony_cipan_sample_id(nir_builder *b, int sample) 507bf215546Sopenharmony_ci{ 508bf215546Sopenharmony_ci return (sample >= 0) ? nir_imm_int(b, sample) : nir_load_sample_id(b); 509bf215546Sopenharmony_ci} 510bf215546Sopenharmony_ci 511bf215546Sopenharmony_cistatic void 512bf215546Sopenharmony_cipan_lower_fb_load(nir_shader *shader, 513bf215546Sopenharmony_ci nir_builder *b, 514bf215546Sopenharmony_ci nir_intrinsic_instr *intr, 515bf215546Sopenharmony_ci const struct util_format_description *desc, 516bf215546Sopenharmony_ci bool reorder_comps, 517bf215546Sopenharmony_ci unsigned base, int sample) 518bf215546Sopenharmony_ci{ 519bf215546Sopenharmony_ci nir_ssa_def *packed = 520bf215546Sopenharmony_ci nir_load_raw_output_pan(b, 4, 32, pan_sample_id(b, sample), 521bf215546Sopenharmony_ci .base = base); 522bf215546Sopenharmony_ci 523bf215546Sopenharmony_ci /* Convert the raw value */ 524bf215546Sopenharmony_ci nir_ssa_def *unpacked = pan_unpack(b, desc, packed); 525bf215546Sopenharmony_ci 526bf215546Sopenharmony_ci /* Convert to the size of the load intrinsic. 527bf215546Sopenharmony_ci * 528bf215546Sopenharmony_ci * We can assume that the type will match with the framebuffer format: 529bf215546Sopenharmony_ci * 530bf215546Sopenharmony_ci * Page 170 of the PDF of the OpenGL ES 3.0.6 spec says: 531bf215546Sopenharmony_ci * 532bf215546Sopenharmony_ci * If [UNORM or SNORM, convert to fixed-point]; otherwise no type 533bf215546Sopenharmony_ci * conversion is applied. If the values written by the fragment shader 534bf215546Sopenharmony_ci * do not match the format(s) of the corresponding color buffer(s), 535bf215546Sopenharmony_ci * the result is undefined. 536bf215546Sopenharmony_ci */ 537bf215546Sopenharmony_ci 538bf215546Sopenharmony_ci unsigned bits = nir_dest_bit_size(intr->dest); 539bf215546Sopenharmony_ci 540bf215546Sopenharmony_ci nir_alu_type src_type = nir_alu_type_get_base_type( 541bf215546Sopenharmony_ci pan_unpacked_type_for_format(desc)); 542bf215546Sopenharmony_ci 543bf215546Sopenharmony_ci unpacked = nir_convert_to_bit_size(b, unpacked, src_type, bits); 544bf215546Sopenharmony_ci unpacked = nir_resize_vector(b, unpacked, intr->dest.ssa.num_components); 545bf215546Sopenharmony_ci 546bf215546Sopenharmony_ci /* Reorder the components */ 547bf215546Sopenharmony_ci if (reorder_comps) 548bf215546Sopenharmony_ci unpacked = pan_unpack_reorder(b, desc, unpacked); 549bf215546Sopenharmony_ci 550bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, unpacked, &intr->instr); 551bf215546Sopenharmony_ci} 552bf215546Sopenharmony_ci 553bf215546Sopenharmony_cibool 554bf215546Sopenharmony_cipan_lower_framebuffer(nir_shader *shader, const enum pipe_format *rt_fmts, 555bf215546Sopenharmony_ci uint8_t raw_fmt_mask, bool is_blend, bool broken_ld_special) 556bf215546Sopenharmony_ci{ 557bf215546Sopenharmony_ci if (shader->info.stage != MESA_SHADER_FRAGMENT) 558bf215546Sopenharmony_ci return false; 559bf215546Sopenharmony_ci 560bf215546Sopenharmony_ci bool progress = false; 561bf215546Sopenharmony_ci 562bf215546Sopenharmony_ci nir_foreach_function(func, shader) { 563bf215546Sopenharmony_ci nir_foreach_block(block, func->impl) { 564bf215546Sopenharmony_ci nir_foreach_instr_safe(instr, block) { 565bf215546Sopenharmony_ci if (instr->type != nir_instr_type_intrinsic) 566bf215546Sopenharmony_ci continue; 567bf215546Sopenharmony_ci 568bf215546Sopenharmony_ci nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); 569bf215546Sopenharmony_ci 570bf215546Sopenharmony_ci bool is_load = intr->intrinsic == nir_intrinsic_load_deref; 571bf215546Sopenharmony_ci bool is_store = intr->intrinsic == nir_intrinsic_store_deref; 572bf215546Sopenharmony_ci 573bf215546Sopenharmony_ci if (!(is_load || (is_store && is_blend))) 574bf215546Sopenharmony_ci continue; 575bf215546Sopenharmony_ci 576bf215546Sopenharmony_ci nir_variable *var = nir_intrinsic_get_var(intr, 0); 577bf215546Sopenharmony_ci 578bf215546Sopenharmony_ci if (var->data.mode != nir_var_shader_out) 579bf215546Sopenharmony_ci continue; 580bf215546Sopenharmony_ci 581bf215546Sopenharmony_ci if (var->data.location < FRAG_RESULT_DATA0) 582bf215546Sopenharmony_ci continue; 583bf215546Sopenharmony_ci 584bf215546Sopenharmony_ci unsigned base = var->data.driver_location; 585bf215546Sopenharmony_ci unsigned rt = var->data.location - FRAG_RESULT_DATA0; 586bf215546Sopenharmony_ci 587bf215546Sopenharmony_ci if (rt_fmts[rt] == PIPE_FORMAT_NONE) 588bf215546Sopenharmony_ci continue; 589bf215546Sopenharmony_ci 590bf215546Sopenharmony_ci const struct util_format_description *desc = 591bf215546Sopenharmony_ci util_format_description(rt_fmts[rt]); 592bf215546Sopenharmony_ci 593bf215546Sopenharmony_ci /* Don't lower */ 594bf215546Sopenharmony_ci if (pan_is_format_native(desc, broken_ld_special, is_store)) 595bf215546Sopenharmony_ci continue; 596bf215546Sopenharmony_ci 597bf215546Sopenharmony_ci /* EXT_shader_framebuffer_fetch requires 598bf215546Sopenharmony_ci * per-sample loads. 599bf215546Sopenharmony_ci * MSAA blend shaders are not yet handled, so 600bf215546Sopenharmony_ci * for now always load sample 0. */ 601bf215546Sopenharmony_ci int sample = is_blend ? 0 : -1; 602bf215546Sopenharmony_ci bool reorder_comps = raw_fmt_mask & BITFIELD_BIT(rt); 603bf215546Sopenharmony_ci 604bf215546Sopenharmony_ci nir_builder b; 605bf215546Sopenharmony_ci nir_builder_init(&b, func->impl); 606bf215546Sopenharmony_ci 607bf215546Sopenharmony_ci if (is_store) { 608bf215546Sopenharmony_ci b.cursor = nir_before_instr(instr); 609bf215546Sopenharmony_ci pan_lower_fb_store(shader, &b, intr, desc, reorder_comps); 610bf215546Sopenharmony_ci } else { 611bf215546Sopenharmony_ci b.cursor = nir_after_instr(instr); 612bf215546Sopenharmony_ci pan_lower_fb_load(shader, &b, intr, desc, reorder_comps, base, sample); 613bf215546Sopenharmony_ci } 614bf215546Sopenharmony_ci 615bf215546Sopenharmony_ci nir_instr_remove(instr); 616bf215546Sopenharmony_ci 617bf215546Sopenharmony_ci progress = true; 618bf215546Sopenharmony_ci } 619bf215546Sopenharmony_ci } 620bf215546Sopenharmony_ci 621bf215546Sopenharmony_ci nir_metadata_preserve(func->impl, nir_metadata_block_index | 622bf215546Sopenharmony_ci nir_metadata_dominance); 623bf215546Sopenharmony_ci } 624bf215546Sopenharmony_ci 625bf215546Sopenharmony_ci return progress; 626bf215546Sopenharmony_ci} 627