1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright (C) 2017-2018 Rob Clark <robclark@freedesktop.org> 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21bf215546Sopenharmony_ci * SOFTWARE. 22bf215546Sopenharmony_ci * 23bf215546Sopenharmony_ci * Authors: 24bf215546Sopenharmony_ci * Rob Clark <robclark@freedesktop.org> 25bf215546Sopenharmony_ci */ 26bf215546Sopenharmony_ci 27bf215546Sopenharmony_ci#define GPU 600 28bf215546Sopenharmony_ci 29bf215546Sopenharmony_ci#include "ir3_context.h" 30bf215546Sopenharmony_ci#include "ir3_image.h" 31bf215546Sopenharmony_ci 32bf215546Sopenharmony_ci/* 33bf215546Sopenharmony_ci * Handlers for instructions changed/added in a6xx: 34bf215546Sopenharmony_ci * 35bf215546Sopenharmony_ci * Starting with a6xx, isam and stbi is used for SSBOs as well; stbi and the 36bf215546Sopenharmony_ci * atomic instructions (used for both SSBO and image) use a new instruction 37bf215546Sopenharmony_ci * encoding compared to a4xx/a5xx. 38bf215546Sopenharmony_ci */ 39bf215546Sopenharmony_ci 40bf215546Sopenharmony_ci/* src[] = { buffer_index, offset }. No const_index */ 41bf215546Sopenharmony_cistatic void 42bf215546Sopenharmony_ciemit_intrinsic_load_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr, 43bf215546Sopenharmony_ci struct ir3_instruction **dst) 44bf215546Sopenharmony_ci{ 45bf215546Sopenharmony_ci struct ir3_block *b = ctx->block; 46bf215546Sopenharmony_ci struct ir3_instruction *offset; 47bf215546Sopenharmony_ci struct ir3_instruction *ldib; 48bf215546Sopenharmony_ci 49bf215546Sopenharmony_ci offset = ir3_get_src(ctx, &intr->src[2])[0]; 50bf215546Sopenharmony_ci 51bf215546Sopenharmony_ci ldib = ir3_LDIB(b, ir3_ssbo_to_ibo(ctx, intr->src[0]), 0, offset, 0); 52bf215546Sopenharmony_ci ldib->dsts[0]->wrmask = MASK(intr->num_components); 53bf215546Sopenharmony_ci ldib->cat6.iim_val = intr->num_components; 54bf215546Sopenharmony_ci ldib->cat6.d = 1; 55bf215546Sopenharmony_ci ldib->cat6.type = intr->dest.ssa.bit_size == 16 ? TYPE_U16 : TYPE_U32; 56bf215546Sopenharmony_ci ldib->barrier_class = IR3_BARRIER_BUFFER_R; 57bf215546Sopenharmony_ci ldib->barrier_conflict = IR3_BARRIER_BUFFER_W; 58bf215546Sopenharmony_ci ir3_handle_bindless_cat6(ldib, intr->src[0]); 59bf215546Sopenharmony_ci ir3_handle_nonuniform(ldib, intr); 60bf215546Sopenharmony_ci 61bf215546Sopenharmony_ci ir3_split_dest(b, dst, ldib, 0, intr->num_components); 62bf215546Sopenharmony_ci} 63bf215546Sopenharmony_ci 64bf215546Sopenharmony_ci/* src[] = { value, block_index, offset }. const_index[] = { write_mask } */ 65bf215546Sopenharmony_cistatic void 66bf215546Sopenharmony_ciemit_intrinsic_store_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr) 67bf215546Sopenharmony_ci{ 68bf215546Sopenharmony_ci struct ir3_block *b = ctx->block; 69bf215546Sopenharmony_ci struct ir3_instruction *stib, *val, *offset; 70bf215546Sopenharmony_ci unsigned wrmask = nir_intrinsic_write_mask(intr); 71bf215546Sopenharmony_ci unsigned ncomp = ffs(~wrmask) - 1; 72bf215546Sopenharmony_ci 73bf215546Sopenharmony_ci assert(wrmask == BITFIELD_MASK(intr->num_components)); 74bf215546Sopenharmony_ci 75bf215546Sopenharmony_ci /* src0 is offset, src1 is value: 76bf215546Sopenharmony_ci */ 77bf215546Sopenharmony_ci val = ir3_create_collect(b, ir3_get_src(ctx, &intr->src[0]), ncomp); 78bf215546Sopenharmony_ci offset = ir3_get_src(ctx, &intr->src[3])[0]; 79bf215546Sopenharmony_ci 80bf215546Sopenharmony_ci stib = ir3_STIB(b, ir3_ssbo_to_ibo(ctx, intr->src[1]), 0, offset, 0, val, 0); 81bf215546Sopenharmony_ci stib->cat6.iim_val = ncomp; 82bf215546Sopenharmony_ci stib->cat6.d = 1; 83bf215546Sopenharmony_ci stib->cat6.type = intr->src[0].ssa->bit_size == 16 ? TYPE_U16 : TYPE_U32; 84bf215546Sopenharmony_ci stib->barrier_class = IR3_BARRIER_BUFFER_W; 85bf215546Sopenharmony_ci stib->barrier_conflict = IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W; 86bf215546Sopenharmony_ci ir3_handle_bindless_cat6(stib, intr->src[1]); 87bf215546Sopenharmony_ci ir3_handle_nonuniform(stib, intr); 88bf215546Sopenharmony_ci 89bf215546Sopenharmony_ci array_insert(b, b->keeps, stib); 90bf215546Sopenharmony_ci} 91bf215546Sopenharmony_ci 92bf215546Sopenharmony_ci/* 93bf215546Sopenharmony_ci * SSBO atomic intrinsics 94bf215546Sopenharmony_ci * 95bf215546Sopenharmony_ci * All of the SSBO atomic memory operations read a value from memory, 96bf215546Sopenharmony_ci * compute a new value using one of the operations below, write the new 97bf215546Sopenharmony_ci * value to memory, and return the original value read. 98bf215546Sopenharmony_ci * 99bf215546Sopenharmony_ci * All operations take 3 sources except CompSwap that takes 4. These 100bf215546Sopenharmony_ci * sources represent: 101bf215546Sopenharmony_ci * 102bf215546Sopenharmony_ci * 0: The SSBO buffer index. 103bf215546Sopenharmony_ci * 1: The offset into the SSBO buffer of the variable that the atomic 104bf215546Sopenharmony_ci * operation will operate on. 105bf215546Sopenharmony_ci * 2: The data parameter to the atomic function (i.e. the value to add 106bf215546Sopenharmony_ci * in ssbo_atomic_add, etc). 107bf215546Sopenharmony_ci * 3: For CompSwap only: the second data parameter. 108bf215546Sopenharmony_ci */ 109bf215546Sopenharmony_cistatic struct ir3_instruction * 110bf215546Sopenharmony_ciemit_intrinsic_atomic_ssbo(struct ir3_context *ctx, nir_intrinsic_instr *intr) 111bf215546Sopenharmony_ci{ 112bf215546Sopenharmony_ci struct ir3_block *b = ctx->block; 113bf215546Sopenharmony_ci struct ir3_instruction *atomic, *ibo, *src0, *src1, *data, *dummy; 114bf215546Sopenharmony_ci type_t type = TYPE_U32; 115bf215546Sopenharmony_ci 116bf215546Sopenharmony_ci ibo = ir3_ssbo_to_ibo(ctx, intr->src[0]); 117bf215546Sopenharmony_ci 118bf215546Sopenharmony_ci data = ir3_get_src(ctx, &intr->src[2])[0]; 119bf215546Sopenharmony_ci 120bf215546Sopenharmony_ci /* So this gets a bit creative: 121bf215546Sopenharmony_ci * 122bf215546Sopenharmony_ci * src0 - vecN offset/coords 123bf215546Sopenharmony_ci * src1.x - is actually destination register 124bf215546Sopenharmony_ci * src1.y - is 'data' except for cmpxchg where src2.y is 'compare' 125bf215546Sopenharmony_ci * src1.z - is 'data' for cmpxchg 126bf215546Sopenharmony_ci * 127bf215546Sopenharmony_ci * The combining src and dest kinda doesn't work out so well with how 128bf215546Sopenharmony_ci * scheduling and RA work. So we create a dummy src2 which is tied to the 129bf215546Sopenharmony_ci * destination in RA (i.e. must be allocated to the same vec2/vec3 130bf215546Sopenharmony_ci * register) and then immediately extract the first component. 131bf215546Sopenharmony_ci * 132bf215546Sopenharmony_ci * Note that nir already multiplies the offset by four 133bf215546Sopenharmony_ci */ 134bf215546Sopenharmony_ci dummy = create_immed(b, 0); 135bf215546Sopenharmony_ci 136bf215546Sopenharmony_ci if (intr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap_ir3) { 137bf215546Sopenharmony_ci src0 = ir3_get_src(ctx, &intr->src[4])[0]; 138bf215546Sopenharmony_ci struct ir3_instruction *compare = ir3_get_src(ctx, &intr->src[3])[0]; 139bf215546Sopenharmony_ci src1 = ir3_collect(b, dummy, compare, data); 140bf215546Sopenharmony_ci } else { 141bf215546Sopenharmony_ci src0 = ir3_get_src(ctx, &intr->src[3])[0]; 142bf215546Sopenharmony_ci src1 = ir3_collect(b, dummy, data); 143bf215546Sopenharmony_ci } 144bf215546Sopenharmony_ci 145bf215546Sopenharmony_ci switch (intr->intrinsic) { 146bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_add_ir3: 147bf215546Sopenharmony_ci atomic = ir3_ATOMIC_B_ADD(b, ibo, 0, src0, 0, src1, 0); 148bf215546Sopenharmony_ci break; 149bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_imin_ir3: 150bf215546Sopenharmony_ci atomic = ir3_ATOMIC_B_MIN(b, ibo, 0, src0, 0, src1, 0); 151bf215546Sopenharmony_ci type = TYPE_S32; 152bf215546Sopenharmony_ci break; 153bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_umin_ir3: 154bf215546Sopenharmony_ci atomic = ir3_ATOMIC_B_MIN(b, ibo, 0, src0, 0, src1, 0); 155bf215546Sopenharmony_ci break; 156bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_imax_ir3: 157bf215546Sopenharmony_ci atomic = ir3_ATOMIC_B_MAX(b, ibo, 0, src0, 0, src1, 0); 158bf215546Sopenharmony_ci type = TYPE_S32; 159bf215546Sopenharmony_ci break; 160bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_umax_ir3: 161bf215546Sopenharmony_ci atomic = ir3_ATOMIC_B_MAX(b, ibo, 0, src0, 0, src1, 0); 162bf215546Sopenharmony_ci break; 163bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_and_ir3: 164bf215546Sopenharmony_ci atomic = ir3_ATOMIC_B_AND(b, ibo, 0, src0, 0, src1, 0); 165bf215546Sopenharmony_ci break; 166bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_or_ir3: 167bf215546Sopenharmony_ci atomic = ir3_ATOMIC_B_OR(b, ibo, 0, src0, 0, src1, 0); 168bf215546Sopenharmony_ci break; 169bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_xor_ir3: 170bf215546Sopenharmony_ci atomic = ir3_ATOMIC_B_XOR(b, ibo, 0, src0, 0, src1, 0); 171bf215546Sopenharmony_ci break; 172bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_exchange_ir3: 173bf215546Sopenharmony_ci atomic = ir3_ATOMIC_B_XCHG(b, ibo, 0, src0, 0, src1, 0); 174bf215546Sopenharmony_ci break; 175bf215546Sopenharmony_ci case nir_intrinsic_ssbo_atomic_comp_swap_ir3: 176bf215546Sopenharmony_ci atomic = ir3_ATOMIC_B_CMPXCHG(b, ibo, 0, src0, 0, src1, 0); 177bf215546Sopenharmony_ci break; 178bf215546Sopenharmony_ci default: 179bf215546Sopenharmony_ci unreachable("boo"); 180bf215546Sopenharmony_ci } 181bf215546Sopenharmony_ci 182bf215546Sopenharmony_ci atomic->cat6.iim_val = 1; 183bf215546Sopenharmony_ci atomic->cat6.d = 1; 184bf215546Sopenharmony_ci atomic->cat6.type = type; 185bf215546Sopenharmony_ci atomic->barrier_class = IR3_BARRIER_BUFFER_W; 186bf215546Sopenharmony_ci atomic->barrier_conflict = IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W; 187bf215546Sopenharmony_ci ir3_handle_bindless_cat6(atomic, intr->src[0]); 188bf215546Sopenharmony_ci 189bf215546Sopenharmony_ci /* even if nothing consume the result, we can't DCE the instruction: */ 190bf215546Sopenharmony_ci array_insert(b, b->keeps, atomic); 191bf215546Sopenharmony_ci 192bf215546Sopenharmony_ci atomic->dsts[0]->wrmask = src1->dsts[0]->wrmask; 193bf215546Sopenharmony_ci ir3_reg_tie(atomic->dsts[0], atomic->srcs[2]); 194bf215546Sopenharmony_ci struct ir3_instruction *split; 195bf215546Sopenharmony_ci ir3_split_dest(b, &split, atomic, 0, 1); 196bf215546Sopenharmony_ci return split; 197bf215546Sopenharmony_ci} 198bf215546Sopenharmony_ci 199bf215546Sopenharmony_ci/* src[] = { deref, coord, sample_index }. const_index[] = {} */ 200bf215546Sopenharmony_cistatic void 201bf215546Sopenharmony_ciemit_intrinsic_load_image(struct ir3_context *ctx, nir_intrinsic_instr *intr, 202bf215546Sopenharmony_ci struct ir3_instruction **dst) 203bf215546Sopenharmony_ci{ 204bf215546Sopenharmony_ci struct ir3_block *b = ctx->block; 205bf215546Sopenharmony_ci struct ir3_instruction *ldib; 206bf215546Sopenharmony_ci struct ir3_instruction *const *coords = ir3_get_src(ctx, &intr->src[1]); 207bf215546Sopenharmony_ci unsigned ncoords = ir3_get_image_coords(intr, NULL); 208bf215546Sopenharmony_ci 209bf215546Sopenharmony_ci ldib = ir3_LDIB(b, ir3_image_to_ibo(ctx, intr->src[0]), 0, 210bf215546Sopenharmony_ci ir3_create_collect(b, coords, ncoords), 0); 211bf215546Sopenharmony_ci ldib->dsts[0]->wrmask = MASK(intr->num_components); 212bf215546Sopenharmony_ci ldib->cat6.iim_val = intr->num_components; 213bf215546Sopenharmony_ci ldib->cat6.d = ncoords; 214bf215546Sopenharmony_ci ldib->cat6.type = ir3_get_type_for_image_intrinsic(intr); 215bf215546Sopenharmony_ci ldib->cat6.typed = true; 216bf215546Sopenharmony_ci ldib->barrier_class = IR3_BARRIER_IMAGE_R; 217bf215546Sopenharmony_ci ldib->barrier_conflict = IR3_BARRIER_IMAGE_W; 218bf215546Sopenharmony_ci ir3_handle_bindless_cat6(ldib, intr->src[0]); 219bf215546Sopenharmony_ci ir3_handle_nonuniform(ldib, intr); 220bf215546Sopenharmony_ci 221bf215546Sopenharmony_ci ir3_split_dest(b, dst, ldib, 0, intr->num_components); 222bf215546Sopenharmony_ci} 223bf215546Sopenharmony_ci 224bf215546Sopenharmony_ci/* src[] = { deref, coord, sample_index, value }. const_index[] = {} */ 225bf215546Sopenharmony_cistatic void 226bf215546Sopenharmony_ciemit_intrinsic_store_image(struct ir3_context *ctx, nir_intrinsic_instr *intr) 227bf215546Sopenharmony_ci{ 228bf215546Sopenharmony_ci struct ir3_block *b = ctx->block; 229bf215546Sopenharmony_ci struct ir3_instruction *stib; 230bf215546Sopenharmony_ci struct ir3_instruction *const *value = ir3_get_src(ctx, &intr->src[3]); 231bf215546Sopenharmony_ci struct ir3_instruction *const *coords = ir3_get_src(ctx, &intr->src[1]); 232bf215546Sopenharmony_ci unsigned ncoords = ir3_get_image_coords(intr, NULL); 233bf215546Sopenharmony_ci enum pipe_format format = nir_intrinsic_format(intr); 234bf215546Sopenharmony_ci unsigned ncomp = ir3_get_num_components_for_image_format(format); 235bf215546Sopenharmony_ci 236bf215546Sopenharmony_ci /* src0 is offset, src1 is value: 237bf215546Sopenharmony_ci */ 238bf215546Sopenharmony_ci stib = ir3_STIB(b, ir3_image_to_ibo(ctx, intr->src[0]), 0, 239bf215546Sopenharmony_ci ir3_create_collect(b, coords, ncoords), 0, 240bf215546Sopenharmony_ci ir3_create_collect(b, value, ncomp), 0); 241bf215546Sopenharmony_ci stib->cat6.iim_val = ncomp; 242bf215546Sopenharmony_ci stib->cat6.d = ncoords; 243bf215546Sopenharmony_ci stib->cat6.type = ir3_get_type_for_image_intrinsic(intr); 244bf215546Sopenharmony_ci stib->cat6.typed = true; 245bf215546Sopenharmony_ci stib->barrier_class = IR3_BARRIER_IMAGE_W; 246bf215546Sopenharmony_ci stib->barrier_conflict = IR3_BARRIER_IMAGE_R | IR3_BARRIER_IMAGE_W; 247bf215546Sopenharmony_ci ir3_handle_bindless_cat6(stib, intr->src[0]); 248bf215546Sopenharmony_ci ir3_handle_nonuniform(stib, intr); 249bf215546Sopenharmony_ci 250bf215546Sopenharmony_ci array_insert(b, b->keeps, stib); 251bf215546Sopenharmony_ci} 252bf215546Sopenharmony_ci 253bf215546Sopenharmony_ci/* src[] = { deref, coord, sample_index, value, compare }. const_index[] = {} */ 254bf215546Sopenharmony_cistatic struct ir3_instruction * 255bf215546Sopenharmony_ciemit_intrinsic_atomic_image(struct ir3_context *ctx, nir_intrinsic_instr *intr) 256bf215546Sopenharmony_ci{ 257bf215546Sopenharmony_ci struct ir3_block *b = ctx->block; 258bf215546Sopenharmony_ci struct ir3_instruction *atomic, *ibo, *src0, *src1, *dummy; 259bf215546Sopenharmony_ci struct ir3_instruction *const *coords = ir3_get_src(ctx, &intr->src[1]); 260bf215546Sopenharmony_ci struct ir3_instruction *value = ir3_get_src(ctx, &intr->src[3])[0]; 261bf215546Sopenharmony_ci unsigned ncoords = ir3_get_image_coords(intr, NULL); 262bf215546Sopenharmony_ci 263bf215546Sopenharmony_ci ibo = ir3_image_to_ibo(ctx, intr->src[0]); 264bf215546Sopenharmony_ci 265bf215546Sopenharmony_ci /* So this gets a bit creative: 266bf215546Sopenharmony_ci * 267bf215546Sopenharmony_ci * src0 - vecN offset/coords 268bf215546Sopenharmony_ci * src1.x - is actually destination register 269bf215546Sopenharmony_ci * src1.y - is 'value' except for cmpxchg where src2.y is 'compare' 270bf215546Sopenharmony_ci * src1.z - is 'value' for cmpxchg 271bf215546Sopenharmony_ci * 272bf215546Sopenharmony_ci * The combining src and dest kinda doesn't work out so well with how 273bf215546Sopenharmony_ci * scheduling and RA work. So we create a dummy src2 which is tied to the 274bf215546Sopenharmony_ci * destination in RA (i.e. must be allocated to the same vec2/vec3 275bf215546Sopenharmony_ci * register) and then immediately extract the first component. 276bf215546Sopenharmony_ci */ 277bf215546Sopenharmony_ci dummy = create_immed(b, 0); 278bf215546Sopenharmony_ci src0 = ir3_create_collect(b, coords, ncoords); 279bf215546Sopenharmony_ci 280bf215546Sopenharmony_ci if (intr->intrinsic == nir_intrinsic_image_atomic_comp_swap || 281bf215546Sopenharmony_ci intr->intrinsic == nir_intrinsic_bindless_image_atomic_comp_swap) { 282bf215546Sopenharmony_ci struct ir3_instruction *compare = ir3_get_src(ctx, &intr->src[4])[0]; 283bf215546Sopenharmony_ci src1 = ir3_collect(b, dummy, compare, value); 284bf215546Sopenharmony_ci } else { 285bf215546Sopenharmony_ci src1 = ir3_collect(b, dummy, value); 286bf215546Sopenharmony_ci } 287bf215546Sopenharmony_ci 288bf215546Sopenharmony_ci switch (intr->intrinsic) { 289bf215546Sopenharmony_ci case nir_intrinsic_image_atomic_add: 290bf215546Sopenharmony_ci case nir_intrinsic_bindless_image_atomic_add: 291bf215546Sopenharmony_ci atomic = ir3_ATOMIC_B_ADD(b, ibo, 0, src0, 0, src1, 0); 292bf215546Sopenharmony_ci break; 293bf215546Sopenharmony_ci case nir_intrinsic_image_atomic_imin: 294bf215546Sopenharmony_ci case nir_intrinsic_image_atomic_umin: 295bf215546Sopenharmony_ci case nir_intrinsic_bindless_image_atomic_imin: 296bf215546Sopenharmony_ci case nir_intrinsic_bindless_image_atomic_umin: 297bf215546Sopenharmony_ci atomic = ir3_ATOMIC_B_MIN(b, ibo, 0, src0, 0, src1, 0); 298bf215546Sopenharmony_ci break; 299bf215546Sopenharmony_ci case nir_intrinsic_image_atomic_imax: 300bf215546Sopenharmony_ci case nir_intrinsic_image_atomic_umax: 301bf215546Sopenharmony_ci case nir_intrinsic_bindless_image_atomic_imax: 302bf215546Sopenharmony_ci case nir_intrinsic_bindless_image_atomic_umax: 303bf215546Sopenharmony_ci atomic = ir3_ATOMIC_B_MAX(b, ibo, 0, src0, 0, src1, 0); 304bf215546Sopenharmony_ci break; 305bf215546Sopenharmony_ci case nir_intrinsic_image_atomic_and: 306bf215546Sopenharmony_ci case nir_intrinsic_bindless_image_atomic_and: 307bf215546Sopenharmony_ci atomic = ir3_ATOMIC_B_AND(b, ibo, 0, src0, 0, src1, 0); 308bf215546Sopenharmony_ci break; 309bf215546Sopenharmony_ci case nir_intrinsic_image_atomic_or: 310bf215546Sopenharmony_ci case nir_intrinsic_bindless_image_atomic_or: 311bf215546Sopenharmony_ci atomic = ir3_ATOMIC_B_OR(b, ibo, 0, src0, 0, src1, 0); 312bf215546Sopenharmony_ci break; 313bf215546Sopenharmony_ci case nir_intrinsic_image_atomic_xor: 314bf215546Sopenharmony_ci case nir_intrinsic_bindless_image_atomic_xor: 315bf215546Sopenharmony_ci atomic = ir3_ATOMIC_B_XOR(b, ibo, 0, src0, 0, src1, 0); 316bf215546Sopenharmony_ci break; 317bf215546Sopenharmony_ci case nir_intrinsic_image_atomic_exchange: 318bf215546Sopenharmony_ci case nir_intrinsic_bindless_image_atomic_exchange: 319bf215546Sopenharmony_ci atomic = ir3_ATOMIC_B_XCHG(b, ibo, 0, src0, 0, src1, 0); 320bf215546Sopenharmony_ci break; 321bf215546Sopenharmony_ci case nir_intrinsic_image_atomic_comp_swap: 322bf215546Sopenharmony_ci case nir_intrinsic_bindless_image_atomic_comp_swap: 323bf215546Sopenharmony_ci atomic = ir3_ATOMIC_B_CMPXCHG(b, ibo, 0, src0, 0, src1, 0); 324bf215546Sopenharmony_ci break; 325bf215546Sopenharmony_ci default: 326bf215546Sopenharmony_ci unreachable("boo"); 327bf215546Sopenharmony_ci } 328bf215546Sopenharmony_ci 329bf215546Sopenharmony_ci atomic->cat6.iim_val = 1; 330bf215546Sopenharmony_ci atomic->cat6.d = ncoords; 331bf215546Sopenharmony_ci atomic->cat6.type = ir3_get_type_for_image_intrinsic(intr); 332bf215546Sopenharmony_ci atomic->cat6.typed = true; 333bf215546Sopenharmony_ci atomic->barrier_class = IR3_BARRIER_IMAGE_W; 334bf215546Sopenharmony_ci atomic->barrier_conflict = IR3_BARRIER_IMAGE_R | IR3_BARRIER_IMAGE_W; 335bf215546Sopenharmony_ci ir3_handle_bindless_cat6(atomic, intr->src[0]); 336bf215546Sopenharmony_ci 337bf215546Sopenharmony_ci /* even if nothing consume the result, we can't DCE the instruction: */ 338bf215546Sopenharmony_ci array_insert(b, b->keeps, atomic); 339bf215546Sopenharmony_ci 340bf215546Sopenharmony_ci atomic->dsts[0]->wrmask = src1->dsts[0]->wrmask; 341bf215546Sopenharmony_ci ir3_reg_tie(atomic->dsts[0], atomic->srcs[2]); 342bf215546Sopenharmony_ci struct ir3_instruction *split; 343bf215546Sopenharmony_ci ir3_split_dest(b, &split, atomic, 0, 1); 344bf215546Sopenharmony_ci return split; 345bf215546Sopenharmony_ci} 346bf215546Sopenharmony_ci 347bf215546Sopenharmony_cistatic void 348bf215546Sopenharmony_ciemit_intrinsic_image_size(struct ir3_context *ctx, nir_intrinsic_instr *intr, 349bf215546Sopenharmony_ci struct ir3_instruction **dst) 350bf215546Sopenharmony_ci{ 351bf215546Sopenharmony_ci struct ir3_block *b = ctx->block; 352bf215546Sopenharmony_ci struct ir3_instruction *ibo = ir3_image_to_ibo(ctx, intr->src[0]); 353bf215546Sopenharmony_ci struct ir3_instruction *resinfo = ir3_RESINFO(b, ibo, 0); 354bf215546Sopenharmony_ci resinfo->cat6.iim_val = 1; 355bf215546Sopenharmony_ci resinfo->cat6.d = intr->num_components; 356bf215546Sopenharmony_ci resinfo->cat6.type = TYPE_U32; 357bf215546Sopenharmony_ci resinfo->cat6.typed = false; 358bf215546Sopenharmony_ci /* resinfo has no writemask and always writes out 3 components: */ 359bf215546Sopenharmony_ci compile_assert(ctx, intr->num_components <= 3); 360bf215546Sopenharmony_ci resinfo->dsts[0]->wrmask = MASK(3); 361bf215546Sopenharmony_ci ir3_handle_bindless_cat6(resinfo, intr->src[0]); 362bf215546Sopenharmony_ci ir3_handle_nonuniform(resinfo, intr); 363bf215546Sopenharmony_ci 364bf215546Sopenharmony_ci ir3_split_dest(b, dst, resinfo, 0, intr->num_components); 365bf215546Sopenharmony_ci} 366bf215546Sopenharmony_ci 367bf215546Sopenharmony_cistatic void 368bf215546Sopenharmony_ciemit_intrinsic_load_global_ir3(struct ir3_context *ctx, 369bf215546Sopenharmony_ci nir_intrinsic_instr *intr, 370bf215546Sopenharmony_ci struct ir3_instruction **dst) 371bf215546Sopenharmony_ci{ 372bf215546Sopenharmony_ci struct ir3_block *b = ctx->block; 373bf215546Sopenharmony_ci unsigned dest_components = nir_intrinsic_dest_components(intr); 374bf215546Sopenharmony_ci struct ir3_instruction *addr, *offset; 375bf215546Sopenharmony_ci 376bf215546Sopenharmony_ci addr = ir3_collect(b, ir3_get_src(ctx, &intr->src[0])[0], 377bf215546Sopenharmony_ci ir3_get_src(ctx, &intr->src[0])[1]); 378bf215546Sopenharmony_ci 379bf215546Sopenharmony_ci struct ir3_instruction *load; 380bf215546Sopenharmony_ci 381bf215546Sopenharmony_ci bool const_offset_in_bounds = nir_src_is_const(intr->src[1]) && 382bf215546Sopenharmony_ci nir_src_as_int(intr->src[1]) < (1 << 13) && 383bf215546Sopenharmony_ci nir_src_as_int(intr->src[1]) > -(1 << 13); 384bf215546Sopenharmony_ci 385bf215546Sopenharmony_ci if (const_offset_in_bounds) { 386bf215546Sopenharmony_ci load = ir3_LDG(b, addr, 0, create_immed(b, nir_src_as_int(intr->src[1])), 387bf215546Sopenharmony_ci 0, create_immed(b, dest_components), 0); 388bf215546Sopenharmony_ci } else { 389bf215546Sopenharmony_ci offset = ir3_get_src(ctx, &intr->src[1])[0]; 390bf215546Sopenharmony_ci load = 391bf215546Sopenharmony_ci ir3_LDG_A(b, addr, 0, offset, 0, create_immed(b, 0), 0, 392bf215546Sopenharmony_ci create_immed(b, 0), 0, create_immed(b, dest_components), 0); 393bf215546Sopenharmony_ci } 394bf215546Sopenharmony_ci 395bf215546Sopenharmony_ci load->cat6.type = type_uint_size(intr->dest.ssa.bit_size); 396bf215546Sopenharmony_ci load->dsts[0]->wrmask = MASK(dest_components); 397bf215546Sopenharmony_ci 398bf215546Sopenharmony_ci load->barrier_class = IR3_BARRIER_BUFFER_R; 399bf215546Sopenharmony_ci load->barrier_conflict = IR3_BARRIER_BUFFER_W; 400bf215546Sopenharmony_ci 401bf215546Sopenharmony_ci ir3_split_dest(b, dst, load, 0, dest_components); 402bf215546Sopenharmony_ci} 403bf215546Sopenharmony_ci 404bf215546Sopenharmony_cistatic void 405bf215546Sopenharmony_ciemit_intrinsic_store_global_ir3(struct ir3_context *ctx, 406bf215546Sopenharmony_ci nir_intrinsic_instr *intr) 407bf215546Sopenharmony_ci{ 408bf215546Sopenharmony_ci struct ir3_block *b = ctx->block; 409bf215546Sopenharmony_ci struct ir3_instruction *value, *addr, *offset; 410bf215546Sopenharmony_ci unsigned ncomp = nir_intrinsic_src_components(intr, 0); 411bf215546Sopenharmony_ci 412bf215546Sopenharmony_ci addr = ir3_collect(b, ir3_get_src(ctx, &intr->src[1])[0], 413bf215546Sopenharmony_ci ir3_get_src(ctx, &intr->src[1])[1]); 414bf215546Sopenharmony_ci 415bf215546Sopenharmony_ci value = ir3_create_collect(b, ir3_get_src(ctx, &intr->src[0]), ncomp); 416bf215546Sopenharmony_ci 417bf215546Sopenharmony_ci struct ir3_instruction *stg; 418bf215546Sopenharmony_ci 419bf215546Sopenharmony_ci bool const_offset_in_bounds = nir_src_is_const(intr->src[2]) && 420bf215546Sopenharmony_ci nir_src_as_int(intr->src[2]) < (1 << 13) && 421bf215546Sopenharmony_ci nir_src_as_int(intr->src[2]) > -(1 << 13); 422bf215546Sopenharmony_ci 423bf215546Sopenharmony_ci if (const_offset_in_bounds) { 424bf215546Sopenharmony_ci stg = ir3_STG(b, addr, 0, 425bf215546Sopenharmony_ci create_immed(b, nir_src_as_int(intr->src[2])), 0, 426bf215546Sopenharmony_ci value, 0, 427bf215546Sopenharmony_ci create_immed(b, ncomp), 0); 428bf215546Sopenharmony_ci } else { 429bf215546Sopenharmony_ci offset = ir3_get_src(ctx, &intr->src[2])[0]; 430bf215546Sopenharmony_ci stg = 431bf215546Sopenharmony_ci ir3_STG_A(b, addr, 0, offset, 0, create_immed(b, 0), 0, 432bf215546Sopenharmony_ci create_immed(b, 0), 0, value, 0, create_immed(b, ncomp), 0); 433bf215546Sopenharmony_ci } 434bf215546Sopenharmony_ci 435bf215546Sopenharmony_ci stg->cat6.type = type_uint_size(intr->src[0].ssa->bit_size); 436bf215546Sopenharmony_ci stg->cat6.iim_val = 1; 437bf215546Sopenharmony_ci 438bf215546Sopenharmony_ci array_insert(b, b->keeps, stg); 439bf215546Sopenharmony_ci 440bf215546Sopenharmony_ci stg->barrier_class = IR3_BARRIER_BUFFER_W; 441bf215546Sopenharmony_ci stg->barrier_conflict = IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W; 442bf215546Sopenharmony_ci} 443bf215546Sopenharmony_ci 444bf215546Sopenharmony_cistatic struct ir3_instruction * 445bf215546Sopenharmony_ciemit_intrinsic_atomic_global(struct ir3_context *ctx, nir_intrinsic_instr *intr) 446bf215546Sopenharmony_ci{ 447bf215546Sopenharmony_ci struct ir3_block *b = ctx->block; 448bf215546Sopenharmony_ci struct ir3_instruction *addr, *atomic, *src1; 449bf215546Sopenharmony_ci struct ir3_instruction *value = ir3_get_src(ctx, &intr->src[1])[0]; 450bf215546Sopenharmony_ci type_t type = TYPE_U32; 451bf215546Sopenharmony_ci 452bf215546Sopenharmony_ci addr = ir3_collect(b, ir3_get_src(ctx, &intr->src[0])[0], 453bf215546Sopenharmony_ci ir3_get_src(ctx, &intr->src[0])[1]); 454bf215546Sopenharmony_ci 455bf215546Sopenharmony_ci if (intr->intrinsic == nir_intrinsic_global_atomic_comp_swap_ir3) { 456bf215546Sopenharmony_ci struct ir3_instruction *compare = ir3_get_src(ctx, &intr->src[2])[0]; 457bf215546Sopenharmony_ci src1 = ir3_collect(b, compare, value); 458bf215546Sopenharmony_ci } else { 459bf215546Sopenharmony_ci src1 = value; 460bf215546Sopenharmony_ci } 461bf215546Sopenharmony_ci 462bf215546Sopenharmony_ci switch (intr->intrinsic) { 463bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_add_ir3: 464bf215546Sopenharmony_ci atomic = ir3_ATOMIC_G_ADD(b, addr, 0, src1, 0); 465bf215546Sopenharmony_ci break; 466bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_imin_ir3: 467bf215546Sopenharmony_ci atomic = ir3_ATOMIC_G_MIN(b, addr, 0, src1, 0); 468bf215546Sopenharmony_ci type = TYPE_S32; 469bf215546Sopenharmony_ci break; 470bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_umin_ir3: 471bf215546Sopenharmony_ci atomic = ir3_ATOMIC_G_MIN(b, addr, 0, src1, 0); 472bf215546Sopenharmony_ci break; 473bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_imax_ir3: 474bf215546Sopenharmony_ci atomic = ir3_ATOMIC_G_MAX(b, addr, 0, src1, 0); 475bf215546Sopenharmony_ci type = TYPE_S32; 476bf215546Sopenharmony_ci break; 477bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_umax_ir3: 478bf215546Sopenharmony_ci atomic = ir3_ATOMIC_G_MAX(b, addr, 0, src1, 0); 479bf215546Sopenharmony_ci break; 480bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_and_ir3: 481bf215546Sopenharmony_ci atomic = ir3_ATOMIC_G_AND(b, addr, 0, src1, 0); 482bf215546Sopenharmony_ci break; 483bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_or_ir3: 484bf215546Sopenharmony_ci atomic = ir3_ATOMIC_G_OR(b, addr, 0, src1, 0); 485bf215546Sopenharmony_ci break; 486bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_xor_ir3: 487bf215546Sopenharmony_ci atomic = ir3_ATOMIC_G_XOR(b, addr, 0, src1, 0); 488bf215546Sopenharmony_ci break; 489bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_exchange_ir3: 490bf215546Sopenharmony_ci atomic = ir3_ATOMIC_G_XCHG(b, addr, 0, src1, 0); 491bf215546Sopenharmony_ci break; 492bf215546Sopenharmony_ci case nir_intrinsic_global_atomic_comp_swap_ir3: 493bf215546Sopenharmony_ci atomic = ir3_ATOMIC_G_CMPXCHG(b, addr, 0, src1, 0); 494bf215546Sopenharmony_ci break; 495bf215546Sopenharmony_ci default: 496bf215546Sopenharmony_ci unreachable("Unknown global atomic op"); 497bf215546Sopenharmony_ci } 498bf215546Sopenharmony_ci 499bf215546Sopenharmony_ci atomic->cat6.iim_val = 1; 500bf215546Sopenharmony_ci atomic->cat6.d = 1; 501bf215546Sopenharmony_ci atomic->cat6.type = type; 502bf215546Sopenharmony_ci atomic->barrier_class = IR3_BARRIER_BUFFER_W; 503bf215546Sopenharmony_ci atomic->barrier_conflict = IR3_BARRIER_BUFFER_R | IR3_BARRIER_BUFFER_W; 504bf215546Sopenharmony_ci 505bf215546Sopenharmony_ci /* even if nothing consume the result, we can't DCE the instruction: */ 506bf215546Sopenharmony_ci array_insert(b, b->keeps, atomic); 507bf215546Sopenharmony_ci 508bf215546Sopenharmony_ci return atomic; 509bf215546Sopenharmony_ci} 510bf215546Sopenharmony_ci 511bf215546Sopenharmony_ciconst struct ir3_context_funcs ir3_a6xx_funcs = { 512bf215546Sopenharmony_ci .emit_intrinsic_load_ssbo = emit_intrinsic_load_ssbo, 513bf215546Sopenharmony_ci .emit_intrinsic_store_ssbo = emit_intrinsic_store_ssbo, 514bf215546Sopenharmony_ci .emit_intrinsic_atomic_ssbo = emit_intrinsic_atomic_ssbo, 515bf215546Sopenharmony_ci .emit_intrinsic_load_image = emit_intrinsic_load_image, 516bf215546Sopenharmony_ci .emit_intrinsic_store_image = emit_intrinsic_store_image, 517bf215546Sopenharmony_ci .emit_intrinsic_atomic_image = emit_intrinsic_atomic_image, 518bf215546Sopenharmony_ci .emit_intrinsic_image_size = emit_intrinsic_image_size, 519bf215546Sopenharmony_ci .emit_intrinsic_load_global_ir3 = emit_intrinsic_load_global_ir3, 520bf215546Sopenharmony_ci .emit_intrinsic_store_global_ir3 = emit_intrinsic_store_global_ir3, 521bf215546Sopenharmony_ci .emit_intrinsic_atomic_global = emit_intrinsic_atomic_global, 522bf215546Sopenharmony_ci}; 523