1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org> 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21bf215546Sopenharmony_ci * SOFTWARE. 22bf215546Sopenharmony_ci * 23bf215546Sopenharmony_ci * Authors: 24bf215546Sopenharmony_ci * Rob Clark <robclark@freedesktop.org> 25bf215546Sopenharmony_ci */ 26bf215546Sopenharmony_ci 27bf215546Sopenharmony_ci#include "pipe/p_state.h" 28bf215546Sopenharmony_ci#include "util/format/u_format.h" 29bf215546Sopenharmony_ci#include "util/u_inlines.h" 30bf215546Sopenharmony_ci#include "util/u_memory.h" 31bf215546Sopenharmony_ci#include "util/u_string.h" 32bf215546Sopenharmony_ci 33bf215546Sopenharmony_ci#include "freedreno_program.h" 34bf215546Sopenharmony_ci 35bf215546Sopenharmony_ci#include "fd4_emit.h" 36bf215546Sopenharmony_ci#include "fd4_format.h" 37bf215546Sopenharmony_ci#include "fd4_program.h" 38bf215546Sopenharmony_ci#include "fd4_texture.h" 39bf215546Sopenharmony_ci 40bf215546Sopenharmony_civoid 41bf215546Sopenharmony_cifd4_emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so) 42bf215546Sopenharmony_ci{ 43bf215546Sopenharmony_ci const struct ir3_info *si = &so->info; 44bf215546Sopenharmony_ci enum a4xx_state_block sb = fd4_stage2shadersb(so->type); 45bf215546Sopenharmony_ci enum a4xx_state_src src; 46bf215546Sopenharmony_ci uint32_t i, sz, *bin; 47bf215546Sopenharmony_ci 48bf215546Sopenharmony_ci if (FD_DBG(DIRECT)) { 49bf215546Sopenharmony_ci sz = si->sizedwords; 50bf215546Sopenharmony_ci src = SS4_DIRECT; 51bf215546Sopenharmony_ci bin = fd_bo_map(so->bo); 52bf215546Sopenharmony_ci } else { 53bf215546Sopenharmony_ci sz = 0; 54bf215546Sopenharmony_ci src = SS4_INDIRECT; 55bf215546Sopenharmony_ci bin = NULL; 56bf215546Sopenharmony_ci } 57bf215546Sopenharmony_ci 58bf215546Sopenharmony_ci OUT_PKT3(ring, CP_LOAD_STATE4, 2 + sz); 59bf215546Sopenharmony_ci OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(0) | 60bf215546Sopenharmony_ci CP_LOAD_STATE4_0_STATE_SRC(src) | 61bf215546Sopenharmony_ci CP_LOAD_STATE4_0_STATE_BLOCK(sb) | 62bf215546Sopenharmony_ci CP_LOAD_STATE4_0_NUM_UNIT(so->instrlen)); 63bf215546Sopenharmony_ci if (bin) { 64bf215546Sopenharmony_ci OUT_RING(ring, CP_LOAD_STATE4_1_EXT_SRC_ADDR(0) | 65bf215546Sopenharmony_ci CP_LOAD_STATE4_1_STATE_TYPE(ST4_SHADER)); 66bf215546Sopenharmony_ci } else { 67bf215546Sopenharmony_ci OUT_RELOC(ring, so->bo, 0, CP_LOAD_STATE4_1_STATE_TYPE(ST4_SHADER), 0); 68bf215546Sopenharmony_ci } 69bf215546Sopenharmony_ci 70bf215546Sopenharmony_ci /* for how clever coverity is, it is sometimes rather dull, and 71bf215546Sopenharmony_ci * doesn't realize that the only case where bin==NULL, sz==0: 72bf215546Sopenharmony_ci */ 73bf215546Sopenharmony_ci assume(bin || (sz == 0)); 74bf215546Sopenharmony_ci 75bf215546Sopenharmony_ci for (i = 0; i < sz; i++) { 76bf215546Sopenharmony_ci OUT_RING(ring, bin[i]); 77bf215546Sopenharmony_ci } 78bf215546Sopenharmony_ci} 79bf215546Sopenharmony_ci 80bf215546Sopenharmony_cistruct stage { 81bf215546Sopenharmony_ci const struct ir3_shader_variant *v; 82bf215546Sopenharmony_ci const struct ir3_info *i; 83bf215546Sopenharmony_ci /* const sizes are in units of 4 * vec4 */ 84bf215546Sopenharmony_ci uint8_t constoff; 85bf215546Sopenharmony_ci uint8_t constlen; 86bf215546Sopenharmony_ci /* instr sizes are in units of 16 instructions */ 87bf215546Sopenharmony_ci uint8_t instroff; 88bf215546Sopenharmony_ci uint8_t instrlen; 89bf215546Sopenharmony_ci}; 90bf215546Sopenharmony_ci 91bf215546Sopenharmony_cienum { VS = 0, FS = 1, HS = 2, DS = 3, GS = 4, MAX_STAGES }; 92bf215546Sopenharmony_ci 93bf215546Sopenharmony_cistatic void 94bf215546Sopenharmony_cisetup_stages(struct fd4_emit *emit, struct stage *s) 95bf215546Sopenharmony_ci{ 96bf215546Sopenharmony_ci unsigned i; 97bf215546Sopenharmony_ci 98bf215546Sopenharmony_ci s[VS].v = fd4_emit_get_vp(emit); 99bf215546Sopenharmony_ci s[FS].v = fd4_emit_get_fp(emit); 100bf215546Sopenharmony_ci 101bf215546Sopenharmony_ci s[HS].v = s[DS].v = s[GS].v = NULL; /* for now */ 102bf215546Sopenharmony_ci 103bf215546Sopenharmony_ci for (i = 0; i < MAX_STAGES; i++) { 104bf215546Sopenharmony_ci if (s[i].v) { 105bf215546Sopenharmony_ci s[i].i = &s[i].v->info; 106bf215546Sopenharmony_ci /* constlen is in units of 4 * vec4: */ 107bf215546Sopenharmony_ci assert(s[i].v->constlen % 4 == 0); 108bf215546Sopenharmony_ci s[i].constlen = s[i].v->constlen / 4; 109bf215546Sopenharmony_ci /* instrlen is already in units of 16 instr.. although 110bf215546Sopenharmony_ci * probably we should ditch that and not make the compiler 111bf215546Sopenharmony_ci * care about instruction group size of a3xx vs a4xx 112bf215546Sopenharmony_ci */ 113bf215546Sopenharmony_ci s[i].instrlen = s[i].v->instrlen; 114bf215546Sopenharmony_ci } else { 115bf215546Sopenharmony_ci s[i].i = NULL; 116bf215546Sopenharmony_ci s[i].constlen = 0; 117bf215546Sopenharmony_ci s[i].instrlen = 0; 118bf215546Sopenharmony_ci } 119bf215546Sopenharmony_ci } 120bf215546Sopenharmony_ci 121bf215546Sopenharmony_ci /* NOTE: at least for gles2, blob partitions VS at bottom of const 122bf215546Sopenharmony_ci * space and FS taking entire remaining space. We probably don't 123bf215546Sopenharmony_ci * need to do that the same way, but for now mimic what the blob 124bf215546Sopenharmony_ci * does to make it easier to diff against register values from blob 125bf215546Sopenharmony_ci * 126bf215546Sopenharmony_ci * NOTE: if VS.instrlen + FS.instrlen > 64, then one or both shaders 127bf215546Sopenharmony_ci * is run from external memory. 128bf215546Sopenharmony_ci */ 129bf215546Sopenharmony_ci if ((s[VS].instrlen + s[FS].instrlen) > 64) { 130bf215546Sopenharmony_ci /* prioritize FS for internal memory: */ 131bf215546Sopenharmony_ci if (s[FS].instrlen < 64) { 132bf215546Sopenharmony_ci /* if FS can fit, kick VS out to external memory: */ 133bf215546Sopenharmony_ci s[VS].instrlen = 0; 134bf215546Sopenharmony_ci } else if (s[VS].instrlen < 64) { 135bf215546Sopenharmony_ci /* otherwise if VS can fit, kick out FS: */ 136bf215546Sopenharmony_ci s[FS].instrlen = 0; 137bf215546Sopenharmony_ci } else { 138bf215546Sopenharmony_ci /* neither can fit, run both from external memory: */ 139bf215546Sopenharmony_ci s[VS].instrlen = 0; 140bf215546Sopenharmony_ci s[FS].instrlen = 0; 141bf215546Sopenharmony_ci } 142bf215546Sopenharmony_ci } 143bf215546Sopenharmony_ci s[VS].constlen = 66; 144bf215546Sopenharmony_ci s[FS].constlen = 128 - s[VS].constlen; 145bf215546Sopenharmony_ci s[VS].instroff = 0; 146bf215546Sopenharmony_ci s[VS].constoff = 0; 147bf215546Sopenharmony_ci s[FS].instroff = 64 - s[FS].instrlen; 148bf215546Sopenharmony_ci s[FS].constoff = s[VS].constlen; 149bf215546Sopenharmony_ci s[HS].instroff = s[DS].instroff = s[GS].instroff = s[FS].instroff; 150bf215546Sopenharmony_ci s[HS].constoff = s[DS].constoff = s[GS].constoff = s[FS].constoff; 151bf215546Sopenharmony_ci} 152bf215546Sopenharmony_ci 153bf215546Sopenharmony_civoid 154bf215546Sopenharmony_cifd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit, int nr, 155bf215546Sopenharmony_ci struct pipe_surface **bufs) 156bf215546Sopenharmony_ci{ 157bf215546Sopenharmony_ci struct stage s[MAX_STAGES]; 158bf215546Sopenharmony_ci uint32_t pos_regid, posz_regid, psize_regid, color_regid[8]; 159bf215546Sopenharmony_ci uint32_t face_regid, coord_regid, zwcoord_regid, samp_id_regid, 160bf215546Sopenharmony_ci samp_mask_regid, ij_regid[IJ_COUNT]; 161bf215546Sopenharmony_ci enum a3xx_threadsize fssz; 162bf215546Sopenharmony_ci int constmode; 163bf215546Sopenharmony_ci int i, j; 164bf215546Sopenharmony_ci 165bf215546Sopenharmony_ci assert(nr <= ARRAY_SIZE(color_regid)); 166bf215546Sopenharmony_ci 167bf215546Sopenharmony_ci if (emit->binning_pass) 168bf215546Sopenharmony_ci nr = 0; 169bf215546Sopenharmony_ci 170bf215546Sopenharmony_ci setup_stages(emit, s); 171bf215546Sopenharmony_ci 172bf215546Sopenharmony_ci fssz = (s[FS].i->double_threadsize) ? FOUR_QUADS : TWO_QUADS; 173bf215546Sopenharmony_ci 174bf215546Sopenharmony_ci /* blob seems to always use constmode currently: */ 175bf215546Sopenharmony_ci constmode = 1; 176bf215546Sopenharmony_ci 177bf215546Sopenharmony_ci pos_regid = ir3_find_output_regid(s[VS].v, VARYING_SLOT_POS); 178bf215546Sopenharmony_ci if (pos_regid == regid(63, 0)) { 179bf215546Sopenharmony_ci /* hw dislikes when there is no position output, which can 180bf215546Sopenharmony_ci * happen for transform-feedback vertex shaders. Just tell 181bf215546Sopenharmony_ci * the hw to use r0.x, with whatever random value is there: 182bf215546Sopenharmony_ci */ 183bf215546Sopenharmony_ci pos_regid = regid(0, 0); 184bf215546Sopenharmony_ci } 185bf215546Sopenharmony_ci posz_regid = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DEPTH); 186bf215546Sopenharmony_ci psize_regid = ir3_find_output_regid(s[VS].v, VARYING_SLOT_PSIZ); 187bf215546Sopenharmony_ci if (s[FS].v->color0_mrt) { 188bf215546Sopenharmony_ci color_regid[0] = color_regid[1] = color_regid[2] = color_regid[3] = 189bf215546Sopenharmony_ci color_regid[4] = color_regid[5] = color_regid[6] = color_regid[7] = 190bf215546Sopenharmony_ci ir3_find_output_regid(s[FS].v, FRAG_RESULT_COLOR); 191bf215546Sopenharmony_ci } else { 192bf215546Sopenharmony_ci color_regid[0] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA0); 193bf215546Sopenharmony_ci color_regid[1] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA1); 194bf215546Sopenharmony_ci color_regid[2] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA2); 195bf215546Sopenharmony_ci color_regid[3] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA3); 196bf215546Sopenharmony_ci color_regid[4] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA4); 197bf215546Sopenharmony_ci color_regid[5] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA5); 198bf215546Sopenharmony_ci color_regid[6] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA6); 199bf215546Sopenharmony_ci color_regid[7] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA7); 200bf215546Sopenharmony_ci } 201bf215546Sopenharmony_ci 202bf215546Sopenharmony_ci samp_id_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_SAMPLE_ID); 203bf215546Sopenharmony_ci samp_mask_regid = 204bf215546Sopenharmony_ci ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_SAMPLE_MASK_IN); 205bf215546Sopenharmony_ci face_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_FRONT_FACE); 206bf215546Sopenharmony_ci coord_regid = ir3_find_sysval_regid(s[FS].v, SYSTEM_VALUE_FRAG_COORD); 207bf215546Sopenharmony_ci zwcoord_regid = 208bf215546Sopenharmony_ci (coord_regid == regid(63, 0)) ? regid(63, 0) : (coord_regid + 2); 209bf215546Sopenharmony_ci for (unsigned i = 0; i < ARRAY_SIZE(ij_regid); i++) 210bf215546Sopenharmony_ci ij_regid[i] = ir3_find_sysval_regid( 211bf215546Sopenharmony_ci s[FS].v, SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL + i); 212bf215546Sopenharmony_ci 213bf215546Sopenharmony_ci /* we could probably divide this up into things that need to be 214bf215546Sopenharmony_ci * emitted if frag-prog is dirty vs if vert-prog is dirty.. 215bf215546Sopenharmony_ci */ 216bf215546Sopenharmony_ci 217bf215546Sopenharmony_ci OUT_PKT0(ring, REG_A4XX_HLSQ_UPDATE_CONTROL, 1); 218bf215546Sopenharmony_ci OUT_RING(ring, 0x00000003); 219bf215546Sopenharmony_ci 220bf215546Sopenharmony_ci OUT_PKT0(ring, REG_A4XX_HLSQ_CONTROL_0_REG, 5); 221bf215546Sopenharmony_ci OUT_RING(ring, A4XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(fssz) | 222bf215546Sopenharmony_ci A4XX_HLSQ_CONTROL_0_REG_CONSTMODE(constmode) | 223bf215546Sopenharmony_ci A4XX_HLSQ_CONTROL_0_REG_FSSUPERTHREADENABLE | 224bf215546Sopenharmony_ci /* NOTE: I guess SHADERRESTART and CONSTFULLUPDATE maybe 225bf215546Sopenharmony_ci * flush some caches? I think we only need to set those 226bf215546Sopenharmony_ci * bits if we have updated const or shader.. 227bf215546Sopenharmony_ci */ 228bf215546Sopenharmony_ci A4XX_HLSQ_CONTROL_0_REG_SPSHADERRESTART | 229bf215546Sopenharmony_ci A4XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE); 230bf215546Sopenharmony_ci OUT_RING(ring, A4XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) | 231bf215546Sopenharmony_ci A4XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE | 232bf215546Sopenharmony_ci A4XX_HLSQ_CONTROL_1_REG_COORDREGID(coord_regid) | 233bf215546Sopenharmony_ci A4XX_HLSQ_CONTROL_1_REG_ZWCOORDREGID(zwcoord_regid)); 234bf215546Sopenharmony_ci OUT_RING(ring, A4XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(63) | 235bf215546Sopenharmony_ci A4XX_HLSQ_CONTROL_2_REG_SAMPLEID_REGID(samp_id_regid) | 236bf215546Sopenharmony_ci A4XX_HLSQ_CONTROL_2_REG_SAMPLEMASK_REGID(samp_mask_regid) | 237bf215546Sopenharmony_ci A4XX_HLSQ_CONTROL_2_REG_FACEREGID(face_regid)); 238bf215546Sopenharmony_ci /* XXX left out centroid/sample for now */ 239bf215546Sopenharmony_ci OUT_RING( 240bf215546Sopenharmony_ci ring, 241bf215546Sopenharmony_ci A4XX_HLSQ_CONTROL_3_REG_IJ_PERSP_PIXEL(ij_regid[IJ_PERSP_PIXEL]) | 242bf215546Sopenharmony_ci A4XX_HLSQ_CONTROL_3_REG_IJ_LINEAR_PIXEL(ij_regid[IJ_LINEAR_PIXEL]) | 243bf215546Sopenharmony_ci A4XX_HLSQ_CONTROL_3_REG_IJ_PERSP_CENTROID( 244bf215546Sopenharmony_ci ij_regid[IJ_PERSP_CENTROID]) | 245bf215546Sopenharmony_ci A4XX_HLSQ_CONTROL_3_REG_IJ_LINEAR_CENTROID( 246bf215546Sopenharmony_ci ij_regid[IJ_LINEAR_CENTROID])); 247bf215546Sopenharmony_ci OUT_RING(ring, 0x00fcfcfc); /* XXX HLSQ_CONTROL_4 */ 248bf215546Sopenharmony_ci 249bf215546Sopenharmony_ci OUT_PKT0(ring, REG_A4XX_HLSQ_VS_CONTROL_REG, 5); 250bf215546Sopenharmony_ci OUT_RING(ring, 251bf215546Sopenharmony_ci A4XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(s[VS].constlen) | 252bf215546Sopenharmony_ci A4XX_HLSQ_VS_CONTROL_REG_CONSTOBJECTOFFSET(s[VS].constoff) | 253bf215546Sopenharmony_ci COND(s[VS].v && s[VS].v->has_ssbo, A4XX_HLSQ_VS_CONTROL_REG_SSBO_ENABLE) | 254bf215546Sopenharmony_ci COND(s[VS].v, A4XX_HLSQ_VS_CONTROL_REG_ENABLED) | 255bf215546Sopenharmony_ci A4XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH(s[VS].instrlen) | 256bf215546Sopenharmony_ci A4XX_HLSQ_VS_CONTROL_REG_SHADEROBJOFFSET(s[VS].instroff)); 257bf215546Sopenharmony_ci OUT_RING(ring, 258bf215546Sopenharmony_ci A4XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH(s[FS].constlen) | 259bf215546Sopenharmony_ci A4XX_HLSQ_FS_CONTROL_REG_CONSTOBJECTOFFSET(s[FS].constoff) | 260bf215546Sopenharmony_ci COND(s[FS].v && s[FS].v->has_ssbo, A4XX_HLSQ_FS_CONTROL_REG_SSBO_ENABLE) | 261bf215546Sopenharmony_ci COND(s[FS].v, A4XX_HLSQ_FS_CONTROL_REG_ENABLED) | 262bf215546Sopenharmony_ci A4XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH(s[FS].instrlen) | 263bf215546Sopenharmony_ci A4XX_HLSQ_FS_CONTROL_REG_SHADEROBJOFFSET(s[FS].instroff)); 264bf215546Sopenharmony_ci OUT_RING(ring, 265bf215546Sopenharmony_ci A4XX_HLSQ_HS_CONTROL_REG_CONSTLENGTH(s[HS].constlen) | 266bf215546Sopenharmony_ci A4XX_HLSQ_HS_CONTROL_REG_CONSTOBJECTOFFSET(s[HS].constoff) | 267bf215546Sopenharmony_ci COND(s[HS].v && s[HS].v->has_ssbo, A4XX_HLSQ_HS_CONTROL_REG_SSBO_ENABLE) | 268bf215546Sopenharmony_ci A4XX_HLSQ_HS_CONTROL_REG_INSTRLENGTH(s[HS].instrlen) | 269bf215546Sopenharmony_ci A4XX_HLSQ_HS_CONTROL_REG_SHADEROBJOFFSET(s[HS].instroff)); 270bf215546Sopenharmony_ci OUT_RING(ring, 271bf215546Sopenharmony_ci A4XX_HLSQ_DS_CONTROL_REG_CONSTLENGTH(s[DS].constlen) | 272bf215546Sopenharmony_ci A4XX_HLSQ_DS_CONTROL_REG_CONSTOBJECTOFFSET(s[DS].constoff) | 273bf215546Sopenharmony_ci COND(s[DS].v && s[DS].v->has_ssbo, A4XX_HLSQ_DS_CONTROL_REG_SSBO_ENABLE) | 274bf215546Sopenharmony_ci A4XX_HLSQ_DS_CONTROL_REG_INSTRLENGTH(s[DS].instrlen) | 275bf215546Sopenharmony_ci A4XX_HLSQ_DS_CONTROL_REG_SHADEROBJOFFSET(s[DS].instroff)); 276bf215546Sopenharmony_ci OUT_RING(ring, 277bf215546Sopenharmony_ci A4XX_HLSQ_GS_CONTROL_REG_CONSTLENGTH(s[GS].constlen) | 278bf215546Sopenharmony_ci A4XX_HLSQ_GS_CONTROL_REG_CONSTOBJECTOFFSET(s[GS].constoff) | 279bf215546Sopenharmony_ci COND(s[GS].v && s[GS].v->has_ssbo, A4XX_HLSQ_GS_CONTROL_REG_SSBO_ENABLE) | 280bf215546Sopenharmony_ci A4XX_HLSQ_GS_CONTROL_REG_INSTRLENGTH(s[GS].instrlen) | 281bf215546Sopenharmony_ci A4XX_HLSQ_GS_CONTROL_REG_SHADEROBJOFFSET(s[GS].instroff)); 282bf215546Sopenharmony_ci 283bf215546Sopenharmony_ci OUT_PKT0(ring, REG_A4XX_SP_SP_CTRL_REG, 1); 284bf215546Sopenharmony_ci OUT_RING(ring, 285bf215546Sopenharmony_ci 0x140010 | /* XXX */ 286bf215546Sopenharmony_ci COND(emit->binning_pass, A4XX_SP_SP_CTRL_REG_BINNING_PASS)); 287bf215546Sopenharmony_ci 288bf215546Sopenharmony_ci OUT_PKT0(ring, REG_A4XX_SP_INSTR_CACHE_CTRL, 1); 289bf215546Sopenharmony_ci OUT_RING(ring, 0x7f | /* XXX */ 290bf215546Sopenharmony_ci COND(s[VS].instrlen, A4XX_SP_INSTR_CACHE_CTRL_VS_BUFFER) | 291bf215546Sopenharmony_ci COND(s[FS].instrlen, A4XX_SP_INSTR_CACHE_CTRL_FS_BUFFER) | 292bf215546Sopenharmony_ci COND(s[VS].instrlen && s[FS].instrlen, 293bf215546Sopenharmony_ci A4XX_SP_INSTR_CACHE_CTRL_INSTR_BUFFER)); 294bf215546Sopenharmony_ci 295bf215546Sopenharmony_ci OUT_PKT0(ring, REG_A4XX_SP_VS_LENGTH_REG, 1); 296bf215546Sopenharmony_ci OUT_RING(ring, s[VS].v->instrlen); /* SP_VS_LENGTH_REG */ 297bf215546Sopenharmony_ci 298bf215546Sopenharmony_ci OUT_PKT0(ring, REG_A4XX_SP_VS_CTRL_REG0, 3); 299bf215546Sopenharmony_ci OUT_RING( 300bf215546Sopenharmony_ci ring, 301bf215546Sopenharmony_ci A4XX_SP_VS_CTRL_REG0_THREADMODE(MULTI) | 302bf215546Sopenharmony_ci A4XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(s[VS].i->max_half_reg + 1) | 303bf215546Sopenharmony_ci A4XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(s[VS].i->max_reg + 1) | 304bf215546Sopenharmony_ci A4XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP(0) | 305bf215546Sopenharmony_ci A4XX_SP_VS_CTRL_REG0_THREADSIZE(TWO_QUADS) | 306bf215546Sopenharmony_ci A4XX_SP_VS_CTRL_REG0_SUPERTHREADMODE | 307bf215546Sopenharmony_ci COND(s[VS].v->need_pixlod, A4XX_SP_VS_CTRL_REG0_PIXLODENABLE)); 308bf215546Sopenharmony_ci OUT_RING(ring, 309bf215546Sopenharmony_ci A4XX_SP_VS_CTRL_REG1_CONSTLENGTH(s[VS].constlen) | 310bf215546Sopenharmony_ci A4XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(s[VS].v->total_in)); 311bf215546Sopenharmony_ci OUT_RING(ring, A4XX_SP_VS_PARAM_REG_POSREGID(pos_regid) | 312bf215546Sopenharmony_ci A4XX_SP_VS_PARAM_REG_PSIZEREGID(psize_regid) | 313bf215546Sopenharmony_ci A4XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(s[FS].v->varying_in)); 314bf215546Sopenharmony_ci 315bf215546Sopenharmony_ci struct ir3_shader_linkage l = {0}; 316bf215546Sopenharmony_ci ir3_link_shaders(&l, s[VS].v, s[FS].v, false); 317bf215546Sopenharmony_ci 318bf215546Sopenharmony_ci for (i = 0, j = 0; (i < 16) && (j < l.cnt); i++) { 319bf215546Sopenharmony_ci uint32_t reg = 0; 320bf215546Sopenharmony_ci 321bf215546Sopenharmony_ci OUT_PKT0(ring, REG_A4XX_SP_VS_OUT_REG(i), 1); 322bf215546Sopenharmony_ci 323bf215546Sopenharmony_ci reg |= A4XX_SP_VS_OUT_REG_A_REGID(l.var[j].regid); 324bf215546Sopenharmony_ci reg |= A4XX_SP_VS_OUT_REG_A_COMPMASK(l.var[j].compmask); 325bf215546Sopenharmony_ci j++; 326bf215546Sopenharmony_ci 327bf215546Sopenharmony_ci reg |= A4XX_SP_VS_OUT_REG_B_REGID(l.var[j].regid); 328bf215546Sopenharmony_ci reg |= A4XX_SP_VS_OUT_REG_B_COMPMASK(l.var[j].compmask); 329bf215546Sopenharmony_ci j++; 330bf215546Sopenharmony_ci 331bf215546Sopenharmony_ci OUT_RING(ring, reg); 332bf215546Sopenharmony_ci } 333bf215546Sopenharmony_ci 334bf215546Sopenharmony_ci for (i = 0, j = 0; (i < 8) && (j < l.cnt); i++) { 335bf215546Sopenharmony_ci uint32_t reg = 0; 336bf215546Sopenharmony_ci 337bf215546Sopenharmony_ci OUT_PKT0(ring, REG_A4XX_SP_VS_VPC_DST_REG(i), 1); 338bf215546Sopenharmony_ci 339bf215546Sopenharmony_ci reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC0(l.var[j++].loc + 8); 340bf215546Sopenharmony_ci reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC1(l.var[j++].loc + 8); 341bf215546Sopenharmony_ci reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC2(l.var[j++].loc + 8); 342bf215546Sopenharmony_ci reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC3(l.var[j++].loc + 8); 343bf215546Sopenharmony_ci 344bf215546Sopenharmony_ci OUT_RING(ring, reg); 345bf215546Sopenharmony_ci } 346bf215546Sopenharmony_ci 347bf215546Sopenharmony_ci OUT_PKT0(ring, REG_A4XX_SP_VS_OBJ_OFFSET_REG, 2); 348bf215546Sopenharmony_ci OUT_RING(ring, A4XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[VS].constoff) | 349bf215546Sopenharmony_ci A4XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[VS].instroff)); 350bf215546Sopenharmony_ci OUT_RELOC(ring, s[VS].v->bo, 0, 0, 0); /* SP_VS_OBJ_START_REG */ 351bf215546Sopenharmony_ci 352bf215546Sopenharmony_ci if (emit->binning_pass) { 353bf215546Sopenharmony_ci OUT_PKT0(ring, REG_A4XX_SP_FS_LENGTH_REG, 1); 354bf215546Sopenharmony_ci OUT_RING(ring, 0x00000000); /* SP_FS_LENGTH_REG */ 355bf215546Sopenharmony_ci 356bf215546Sopenharmony_ci OUT_PKT0(ring, REG_A4XX_SP_FS_CTRL_REG0, 2); 357bf215546Sopenharmony_ci OUT_RING(ring, 358bf215546Sopenharmony_ci A4XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) | 359bf215546Sopenharmony_ci COND(s[FS].v->total_in > 0, A4XX_SP_FS_CTRL_REG0_VARYING) | 360bf215546Sopenharmony_ci A4XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(0) | 361bf215546Sopenharmony_ci A4XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(0) | 362bf215546Sopenharmony_ci A4XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(1) | 363bf215546Sopenharmony_ci A4XX_SP_FS_CTRL_REG0_THREADSIZE(fssz) | 364bf215546Sopenharmony_ci A4XX_SP_FS_CTRL_REG0_SUPERTHREADMODE); 365bf215546Sopenharmony_ci OUT_RING(ring, 366bf215546Sopenharmony_ci A4XX_SP_FS_CTRL_REG1_CONSTLENGTH(s[FS].constlen) | 0x80000000); 367bf215546Sopenharmony_ci 368bf215546Sopenharmony_ci OUT_PKT0(ring, REG_A4XX_SP_FS_OBJ_OFFSET_REG, 2); 369bf215546Sopenharmony_ci OUT_RING(ring, 370bf215546Sopenharmony_ci A4XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[FS].constoff) | 371bf215546Sopenharmony_ci A4XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[FS].instroff)); 372bf215546Sopenharmony_ci OUT_RING(ring, 0x00000000); 373bf215546Sopenharmony_ci } else { 374bf215546Sopenharmony_ci OUT_PKT0(ring, REG_A4XX_SP_FS_LENGTH_REG, 1); 375bf215546Sopenharmony_ci OUT_RING(ring, s[FS].v->instrlen); /* SP_FS_LENGTH_REG */ 376bf215546Sopenharmony_ci 377bf215546Sopenharmony_ci OUT_PKT0(ring, REG_A4XX_SP_FS_CTRL_REG0, 2); 378bf215546Sopenharmony_ci OUT_RING( 379bf215546Sopenharmony_ci ring, 380bf215546Sopenharmony_ci A4XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) | 381bf215546Sopenharmony_ci COND(s[FS].v->total_in > 0, A4XX_SP_FS_CTRL_REG0_VARYING) | 382bf215546Sopenharmony_ci A4XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(s[FS].i->max_half_reg + 1) | 383bf215546Sopenharmony_ci A4XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(s[FS].i->max_reg + 1) | 384bf215546Sopenharmony_ci A4XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(1) | 385bf215546Sopenharmony_ci A4XX_SP_FS_CTRL_REG0_THREADSIZE(fssz) | 386bf215546Sopenharmony_ci A4XX_SP_FS_CTRL_REG0_SUPERTHREADMODE | 387bf215546Sopenharmony_ci COND(s[FS].v->need_pixlod, A4XX_SP_FS_CTRL_REG0_PIXLODENABLE)); 388bf215546Sopenharmony_ci OUT_RING(ring, 389bf215546Sopenharmony_ci A4XX_SP_FS_CTRL_REG1_CONSTLENGTH(s[FS].constlen) | 390bf215546Sopenharmony_ci 0x80000000 | /* XXX */ 391bf215546Sopenharmony_ci COND(s[FS].v->frag_face, A4XX_SP_FS_CTRL_REG1_FACENESS) | 392bf215546Sopenharmony_ci COND(s[FS].v->total_in > 0, A4XX_SP_FS_CTRL_REG1_VARYING) | 393bf215546Sopenharmony_ci COND(s[FS].v->fragcoord_compmask != 0, 394bf215546Sopenharmony_ci A4XX_SP_FS_CTRL_REG1_FRAGCOORD)); 395bf215546Sopenharmony_ci 396bf215546Sopenharmony_ci OUT_PKT0(ring, REG_A4XX_SP_FS_OBJ_OFFSET_REG, 2); 397bf215546Sopenharmony_ci OUT_RING(ring, 398bf215546Sopenharmony_ci A4XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[FS].constoff) | 399bf215546Sopenharmony_ci A4XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[FS].instroff)); 400bf215546Sopenharmony_ci OUT_RELOC(ring, s[FS].v->bo, 0, 0, 0); /* SP_FS_OBJ_START_REG */ 401bf215546Sopenharmony_ci } 402bf215546Sopenharmony_ci 403bf215546Sopenharmony_ci OUT_PKT0(ring, REG_A4XX_SP_HS_OBJ_OFFSET_REG, 1); 404bf215546Sopenharmony_ci OUT_RING(ring, A4XX_SP_HS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[HS].constoff) | 405bf215546Sopenharmony_ci A4XX_SP_HS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[HS].instroff)); 406bf215546Sopenharmony_ci 407bf215546Sopenharmony_ci OUT_PKT0(ring, REG_A4XX_SP_DS_OBJ_OFFSET_REG, 1); 408bf215546Sopenharmony_ci OUT_RING(ring, A4XX_SP_DS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[DS].constoff) | 409bf215546Sopenharmony_ci A4XX_SP_DS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[DS].instroff)); 410bf215546Sopenharmony_ci 411bf215546Sopenharmony_ci OUT_PKT0(ring, REG_A4XX_SP_GS_OBJ_OFFSET_REG, 1); 412bf215546Sopenharmony_ci OUT_RING(ring, A4XX_SP_GS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[GS].constoff) | 413bf215546Sopenharmony_ci A4XX_SP_GS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[GS].instroff)); 414bf215546Sopenharmony_ci 415bf215546Sopenharmony_ci OUT_PKT0(ring, REG_A4XX_GRAS_CNTL, 1); 416bf215546Sopenharmony_ci OUT_RING(ring, 417bf215546Sopenharmony_ci CONDREG(face_regid, A4XX_GRAS_CNTL_IJ_PERSP) | 418bf215546Sopenharmony_ci CONDREG(zwcoord_regid, A4XX_GRAS_CNTL_IJ_PERSP) | 419bf215546Sopenharmony_ci CONDREG(ij_regid[IJ_PERSP_PIXEL], A4XX_GRAS_CNTL_IJ_PERSP) | 420bf215546Sopenharmony_ci CONDREG(ij_regid[IJ_LINEAR_PIXEL], A4XX_GRAS_CNTL_IJ_LINEAR) | 421bf215546Sopenharmony_ci CONDREG(ij_regid[IJ_PERSP_CENTROID], A4XX_GRAS_CNTL_IJ_PERSP)); 422bf215546Sopenharmony_ci 423bf215546Sopenharmony_ci OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL2, 1); 424bf215546Sopenharmony_ci OUT_RING( 425bf215546Sopenharmony_ci ring, 426bf215546Sopenharmony_ci A4XX_RB_RENDER_CONTROL2_MSAA_SAMPLES(0) | 427bf215546Sopenharmony_ci CONDREG(ij_regid[IJ_PERSP_PIXEL], 428bf215546Sopenharmony_ci A4XX_RB_RENDER_CONTROL2_IJ_PERSP_PIXEL) | 429bf215546Sopenharmony_ci CONDREG(ij_regid[IJ_PERSP_CENTROID], 430bf215546Sopenharmony_ci A4XX_RB_RENDER_CONTROL2_IJ_PERSP_CENTROID) | 431bf215546Sopenharmony_ci CONDREG(ij_regid[IJ_LINEAR_PIXEL], A4XX_RB_RENDER_CONTROL2_SIZE) | 432bf215546Sopenharmony_ci CONDREG(samp_id_regid, A4XX_RB_RENDER_CONTROL2_SAMPLEID) | 433bf215546Sopenharmony_ci COND(s[FS].v->frag_face, A4XX_RB_RENDER_CONTROL2_FACENESS) | 434bf215546Sopenharmony_ci CONDREG(samp_mask_regid, A4XX_RB_RENDER_CONTROL2_SAMPLEMASK) | 435bf215546Sopenharmony_ci COND(s[FS].v->fragcoord_compmask != 0, 436bf215546Sopenharmony_ci A4XX_RB_RENDER_CONTROL2_COORD_MASK(s[FS].v->fragcoord_compmask))); 437bf215546Sopenharmony_ci 438bf215546Sopenharmony_ci OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT_REG, 1); 439bf215546Sopenharmony_ci OUT_RING(ring, 440bf215546Sopenharmony_ci A4XX_RB_FS_OUTPUT_REG_MRT(nr) | 441bf215546Sopenharmony_ci COND(s[FS].v->writes_pos, A4XX_RB_FS_OUTPUT_REG_FRAG_WRITES_Z)); 442bf215546Sopenharmony_ci 443bf215546Sopenharmony_ci OUT_PKT0(ring, REG_A4XX_SP_FS_OUTPUT_REG, 1); 444bf215546Sopenharmony_ci OUT_RING(ring, 445bf215546Sopenharmony_ci A4XX_SP_FS_OUTPUT_REG_MRT(nr) | 446bf215546Sopenharmony_ci COND(s[FS].v->writes_pos, A4XX_SP_FS_OUTPUT_REG_DEPTH_ENABLE) | 447bf215546Sopenharmony_ci A4XX_SP_FS_OUTPUT_REG_DEPTH_REGID(posz_regid)); 448bf215546Sopenharmony_ci 449bf215546Sopenharmony_ci OUT_PKT0(ring, REG_A4XX_SP_FS_MRT_REG(0), 8); 450bf215546Sopenharmony_ci for (i = 0; i < 8; i++) { 451bf215546Sopenharmony_ci enum a4xx_color_fmt format = 0; 452bf215546Sopenharmony_ci bool srgb = false; 453bf215546Sopenharmony_ci bool uint = false; 454bf215546Sopenharmony_ci bool sint = false; 455bf215546Sopenharmony_ci if (i < nr) { 456bf215546Sopenharmony_ci format = fd4_emit_format(bufs[i]); 457bf215546Sopenharmony_ci if (bufs[i]) { 458bf215546Sopenharmony_ci if (!emit->no_decode_srgb) 459bf215546Sopenharmony_ci srgb = util_format_is_srgb(bufs[i]->format); 460bf215546Sopenharmony_ci uint = util_format_is_pure_uint(bufs[i]->format); 461bf215546Sopenharmony_ci sint = util_format_is_pure_sint(bufs[i]->format); 462bf215546Sopenharmony_ci } 463bf215546Sopenharmony_ci } 464bf215546Sopenharmony_ci OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(color_regid[i]) | 465bf215546Sopenharmony_ci A4XX_SP_FS_MRT_REG_MRTFORMAT(format) | 466bf215546Sopenharmony_ci COND(srgb, A4XX_SP_FS_MRT_REG_COLOR_SRGB) | 467bf215546Sopenharmony_ci COND(uint, A4XX_SP_FS_MRT_REG_COLOR_UINT) | 468bf215546Sopenharmony_ci COND(sint, A4XX_SP_FS_MRT_REG_COLOR_SINT) | 469bf215546Sopenharmony_ci COND(color_regid[i] & HALF_REG_ID, 470bf215546Sopenharmony_ci A4XX_SP_FS_MRT_REG_HALF_PRECISION)); 471bf215546Sopenharmony_ci } 472bf215546Sopenharmony_ci 473bf215546Sopenharmony_ci if (emit->binning_pass) { 474bf215546Sopenharmony_ci OUT_PKT0(ring, REG_A4XX_VPC_ATTR, 2); 475bf215546Sopenharmony_ci OUT_RING(ring, A4XX_VPC_ATTR_THRDASSIGN(1) | 0x40000000 | /* XXX */ 476bf215546Sopenharmony_ci COND(s[VS].v->writes_psize, A4XX_VPC_ATTR_PSIZE)); 477bf215546Sopenharmony_ci OUT_RING(ring, 0x00000000); 478bf215546Sopenharmony_ci } else { 479bf215546Sopenharmony_ci uint32_t vinterp[8], vpsrepl[8]; 480bf215546Sopenharmony_ci 481bf215546Sopenharmony_ci memset(vinterp, 0, sizeof(vinterp)); 482bf215546Sopenharmony_ci memset(vpsrepl, 0, sizeof(vpsrepl)); 483bf215546Sopenharmony_ci 484bf215546Sopenharmony_ci /* looks like we need to do int varyings in the frag 485bf215546Sopenharmony_ci * shader on a4xx (no flatshad reg? or a420.0 bug?): 486bf215546Sopenharmony_ci * 487bf215546Sopenharmony_ci * (sy)(ss)nop 488bf215546Sopenharmony_ci * (sy)ldlv.u32 r0.x,l[r0.x], 1 489bf215546Sopenharmony_ci * ldlv.u32 r0.y,l[r0.x+1], 1 490bf215546Sopenharmony_ci * (ss)bary.f (ei)r63.x, 0, r0.x 491bf215546Sopenharmony_ci * (ss)(rpt1)cov.s32f16 hr0.x, (r)r0.x 492bf215546Sopenharmony_ci * (rpt5)nop 493bf215546Sopenharmony_ci * sam (f16)(xyzw)hr0.x, hr0.x, s#0, t#0 494bf215546Sopenharmony_ci * 495bf215546Sopenharmony_ci * Possibly on later a4xx variants we'll be able to use 496bf215546Sopenharmony_ci * something like the code below instead of workaround 497bf215546Sopenharmony_ci * in the shader: 498bf215546Sopenharmony_ci */ 499bf215546Sopenharmony_ci /* figure out VARYING_INTERP / VARYING_PS_REPL register values: */ 500bf215546Sopenharmony_ci for (j = -1; 501bf215546Sopenharmony_ci (j = ir3_next_varying(s[FS].v, j)) < (int)s[FS].v->inputs_count;) { 502bf215546Sopenharmony_ci /* NOTE: varyings are packed, so if compmask is 0xb 503bf215546Sopenharmony_ci * then first, third, and fourth component occupy 504bf215546Sopenharmony_ci * three consecutive varying slots: 505bf215546Sopenharmony_ci */ 506bf215546Sopenharmony_ci unsigned compmask = s[FS].v->inputs[j].compmask; 507bf215546Sopenharmony_ci 508bf215546Sopenharmony_ci uint32_t inloc = s[FS].v->inputs[j].inloc; 509bf215546Sopenharmony_ci 510bf215546Sopenharmony_ci if (s[FS].v->inputs[j].flat || 511bf215546Sopenharmony_ci (s[FS].v->inputs[j].rasterflat && emit->rasterflat)) { 512bf215546Sopenharmony_ci uint32_t loc = inloc; 513bf215546Sopenharmony_ci 514bf215546Sopenharmony_ci for (i = 0; i < 4; i++) { 515bf215546Sopenharmony_ci if (compmask & (1 << i)) { 516bf215546Sopenharmony_ci vinterp[loc / 16] |= 1 << ((loc % 16) * 2); 517bf215546Sopenharmony_ci // flatshade[loc / 32] |= 1 << (loc % 32); 518bf215546Sopenharmony_ci loc++; 519bf215546Sopenharmony_ci } 520bf215546Sopenharmony_ci } 521bf215546Sopenharmony_ci } 522bf215546Sopenharmony_ci 523bf215546Sopenharmony_ci bool coord_mode = emit->sprite_coord_mode; 524bf215546Sopenharmony_ci if (ir3_point_sprite(s[FS].v, j, emit->sprite_coord_enable, 525bf215546Sopenharmony_ci &coord_mode)) { 526bf215546Sopenharmony_ci /* mask is two 2-bit fields, where: 527bf215546Sopenharmony_ci * '01' -> S 528bf215546Sopenharmony_ci * '10' -> T 529bf215546Sopenharmony_ci * '11' -> 1 - T (flip mode) 530bf215546Sopenharmony_ci */ 531bf215546Sopenharmony_ci unsigned mask = coord_mode ? 0b1101 : 0b1001; 532bf215546Sopenharmony_ci uint32_t loc = inloc; 533bf215546Sopenharmony_ci if (compmask & 0x1) { 534bf215546Sopenharmony_ci vpsrepl[loc / 16] |= ((mask >> 0) & 0x3) << ((loc % 16) * 2); 535bf215546Sopenharmony_ci loc++; 536bf215546Sopenharmony_ci } 537bf215546Sopenharmony_ci if (compmask & 0x2) { 538bf215546Sopenharmony_ci vpsrepl[loc / 16] |= ((mask >> 2) & 0x3) << ((loc % 16) * 2); 539bf215546Sopenharmony_ci loc++; 540bf215546Sopenharmony_ci } 541bf215546Sopenharmony_ci if (compmask & 0x4) { 542bf215546Sopenharmony_ci /* .z <- 0.0f */ 543bf215546Sopenharmony_ci vinterp[loc / 16] |= 0b10 << ((loc % 16) * 2); 544bf215546Sopenharmony_ci loc++; 545bf215546Sopenharmony_ci } 546bf215546Sopenharmony_ci if (compmask & 0x8) { 547bf215546Sopenharmony_ci /* .w <- 1.0f */ 548bf215546Sopenharmony_ci vinterp[loc / 16] |= 0b11 << ((loc % 16) * 2); 549bf215546Sopenharmony_ci loc++; 550bf215546Sopenharmony_ci } 551bf215546Sopenharmony_ci } 552bf215546Sopenharmony_ci } 553bf215546Sopenharmony_ci 554bf215546Sopenharmony_ci OUT_PKT0(ring, REG_A4XX_VPC_ATTR, 2); 555bf215546Sopenharmony_ci OUT_RING(ring, A4XX_VPC_ATTR_TOTALATTR(s[FS].v->total_in) | 556bf215546Sopenharmony_ci A4XX_VPC_ATTR_THRDASSIGN(1) | 557bf215546Sopenharmony_ci COND(s[FS].v->total_in > 0, A4XX_VPC_ATTR_ENABLE) | 558bf215546Sopenharmony_ci 0x40000000 | /* XXX */ 559bf215546Sopenharmony_ci COND(s[VS].v->writes_psize, A4XX_VPC_ATTR_PSIZE)); 560bf215546Sopenharmony_ci OUT_RING(ring, A4XX_VPC_PACK_NUMFPNONPOSVAR(s[FS].v->total_in) | 561bf215546Sopenharmony_ci A4XX_VPC_PACK_NUMNONPOSVSVAR(s[FS].v->total_in)); 562bf215546Sopenharmony_ci 563bf215546Sopenharmony_ci OUT_PKT0(ring, REG_A4XX_VPC_VARYING_INTERP_MODE(0), 8); 564bf215546Sopenharmony_ci for (i = 0; i < 8; i++) 565bf215546Sopenharmony_ci OUT_RING(ring, vinterp[i]); /* VPC_VARYING_INTERP[i].MODE */ 566bf215546Sopenharmony_ci 567bf215546Sopenharmony_ci OUT_PKT0(ring, REG_A4XX_VPC_VARYING_PS_REPL_MODE(0), 8); 568bf215546Sopenharmony_ci for (i = 0; i < 8; i++) 569bf215546Sopenharmony_ci OUT_RING(ring, vpsrepl[i]); /* VPC_VARYING_PS_REPL[i] */ 570bf215546Sopenharmony_ci } 571bf215546Sopenharmony_ci 572bf215546Sopenharmony_ci if (s[VS].instrlen) 573bf215546Sopenharmony_ci fd4_emit_shader(ring, s[VS].v); 574bf215546Sopenharmony_ci 575bf215546Sopenharmony_ci if (!emit->binning_pass) 576bf215546Sopenharmony_ci if (s[FS].instrlen) 577bf215546Sopenharmony_ci fd4_emit_shader(ring, s[FS].v); 578bf215546Sopenharmony_ci} 579bf215546Sopenharmony_ci 580bf215546Sopenharmony_cistatic struct ir3_program_state * 581bf215546Sopenharmony_cifd4_program_create(void *data, struct ir3_shader_variant *bs, 582bf215546Sopenharmony_ci struct ir3_shader_variant *vs, struct ir3_shader_variant *hs, 583bf215546Sopenharmony_ci struct ir3_shader_variant *ds, struct ir3_shader_variant *gs, 584bf215546Sopenharmony_ci struct ir3_shader_variant *fs, 585bf215546Sopenharmony_ci const struct ir3_cache_key *key) in_dt 586bf215546Sopenharmony_ci{ 587bf215546Sopenharmony_ci struct fd_context *ctx = fd_context(data); 588bf215546Sopenharmony_ci struct fd4_program_state *state = CALLOC_STRUCT(fd4_program_state); 589bf215546Sopenharmony_ci 590bf215546Sopenharmony_ci tc_assert_driver_thread(ctx->tc); 591bf215546Sopenharmony_ci 592bf215546Sopenharmony_ci state->bs = bs; 593bf215546Sopenharmony_ci state->vs = vs; 594bf215546Sopenharmony_ci state->fs = fs; 595bf215546Sopenharmony_ci 596bf215546Sopenharmony_ci return &state->base; 597bf215546Sopenharmony_ci} 598bf215546Sopenharmony_ci 599bf215546Sopenharmony_cistatic void 600bf215546Sopenharmony_cifd4_program_destroy(void *data, struct ir3_program_state *state) 601bf215546Sopenharmony_ci{ 602bf215546Sopenharmony_ci struct fd4_program_state *so = fd4_program_state(state); 603bf215546Sopenharmony_ci free(so); 604bf215546Sopenharmony_ci} 605bf215546Sopenharmony_ci 606bf215546Sopenharmony_cistatic const struct ir3_cache_funcs cache_funcs = { 607bf215546Sopenharmony_ci .create_state = fd4_program_create, 608bf215546Sopenharmony_ci .destroy_state = fd4_program_destroy, 609bf215546Sopenharmony_ci}; 610bf215546Sopenharmony_ci 611bf215546Sopenharmony_civoid 612bf215546Sopenharmony_cifd4_prog_init(struct pipe_context *pctx) 613bf215546Sopenharmony_ci{ 614bf215546Sopenharmony_ci struct fd_context *ctx = fd_context(pctx); 615bf215546Sopenharmony_ci 616bf215546Sopenharmony_ci ctx->shader_cache = ir3_cache_create(&cache_funcs, ctx); 617bf215546Sopenharmony_ci ir3_prog_init(pctx); 618bf215546Sopenharmony_ci fd_prog_init(pctx); 619bf215546Sopenharmony_ci} 620