1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org> 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21bf215546Sopenharmony_ci * SOFTWARE. 22bf215546Sopenharmony_ci * 23bf215546Sopenharmony_ci * Authors: 24bf215546Sopenharmony_ci * Rob Clark <robclark@freedesktop.org> 25bf215546Sopenharmony_ci */ 26bf215546Sopenharmony_ci 27bf215546Sopenharmony_ci#include "tgsi/tgsi_transform.h" 28bf215546Sopenharmony_ci#include "tgsi/tgsi_scan.h" 29bf215546Sopenharmony_ci#include "tgsi/tgsi_dump.h" 30bf215546Sopenharmony_ci 31bf215546Sopenharmony_ci#include "util/compiler.h" 32bf215546Sopenharmony_ci#include "util/u_debug.h" 33bf215546Sopenharmony_ci#include "util/u_math.h" 34bf215546Sopenharmony_ci 35bf215546Sopenharmony_ci#include "tgsi_lowering.h" 36bf215546Sopenharmony_ci 37bf215546Sopenharmony_cistruct tgsi_lowering_context { 38bf215546Sopenharmony_ci struct tgsi_transform_context base; 39bf215546Sopenharmony_ci const struct tgsi_lowering_config *config; 40bf215546Sopenharmony_ci struct tgsi_shader_info *info; 41bf215546Sopenharmony_ci unsigned two_side_colors; 42bf215546Sopenharmony_ci unsigned two_side_idx[PIPE_MAX_SHADER_INPUTS]; 43bf215546Sopenharmony_ci unsigned color_base; /* base register for chosen COLOR/BCOLOR's */ 44bf215546Sopenharmony_ci int face_idx; 45bf215546Sopenharmony_ci unsigned numtmp; 46bf215546Sopenharmony_ci struct { 47bf215546Sopenharmony_ci struct tgsi_full_src_register src; 48bf215546Sopenharmony_ci struct tgsi_full_dst_register dst; 49bf215546Sopenharmony_ci } tmp[2]; 50bf215546Sopenharmony_ci#define A 0 51bf215546Sopenharmony_ci#define B 1 52bf215546Sopenharmony_ci struct tgsi_full_src_register imm; 53bf215546Sopenharmony_ci int emitted_decls; 54bf215546Sopenharmony_ci unsigned saturate; 55bf215546Sopenharmony_ci}; 56bf215546Sopenharmony_ci 57bf215546Sopenharmony_cistatic inline struct tgsi_lowering_context * 58bf215546Sopenharmony_citgsi_lowering_context(struct tgsi_transform_context *tctx) 59bf215546Sopenharmony_ci{ 60bf215546Sopenharmony_ci return (struct tgsi_lowering_context *)tctx; 61bf215546Sopenharmony_ci} 62bf215546Sopenharmony_ci 63bf215546Sopenharmony_ci/* 64bf215546Sopenharmony_ci * Utility helpers: 65bf215546Sopenharmony_ci */ 66bf215546Sopenharmony_ci 67bf215546Sopenharmony_cistatic void 68bf215546Sopenharmony_cireg_dst(struct tgsi_full_dst_register *dst, 69bf215546Sopenharmony_ci const struct tgsi_full_dst_register *orig_dst, unsigned wrmask) 70bf215546Sopenharmony_ci{ 71bf215546Sopenharmony_ci *dst = *orig_dst; 72bf215546Sopenharmony_ci dst->Register.WriteMask &= wrmask; 73bf215546Sopenharmony_ci assert(dst->Register.WriteMask); 74bf215546Sopenharmony_ci} 75bf215546Sopenharmony_ci 76bf215546Sopenharmony_cistatic inline void 77bf215546Sopenharmony_ciget_swiz(unsigned *swiz, const struct tgsi_src_register *src) 78bf215546Sopenharmony_ci{ 79bf215546Sopenharmony_ci swiz[0] = src->SwizzleX; 80bf215546Sopenharmony_ci swiz[1] = src->SwizzleY; 81bf215546Sopenharmony_ci swiz[2] = src->SwizzleZ; 82bf215546Sopenharmony_ci swiz[3] = src->SwizzleW; 83bf215546Sopenharmony_ci} 84bf215546Sopenharmony_ci 85bf215546Sopenharmony_cistatic void 86bf215546Sopenharmony_cireg_src(struct tgsi_full_src_register *src, 87bf215546Sopenharmony_ci const struct tgsi_full_src_register *orig_src, 88bf215546Sopenharmony_ci unsigned sx, unsigned sy, unsigned sz, unsigned sw) 89bf215546Sopenharmony_ci{ 90bf215546Sopenharmony_ci unsigned swiz[4]; 91bf215546Sopenharmony_ci get_swiz(swiz, &orig_src->Register); 92bf215546Sopenharmony_ci *src = *orig_src; 93bf215546Sopenharmony_ci src->Register.SwizzleX = swiz[sx]; 94bf215546Sopenharmony_ci src->Register.SwizzleY = swiz[sy]; 95bf215546Sopenharmony_ci src->Register.SwizzleZ = swiz[sz]; 96bf215546Sopenharmony_ci src->Register.SwizzleW = swiz[sw]; 97bf215546Sopenharmony_ci} 98bf215546Sopenharmony_ci 99bf215546Sopenharmony_ci#define TGSI_SWIZZLE__ TGSI_SWIZZLE_X /* don't-care value! */ 100bf215546Sopenharmony_ci#define SWIZ(x,y,z,w) TGSI_SWIZZLE_ ## x, TGSI_SWIZZLE_ ## y, \ 101bf215546Sopenharmony_ci TGSI_SWIZZLE_ ## z, TGSI_SWIZZLE_ ## w 102bf215546Sopenharmony_ci 103bf215546Sopenharmony_ci/* 104bf215546Sopenharmony_ci * if (dst.x aliases src.x) { 105bf215546Sopenharmony_ci * MOV tmpA.x, src.x 106bf215546Sopenharmony_ci * src = tmpA 107bf215546Sopenharmony_ci * } 108bf215546Sopenharmony_ci * COS dst.x, src.x 109bf215546Sopenharmony_ci * SIN dst.y, src.x 110bf215546Sopenharmony_ci * MOV dst.zw, imm{0.0, 1.0} 111bf215546Sopenharmony_ci */ 112bf215546Sopenharmony_cistatic bool 113bf215546Sopenharmony_cialiases(const struct tgsi_full_dst_register *dst, unsigned dst_mask, 114bf215546Sopenharmony_ci const struct tgsi_full_src_register *src, unsigned src_mask) 115bf215546Sopenharmony_ci{ 116bf215546Sopenharmony_ci if ((dst->Register.File == src->Register.File) && 117bf215546Sopenharmony_ci (dst->Register.Index == src->Register.Index)) { 118bf215546Sopenharmony_ci unsigned i, actual_mask = 0; 119bf215546Sopenharmony_ci unsigned swiz[4]; 120bf215546Sopenharmony_ci get_swiz(swiz, &src->Register); 121bf215546Sopenharmony_ci for (i = 0; i < 4; i++) 122bf215546Sopenharmony_ci if (src_mask & (1 << i)) 123bf215546Sopenharmony_ci actual_mask |= (1 << swiz[i]); 124bf215546Sopenharmony_ci if (actual_mask & dst_mask) 125bf215546Sopenharmony_ci return true; 126bf215546Sopenharmony_ci } 127bf215546Sopenharmony_ci return false; 128bf215546Sopenharmony_ci} 129bf215546Sopenharmony_ci 130bf215546Sopenharmony_cistatic void 131bf215546Sopenharmony_cicreate_mov(struct tgsi_transform_context *tctx, 132bf215546Sopenharmony_ci const struct tgsi_full_dst_register *dst, 133bf215546Sopenharmony_ci const struct tgsi_full_src_register *src, 134bf215546Sopenharmony_ci unsigned mask, unsigned saturate) 135bf215546Sopenharmony_ci{ 136bf215546Sopenharmony_ci struct tgsi_full_instruction new_inst; 137bf215546Sopenharmony_ci 138bf215546Sopenharmony_ci new_inst = tgsi_default_full_instruction(); 139bf215546Sopenharmony_ci new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 140bf215546Sopenharmony_ci new_inst.Instruction.Saturate = saturate; 141bf215546Sopenharmony_ci new_inst.Instruction.NumDstRegs = 1; 142bf215546Sopenharmony_ci reg_dst(&new_inst.Dst[0], dst, mask); 143bf215546Sopenharmony_ci new_inst.Instruction.NumSrcRegs = 1; 144bf215546Sopenharmony_ci reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W)); 145bf215546Sopenharmony_ci tctx->emit_instruction(tctx, &new_inst); 146bf215546Sopenharmony_ci} 147bf215546Sopenharmony_ci 148bf215546Sopenharmony_ci/* to help calculate # of tgsi tokens for a lowering.. we assume 149bf215546Sopenharmony_ci * the worst case, ie. removed instructions don't have ADDR[] or 150bf215546Sopenharmony_ci * anything which increases the # of tokens per src/dst and the 151bf215546Sopenharmony_ci * inserted instructions do. 152bf215546Sopenharmony_ci * 153bf215546Sopenharmony_ci * OINST() - old instruction 154bf215546Sopenharmony_ci * 1 : instruction itself 155bf215546Sopenharmony_ci * 1 : dst 156bf215546Sopenharmony_ci * 1 * nargs : srcN 157bf215546Sopenharmony_ci * 158bf215546Sopenharmony_ci * NINST() - new instruction 159bf215546Sopenharmony_ci * 1 : instruction itself 160bf215546Sopenharmony_ci * 2 : dst 161bf215546Sopenharmony_ci * 2 * nargs : srcN 162bf215546Sopenharmony_ci */ 163bf215546Sopenharmony_ci 164bf215546Sopenharmony_ci#define OINST(nargs) (1 + 1 + 1 * (nargs)) 165bf215546Sopenharmony_ci#define NINST(nargs) (1 + 2 + 2 * (nargs)) 166bf215546Sopenharmony_ci 167bf215546Sopenharmony_ci/* 168bf215546Sopenharmony_ci * Lowering Translators: 169bf215546Sopenharmony_ci */ 170bf215546Sopenharmony_ci 171bf215546Sopenharmony_ci/* DST - Distance Vector 172bf215546Sopenharmony_ci * dst.x = 1.0 173bf215546Sopenharmony_ci * dst.y = src0.y \times src1.y 174bf215546Sopenharmony_ci * dst.z = src0.z 175bf215546Sopenharmony_ci * dst.w = src1.w 176bf215546Sopenharmony_ci * 177bf215546Sopenharmony_ci * ; note: could be more clever and use just a single temp 178bf215546Sopenharmony_ci * ; if I was clever enough to re-write the swizzles. 179bf215546Sopenharmony_ci * ; needs: 2 tmp, imm{1.0} 180bf215546Sopenharmony_ci * if (dst.y aliases src0.z) { 181bf215546Sopenharmony_ci * MOV tmpA.yz, src0.yz 182bf215546Sopenharmony_ci * src0 = tmpA 183bf215546Sopenharmony_ci * } 184bf215546Sopenharmony_ci * if (dst.yz aliases src1.w) { 185bf215546Sopenharmony_ci * MOV tmpB.yw, src1.yw 186bf215546Sopenharmony_ci * src1 = tmpB 187bf215546Sopenharmony_ci * } 188bf215546Sopenharmony_ci * MUL dst.y, src0.y, src1.y 189bf215546Sopenharmony_ci * MOV dst.z, src0.z 190bf215546Sopenharmony_ci * MOV dst.w, src1.w 191bf215546Sopenharmony_ci * MOV dst.x, imm{1.0} 192bf215546Sopenharmony_ci */ 193bf215546Sopenharmony_ci#define DST_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + \ 194bf215546Sopenharmony_ci NINST(1) + NINST(1) - OINST(2)) 195bf215546Sopenharmony_ci#define DST_TMP 2 196bf215546Sopenharmony_cistatic void 197bf215546Sopenharmony_citransform_dst(struct tgsi_transform_context *tctx, 198bf215546Sopenharmony_ci struct tgsi_full_instruction *inst) 199bf215546Sopenharmony_ci{ 200bf215546Sopenharmony_ci struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 201bf215546Sopenharmony_ci struct tgsi_full_dst_register *dst = &inst->Dst[0]; 202bf215546Sopenharmony_ci struct tgsi_full_src_register *src0 = &inst->Src[0]; 203bf215546Sopenharmony_ci struct tgsi_full_src_register *src1 = &inst->Src[1]; 204bf215546Sopenharmony_ci struct tgsi_full_instruction new_inst; 205bf215546Sopenharmony_ci 206bf215546Sopenharmony_ci if (aliases(dst, TGSI_WRITEMASK_Y, src0, TGSI_WRITEMASK_Z)) { 207bf215546Sopenharmony_ci create_mov(tctx, &ctx->tmp[A].dst, src0, TGSI_WRITEMASK_YZ, 0); 208bf215546Sopenharmony_ci src0 = &ctx->tmp[A].src; 209bf215546Sopenharmony_ci } 210bf215546Sopenharmony_ci 211bf215546Sopenharmony_ci if (aliases(dst, TGSI_WRITEMASK_YZ, src1, TGSI_WRITEMASK_W)) { 212bf215546Sopenharmony_ci create_mov(tctx, &ctx->tmp[B].dst, src1, TGSI_WRITEMASK_YW, 0); 213bf215546Sopenharmony_ci src1 = &ctx->tmp[B].src; 214bf215546Sopenharmony_ci } 215bf215546Sopenharmony_ci 216bf215546Sopenharmony_ci if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) { 217bf215546Sopenharmony_ci /* MUL dst.y, src0.y, src1.y */ 218bf215546Sopenharmony_ci new_inst = tgsi_default_full_instruction(); 219bf215546Sopenharmony_ci new_inst.Instruction.Opcode = TGSI_OPCODE_MUL; 220bf215546Sopenharmony_ci new_inst.Instruction.NumDstRegs = 1; 221bf215546Sopenharmony_ci reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y); 222bf215546Sopenharmony_ci new_inst.Instruction.NumSrcRegs = 2; 223bf215546Sopenharmony_ci reg_src(&new_inst.Src[0], src0, SWIZ(_, Y, _, _)); 224bf215546Sopenharmony_ci reg_src(&new_inst.Src[1], src1, SWIZ(_, Y, _, _)); 225bf215546Sopenharmony_ci tctx->emit_instruction(tctx, &new_inst); 226bf215546Sopenharmony_ci } 227bf215546Sopenharmony_ci 228bf215546Sopenharmony_ci if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) { 229bf215546Sopenharmony_ci /* MOV dst.z, src0.z */ 230bf215546Sopenharmony_ci new_inst = tgsi_default_full_instruction(); 231bf215546Sopenharmony_ci new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 232bf215546Sopenharmony_ci new_inst.Instruction.NumDstRegs = 1; 233bf215546Sopenharmony_ci reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Z); 234bf215546Sopenharmony_ci new_inst.Instruction.NumSrcRegs = 1; 235bf215546Sopenharmony_ci reg_src(&new_inst.Src[0], src0, SWIZ(_, _, Z, _)); 236bf215546Sopenharmony_ci tctx->emit_instruction(tctx, &new_inst); 237bf215546Sopenharmony_ci } 238bf215546Sopenharmony_ci 239bf215546Sopenharmony_ci if (dst->Register.WriteMask & TGSI_WRITEMASK_W) { 240bf215546Sopenharmony_ci /* MOV dst.w, src1.w */ 241bf215546Sopenharmony_ci new_inst = tgsi_default_full_instruction(); 242bf215546Sopenharmony_ci new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 243bf215546Sopenharmony_ci new_inst.Instruction.NumDstRegs = 1; 244bf215546Sopenharmony_ci reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W); 245bf215546Sopenharmony_ci new_inst.Instruction.NumSrcRegs = 1; 246bf215546Sopenharmony_ci reg_src(&new_inst.Src[0], src1, SWIZ(_, _, _, W)); 247bf215546Sopenharmony_ci tctx->emit_instruction(tctx, &new_inst); 248bf215546Sopenharmony_ci } 249bf215546Sopenharmony_ci 250bf215546Sopenharmony_ci if (dst->Register.WriteMask & TGSI_WRITEMASK_X) { 251bf215546Sopenharmony_ci /* MOV dst.x, imm{1.0} */ 252bf215546Sopenharmony_ci new_inst = tgsi_default_full_instruction(); 253bf215546Sopenharmony_ci new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 254bf215546Sopenharmony_ci new_inst.Instruction.NumDstRegs = 1; 255bf215546Sopenharmony_ci reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X); 256bf215546Sopenharmony_ci new_inst.Instruction.NumSrcRegs = 1; 257bf215546Sopenharmony_ci reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, _, _, _)); 258bf215546Sopenharmony_ci tctx->emit_instruction(tctx, &new_inst); 259bf215546Sopenharmony_ci } 260bf215546Sopenharmony_ci} 261bf215546Sopenharmony_ci 262bf215546Sopenharmony_ci/* LRP - Linear Interpolate 263bf215546Sopenharmony_ci * dst.x = src0.x \times src1.x + (1.0 - src0.x) \times src2.x 264bf215546Sopenharmony_ci * dst.y = src0.y \times src1.y + (1.0 - src0.y) \times src2.y 265bf215546Sopenharmony_ci * dst.z = src0.z \times src1.z + (1.0 - src0.z) \times src2.z 266bf215546Sopenharmony_ci * dst.w = src0.w \times src1.w + (1.0 - src0.w) \times src2.w 267bf215546Sopenharmony_ci * 268bf215546Sopenharmony_ci * This becomes: src0 \times src1 + src2 - src0 \times src2, which 269bf215546Sopenharmony_ci * can then become: src0 \times src1 - (src0 \times src2 - src2) 270bf215546Sopenharmony_ci * 271bf215546Sopenharmony_ci * ; needs: 1 tmp 272bf215546Sopenharmony_ci * MAD tmpA, src0, src2, -src2 273bf215546Sopenharmony_ci * MAD dst, src0, src1, -tmpA 274bf215546Sopenharmony_ci */ 275bf215546Sopenharmony_ci#define LRP_GROW (NINST(3) + NINST(3) - OINST(3)) 276bf215546Sopenharmony_ci#define LRP_TMP 1 277bf215546Sopenharmony_cistatic void 278bf215546Sopenharmony_citransform_lrp(struct tgsi_transform_context *tctx, 279bf215546Sopenharmony_ci struct tgsi_full_instruction *inst) 280bf215546Sopenharmony_ci{ 281bf215546Sopenharmony_ci struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 282bf215546Sopenharmony_ci struct tgsi_full_dst_register *dst = &inst->Dst[0]; 283bf215546Sopenharmony_ci struct tgsi_full_src_register *src0 = &inst->Src[0]; 284bf215546Sopenharmony_ci struct tgsi_full_src_register *src1 = &inst->Src[1]; 285bf215546Sopenharmony_ci struct tgsi_full_src_register *src2 = &inst->Src[2]; 286bf215546Sopenharmony_ci struct tgsi_full_instruction new_inst; 287bf215546Sopenharmony_ci 288bf215546Sopenharmony_ci if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) { 289bf215546Sopenharmony_ci /* MAD tmpA, src0, src2, -src2 */ 290bf215546Sopenharmony_ci new_inst = tgsi_default_full_instruction(); 291bf215546Sopenharmony_ci new_inst.Instruction.Opcode = TGSI_OPCODE_MAD; 292bf215546Sopenharmony_ci new_inst.Instruction.NumDstRegs = 1; 293bf215546Sopenharmony_ci reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW); 294bf215546Sopenharmony_ci new_inst.Instruction.NumSrcRegs = 3; 295bf215546Sopenharmony_ci reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); 296bf215546Sopenharmony_ci reg_src(&new_inst.Src[1], src2, SWIZ(X, Y, Z, W)); 297bf215546Sopenharmony_ci reg_src(&new_inst.Src[2], src2, SWIZ(X, Y, Z, W)); 298bf215546Sopenharmony_ci new_inst.Src[2].Register.Negate = !new_inst.Src[2].Register.Negate; 299bf215546Sopenharmony_ci tctx->emit_instruction(tctx, &new_inst); 300bf215546Sopenharmony_ci 301bf215546Sopenharmony_ci /* MAD dst, src0, src1, -tmpA */ 302bf215546Sopenharmony_ci new_inst = tgsi_default_full_instruction(); 303bf215546Sopenharmony_ci new_inst.Instruction.Opcode = TGSI_OPCODE_MAD; 304bf215546Sopenharmony_ci new_inst.Instruction.NumDstRegs = 1; 305bf215546Sopenharmony_ci reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW); 306bf215546Sopenharmony_ci new_inst.Instruction.NumSrcRegs = 3; 307bf215546Sopenharmony_ci reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); 308bf215546Sopenharmony_ci reg_src(&new_inst.Src[1], src1, SWIZ(X, Y, Z, W)); 309bf215546Sopenharmony_ci reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); 310bf215546Sopenharmony_ci new_inst.Src[2].Register.Negate = true; 311bf215546Sopenharmony_ci tctx->emit_instruction(tctx, &new_inst); 312bf215546Sopenharmony_ci } 313bf215546Sopenharmony_ci} 314bf215546Sopenharmony_ci 315bf215546Sopenharmony_ci/* FRC - Fraction 316bf215546Sopenharmony_ci * dst.x = src.x - \lfloor src.x\rfloor 317bf215546Sopenharmony_ci * dst.y = src.y - \lfloor src.y\rfloor 318bf215546Sopenharmony_ci * dst.z = src.z - \lfloor src.z\rfloor 319bf215546Sopenharmony_ci * dst.w = src.w - \lfloor src.w\rfloor 320bf215546Sopenharmony_ci * 321bf215546Sopenharmony_ci * ; needs: 1 tmp 322bf215546Sopenharmony_ci * FLR tmpA, src 323bf215546Sopenharmony_ci * SUB dst, src, tmpA 324bf215546Sopenharmony_ci */ 325bf215546Sopenharmony_ci#define FRC_GROW (NINST(1) + NINST(2) - OINST(1)) 326bf215546Sopenharmony_ci#define FRC_TMP 1 327bf215546Sopenharmony_cistatic void 328bf215546Sopenharmony_citransform_frc(struct tgsi_transform_context *tctx, 329bf215546Sopenharmony_ci struct tgsi_full_instruction *inst) 330bf215546Sopenharmony_ci{ 331bf215546Sopenharmony_ci struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 332bf215546Sopenharmony_ci struct tgsi_full_dst_register *dst = &inst->Dst[0]; 333bf215546Sopenharmony_ci struct tgsi_full_src_register *src = &inst->Src[0]; 334bf215546Sopenharmony_ci struct tgsi_full_instruction new_inst; 335bf215546Sopenharmony_ci 336bf215546Sopenharmony_ci if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) { 337bf215546Sopenharmony_ci /* FLR tmpA, src */ 338bf215546Sopenharmony_ci new_inst = tgsi_default_full_instruction(); 339bf215546Sopenharmony_ci new_inst.Instruction.Opcode = TGSI_OPCODE_FLR; 340bf215546Sopenharmony_ci new_inst.Instruction.NumDstRegs = 1; 341bf215546Sopenharmony_ci reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW); 342bf215546Sopenharmony_ci new_inst.Instruction.NumSrcRegs = 1; 343bf215546Sopenharmony_ci reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W)); 344bf215546Sopenharmony_ci tctx->emit_instruction(tctx, &new_inst); 345bf215546Sopenharmony_ci 346bf215546Sopenharmony_ci /* SUB dst, src, tmpA */ 347bf215546Sopenharmony_ci new_inst = tgsi_default_full_instruction(); 348bf215546Sopenharmony_ci new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; 349bf215546Sopenharmony_ci new_inst.Instruction.NumDstRegs = 1; 350bf215546Sopenharmony_ci reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW); 351bf215546Sopenharmony_ci new_inst.Instruction.NumSrcRegs = 2; 352bf215546Sopenharmony_ci reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W)); 353bf215546Sopenharmony_ci reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); 354bf215546Sopenharmony_ci new_inst.Src[1].Register.Negate = 1; 355bf215546Sopenharmony_ci tctx->emit_instruction(tctx, &new_inst); 356bf215546Sopenharmony_ci } 357bf215546Sopenharmony_ci} 358bf215546Sopenharmony_ci 359bf215546Sopenharmony_ci/* POW - Power 360bf215546Sopenharmony_ci * dst.x = src0.x^{src1.x} 361bf215546Sopenharmony_ci * dst.y = src0.x^{src1.x} 362bf215546Sopenharmony_ci * dst.z = src0.x^{src1.x} 363bf215546Sopenharmony_ci * dst.w = src0.x^{src1.x} 364bf215546Sopenharmony_ci * 365bf215546Sopenharmony_ci * ; needs: 1 tmp 366bf215546Sopenharmony_ci * LG2 tmpA.x, src0.x 367bf215546Sopenharmony_ci * MUL tmpA.x, src1.x, tmpA.x 368bf215546Sopenharmony_ci * EX2 dst, tmpA.x 369bf215546Sopenharmony_ci */ 370bf215546Sopenharmony_ci#define POW_GROW (NINST(1) + NINST(2) + NINST(1) - OINST(2)) 371bf215546Sopenharmony_ci#define POW_TMP 1 372bf215546Sopenharmony_cistatic void 373bf215546Sopenharmony_citransform_pow(struct tgsi_transform_context *tctx, 374bf215546Sopenharmony_ci struct tgsi_full_instruction *inst) 375bf215546Sopenharmony_ci{ 376bf215546Sopenharmony_ci struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 377bf215546Sopenharmony_ci struct tgsi_full_dst_register *dst = &inst->Dst[0]; 378bf215546Sopenharmony_ci struct tgsi_full_src_register *src0 = &inst->Src[0]; 379bf215546Sopenharmony_ci struct tgsi_full_src_register *src1 = &inst->Src[1]; 380bf215546Sopenharmony_ci struct tgsi_full_instruction new_inst; 381bf215546Sopenharmony_ci 382bf215546Sopenharmony_ci if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) { 383bf215546Sopenharmony_ci /* LG2 tmpA.x, src0.x */ 384bf215546Sopenharmony_ci new_inst = tgsi_default_full_instruction(); 385bf215546Sopenharmony_ci new_inst.Instruction.Opcode = TGSI_OPCODE_LG2; 386bf215546Sopenharmony_ci new_inst.Instruction.NumDstRegs = 1; 387bf215546Sopenharmony_ci reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 388bf215546Sopenharmony_ci new_inst.Instruction.NumSrcRegs = 1; 389bf215546Sopenharmony_ci reg_src(&new_inst.Src[0], src0, SWIZ(X, _, _, _)); 390bf215546Sopenharmony_ci tctx->emit_instruction(tctx, &new_inst); 391bf215546Sopenharmony_ci 392bf215546Sopenharmony_ci /* MUL tmpA.x, src1.x, tmpA.x */ 393bf215546Sopenharmony_ci new_inst = tgsi_default_full_instruction(); 394bf215546Sopenharmony_ci new_inst.Instruction.Opcode = TGSI_OPCODE_MUL; 395bf215546Sopenharmony_ci new_inst.Instruction.NumDstRegs = 1; 396bf215546Sopenharmony_ci reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 397bf215546Sopenharmony_ci new_inst.Instruction.NumSrcRegs = 2; 398bf215546Sopenharmony_ci reg_src(&new_inst.Src[0], src1, SWIZ(X, _, _, _)); 399bf215546Sopenharmony_ci reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _)); 400bf215546Sopenharmony_ci tctx->emit_instruction(tctx, &new_inst); 401bf215546Sopenharmony_ci 402bf215546Sopenharmony_ci /* EX2 dst, tmpA.x */ 403bf215546Sopenharmony_ci new_inst = tgsi_default_full_instruction(); 404bf215546Sopenharmony_ci new_inst.Instruction.Opcode = TGSI_OPCODE_EX2; 405bf215546Sopenharmony_ci new_inst.Instruction.NumDstRegs = 1; 406bf215546Sopenharmony_ci reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW); 407bf215546Sopenharmony_ci new_inst.Instruction.NumSrcRegs = 1; 408bf215546Sopenharmony_ci reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, _, _, _)); 409bf215546Sopenharmony_ci tctx->emit_instruction(tctx, &new_inst); 410bf215546Sopenharmony_ci } 411bf215546Sopenharmony_ci} 412bf215546Sopenharmony_ci 413bf215546Sopenharmony_ci/* LIT - Light Coefficients 414bf215546Sopenharmony_ci * dst.x = 1.0 415bf215546Sopenharmony_ci * dst.y = max(src.x, 0.0) 416bf215546Sopenharmony_ci * dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0 417bf215546Sopenharmony_ci * dst.w = 1.0 418bf215546Sopenharmony_ci * 419bf215546Sopenharmony_ci * ; needs: 1 tmp, imm{0.0}, imm{1.0}, imm{128.0} 420bf215546Sopenharmony_ci * MAX tmpA.xy, src.xy, imm{0.0} 421bf215546Sopenharmony_ci * CLAMP tmpA.z, src.w, -imm{128.0}, imm{128.0} 422bf215546Sopenharmony_ci * LG2 tmpA.y, tmpA.y 423bf215546Sopenharmony_ci * MUL tmpA.y, tmpA.z, tmpA.y 424bf215546Sopenharmony_ci * EX2 tmpA.y, tmpA.y 425bf215546Sopenharmony_ci * CMP tmpA.y, -src.x, tmpA.y, imm{0.0} 426bf215546Sopenharmony_ci * MOV dst.yz, tmpA.xy 427bf215546Sopenharmony_ci * MOV dst.xw, imm{1.0} 428bf215546Sopenharmony_ci */ 429bf215546Sopenharmony_ci#define LIT_GROW (NINST(1) + NINST(3) + NINST(1) + NINST(2) + \ 430bf215546Sopenharmony_ci NINST(1) + NINST(3) + NINST(1) + NINST(1) - OINST(1)) 431bf215546Sopenharmony_ci#define LIT_TMP 1 432bf215546Sopenharmony_cistatic void 433bf215546Sopenharmony_citransform_lit(struct tgsi_transform_context *tctx, 434bf215546Sopenharmony_ci struct tgsi_full_instruction *inst) 435bf215546Sopenharmony_ci{ 436bf215546Sopenharmony_ci struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 437bf215546Sopenharmony_ci struct tgsi_full_dst_register *dst = &inst->Dst[0]; 438bf215546Sopenharmony_ci struct tgsi_full_src_register *src = &inst->Src[0]; 439bf215546Sopenharmony_ci struct tgsi_full_instruction new_inst; 440bf215546Sopenharmony_ci 441bf215546Sopenharmony_ci if (dst->Register.WriteMask & TGSI_WRITEMASK_YZ) { 442bf215546Sopenharmony_ci /* MAX tmpA.xy, src.xy, imm{0.0} */ 443bf215546Sopenharmony_ci new_inst = tgsi_default_full_instruction(); 444bf215546Sopenharmony_ci new_inst.Instruction.Opcode = TGSI_OPCODE_MAX; 445bf215546Sopenharmony_ci new_inst.Instruction.NumDstRegs = 1; 446bf215546Sopenharmony_ci reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XY); 447bf215546Sopenharmony_ci new_inst.Instruction.NumSrcRegs = 2; 448bf215546Sopenharmony_ci reg_src(&new_inst.Src[0], src, SWIZ(X, Y, _, _)); 449bf215546Sopenharmony_ci reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(X, X, _, _)); 450bf215546Sopenharmony_ci tctx->emit_instruction(tctx, &new_inst); 451bf215546Sopenharmony_ci 452bf215546Sopenharmony_ci /* MIN tmpA.z, src.w, imm{128.0} */ 453bf215546Sopenharmony_ci new_inst = tgsi_default_full_instruction(); 454bf215546Sopenharmony_ci new_inst.Instruction.Opcode = TGSI_OPCODE_MIN; 455bf215546Sopenharmony_ci new_inst.Instruction.NumDstRegs = 1; 456bf215546Sopenharmony_ci reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z); 457bf215546Sopenharmony_ci new_inst.Instruction.NumSrcRegs = 2; 458bf215546Sopenharmony_ci reg_src(&new_inst.Src[0], src, SWIZ(_, _, W, _)); 459bf215546Sopenharmony_ci reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(_, _, Z, _)); 460bf215546Sopenharmony_ci tctx->emit_instruction(tctx, &new_inst); 461bf215546Sopenharmony_ci 462bf215546Sopenharmony_ci /* MAX tmpA.z, tmpA.z, -imm{128.0} */ 463bf215546Sopenharmony_ci new_inst = tgsi_default_full_instruction(); 464bf215546Sopenharmony_ci new_inst.Instruction.Opcode = TGSI_OPCODE_MAX; 465bf215546Sopenharmony_ci new_inst.Instruction.NumDstRegs = 1; 466bf215546Sopenharmony_ci reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z); 467bf215546Sopenharmony_ci new_inst.Instruction.NumSrcRegs = 2; 468bf215546Sopenharmony_ci reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, _, Z, _)); 469bf215546Sopenharmony_ci reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(_, _, Z, _)); 470bf215546Sopenharmony_ci new_inst.Src[1].Register.Negate = true; 471bf215546Sopenharmony_ci tctx->emit_instruction(tctx, &new_inst); 472bf215546Sopenharmony_ci 473bf215546Sopenharmony_ci /* LG2 tmpA.y, tmpA.y */ 474bf215546Sopenharmony_ci new_inst = tgsi_default_full_instruction(); 475bf215546Sopenharmony_ci new_inst.Instruction.Opcode = TGSI_OPCODE_LG2; 476bf215546Sopenharmony_ci new_inst.Instruction.NumDstRegs = 1; 477bf215546Sopenharmony_ci reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); 478bf215546Sopenharmony_ci new_inst.Instruction.NumSrcRegs = 1; 479bf215546Sopenharmony_ci reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _)); 480bf215546Sopenharmony_ci tctx->emit_instruction(tctx, &new_inst); 481bf215546Sopenharmony_ci 482bf215546Sopenharmony_ci /* MUL tmpA.y, tmpA.z, tmpA.y */ 483bf215546Sopenharmony_ci new_inst = tgsi_default_full_instruction(); 484bf215546Sopenharmony_ci new_inst.Instruction.Opcode = TGSI_OPCODE_MUL; 485bf215546Sopenharmony_ci new_inst.Instruction.NumDstRegs = 1; 486bf215546Sopenharmony_ci reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); 487bf215546Sopenharmony_ci new_inst.Instruction.NumSrcRegs = 2; 488bf215546Sopenharmony_ci reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, Z, _, _)); 489bf215546Sopenharmony_ci reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _)); 490bf215546Sopenharmony_ci tctx->emit_instruction(tctx, &new_inst); 491bf215546Sopenharmony_ci 492bf215546Sopenharmony_ci /* EX2 tmpA.y, tmpA.y */ 493bf215546Sopenharmony_ci new_inst = tgsi_default_full_instruction(); 494bf215546Sopenharmony_ci new_inst.Instruction.Opcode = TGSI_OPCODE_EX2; 495bf215546Sopenharmony_ci new_inst.Instruction.NumDstRegs = 1; 496bf215546Sopenharmony_ci reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); 497bf215546Sopenharmony_ci new_inst.Instruction.NumSrcRegs = 1; 498bf215546Sopenharmony_ci reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _)); 499bf215546Sopenharmony_ci tctx->emit_instruction(tctx, &new_inst); 500bf215546Sopenharmony_ci 501bf215546Sopenharmony_ci /* CMP tmpA.y, -src.x, tmpA.y, imm{0.0} */ 502bf215546Sopenharmony_ci new_inst = tgsi_default_full_instruction(); 503bf215546Sopenharmony_ci new_inst.Instruction.Opcode = TGSI_OPCODE_CMP; 504bf215546Sopenharmony_ci new_inst.Instruction.NumDstRegs = 1; 505bf215546Sopenharmony_ci reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); 506bf215546Sopenharmony_ci new_inst.Instruction.NumSrcRegs = 3; 507bf215546Sopenharmony_ci reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _)); 508bf215546Sopenharmony_ci new_inst.Src[0].Register.Negate = true; 509bf215546Sopenharmony_ci reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _)); 510bf215546Sopenharmony_ci reg_src(&new_inst.Src[2], &ctx->imm, SWIZ(_, X, _, _)); 511bf215546Sopenharmony_ci tctx->emit_instruction(tctx, &new_inst); 512bf215546Sopenharmony_ci 513bf215546Sopenharmony_ci /* MOV dst.yz, tmpA.xy */ 514bf215546Sopenharmony_ci new_inst = tgsi_default_full_instruction(); 515bf215546Sopenharmony_ci new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 516bf215546Sopenharmony_ci new_inst.Instruction.NumDstRegs = 1; 517bf215546Sopenharmony_ci reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_YZ); 518bf215546Sopenharmony_ci new_inst.Instruction.NumSrcRegs = 1; 519bf215546Sopenharmony_ci reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, Y, _)); 520bf215546Sopenharmony_ci tctx->emit_instruction(tctx, &new_inst); 521bf215546Sopenharmony_ci } 522bf215546Sopenharmony_ci 523bf215546Sopenharmony_ci if (dst->Register.WriteMask & TGSI_WRITEMASK_XW) { 524bf215546Sopenharmony_ci /* MOV dst.xw, imm{1.0} */ 525bf215546Sopenharmony_ci new_inst = tgsi_default_full_instruction(); 526bf215546Sopenharmony_ci new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 527bf215546Sopenharmony_ci new_inst.Instruction.NumDstRegs = 1; 528bf215546Sopenharmony_ci reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XW); 529bf215546Sopenharmony_ci new_inst.Instruction.NumSrcRegs = 1; 530bf215546Sopenharmony_ci reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, _, _, Y)); 531bf215546Sopenharmony_ci tctx->emit_instruction(tctx, &new_inst); 532bf215546Sopenharmony_ci } 533bf215546Sopenharmony_ci} 534bf215546Sopenharmony_ci 535bf215546Sopenharmony_ci/* EXP - Approximate Exponential Base 2 536bf215546Sopenharmony_ci * dst.x = 2^{\lfloor src.x\rfloor} 537bf215546Sopenharmony_ci * dst.y = src.x - \lfloor src.x\rfloor 538bf215546Sopenharmony_ci * dst.z = 2^{src.x} 539bf215546Sopenharmony_ci * dst.w = 1.0 540bf215546Sopenharmony_ci * 541bf215546Sopenharmony_ci * ; needs: 1 tmp, imm{1.0} 542bf215546Sopenharmony_ci * if (lowering FLR) { 543bf215546Sopenharmony_ci * FRC tmpA.x, src.x 544bf215546Sopenharmony_ci * SUB tmpA.x, src.x, tmpA.x 545bf215546Sopenharmony_ci * } else { 546bf215546Sopenharmony_ci * FLR tmpA.x, src.x 547bf215546Sopenharmony_ci * } 548bf215546Sopenharmony_ci * EX2 tmpA.y, src.x 549bf215546Sopenharmony_ci * SUB dst.y, src.x, tmpA.x 550bf215546Sopenharmony_ci * EX2 dst.x, tmpA.x 551bf215546Sopenharmony_ci * MOV dst.z, tmpA.y 552bf215546Sopenharmony_ci * MOV dst.w, imm{1.0} 553bf215546Sopenharmony_ci */ 554bf215546Sopenharmony_ci#define EXP_GROW (NINST(1) + NINST(2) + NINST(1) + NINST(2) + NINST(1) + \ 555bf215546Sopenharmony_ci NINST(1)+ NINST(1) - OINST(1)) 556bf215546Sopenharmony_ci#define EXP_TMP 1 557bf215546Sopenharmony_cistatic void 558bf215546Sopenharmony_citransform_exp(struct tgsi_transform_context *tctx, 559bf215546Sopenharmony_ci struct tgsi_full_instruction *inst) 560bf215546Sopenharmony_ci{ 561bf215546Sopenharmony_ci struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 562bf215546Sopenharmony_ci struct tgsi_full_dst_register *dst = &inst->Dst[0]; 563bf215546Sopenharmony_ci struct tgsi_full_src_register *src = &inst->Src[0]; 564bf215546Sopenharmony_ci struct tgsi_full_instruction new_inst; 565bf215546Sopenharmony_ci 566bf215546Sopenharmony_ci if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) { 567bf215546Sopenharmony_ci if (ctx->config->lower_FLR) { 568bf215546Sopenharmony_ci /* FRC tmpA.x, src.x */ 569bf215546Sopenharmony_ci new_inst = tgsi_default_full_instruction(); 570bf215546Sopenharmony_ci new_inst.Instruction.Opcode = TGSI_OPCODE_FRC; 571bf215546Sopenharmony_ci new_inst.Instruction.NumDstRegs = 1; 572bf215546Sopenharmony_ci reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 573bf215546Sopenharmony_ci new_inst.Instruction.NumSrcRegs = 1; 574bf215546Sopenharmony_ci reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); 575bf215546Sopenharmony_ci tctx->emit_instruction(tctx, &new_inst); 576bf215546Sopenharmony_ci 577bf215546Sopenharmony_ci /* SUB tmpA.x, src.x, tmpA.x */ 578bf215546Sopenharmony_ci new_inst = tgsi_default_full_instruction(); 579bf215546Sopenharmony_ci new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; 580bf215546Sopenharmony_ci new_inst.Instruction.NumDstRegs = 1; 581bf215546Sopenharmony_ci reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 582bf215546Sopenharmony_ci new_inst.Instruction.NumSrcRegs = 2; 583bf215546Sopenharmony_ci reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); 584bf215546Sopenharmony_ci reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _)); 585bf215546Sopenharmony_ci new_inst.Src[1].Register.Negate = 1; 586bf215546Sopenharmony_ci tctx->emit_instruction(tctx, &new_inst); 587bf215546Sopenharmony_ci } else { 588bf215546Sopenharmony_ci /* FLR tmpA.x, src.x */ 589bf215546Sopenharmony_ci new_inst = tgsi_default_full_instruction(); 590bf215546Sopenharmony_ci new_inst.Instruction.Opcode = TGSI_OPCODE_FLR; 591bf215546Sopenharmony_ci new_inst.Instruction.NumDstRegs = 1; 592bf215546Sopenharmony_ci reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 593bf215546Sopenharmony_ci new_inst.Instruction.NumSrcRegs = 1; 594bf215546Sopenharmony_ci reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); 595bf215546Sopenharmony_ci tctx->emit_instruction(tctx, &new_inst); 596bf215546Sopenharmony_ci } 597bf215546Sopenharmony_ci } 598bf215546Sopenharmony_ci 599bf215546Sopenharmony_ci if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) { 600bf215546Sopenharmony_ci /* EX2 tmpA.y, src.x */ 601bf215546Sopenharmony_ci new_inst = tgsi_default_full_instruction(); 602bf215546Sopenharmony_ci new_inst.Instruction.Opcode = TGSI_OPCODE_EX2; 603bf215546Sopenharmony_ci new_inst.Instruction.NumDstRegs = 1; 604bf215546Sopenharmony_ci reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); 605bf215546Sopenharmony_ci new_inst.Instruction.NumSrcRegs = 1; 606bf215546Sopenharmony_ci reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); 607bf215546Sopenharmony_ci tctx->emit_instruction(tctx, &new_inst); 608bf215546Sopenharmony_ci } 609bf215546Sopenharmony_ci 610bf215546Sopenharmony_ci if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) { 611bf215546Sopenharmony_ci /* SUB dst.y, src.x, tmpA.x */ 612bf215546Sopenharmony_ci new_inst = tgsi_default_full_instruction(); 613bf215546Sopenharmony_ci new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; 614bf215546Sopenharmony_ci new_inst.Instruction.NumDstRegs = 1; 615bf215546Sopenharmony_ci reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y); 616bf215546Sopenharmony_ci new_inst.Instruction.NumSrcRegs = 2; 617bf215546Sopenharmony_ci reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _)); 618bf215546Sopenharmony_ci reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, X, _, _)); 619bf215546Sopenharmony_ci new_inst.Src[1].Register.Negate = 1; 620bf215546Sopenharmony_ci tctx->emit_instruction(tctx, &new_inst); 621bf215546Sopenharmony_ci } 622bf215546Sopenharmony_ci 623bf215546Sopenharmony_ci if (dst->Register.WriteMask & TGSI_WRITEMASK_X) { 624bf215546Sopenharmony_ci /* EX2 dst.x, tmpA.x */ 625bf215546Sopenharmony_ci new_inst = tgsi_default_full_instruction(); 626bf215546Sopenharmony_ci new_inst.Instruction.Opcode = TGSI_OPCODE_EX2; 627bf215546Sopenharmony_ci new_inst.Instruction.NumDstRegs = 1; 628bf215546Sopenharmony_ci reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X); 629bf215546Sopenharmony_ci new_inst.Instruction.NumSrcRegs = 1; 630bf215546Sopenharmony_ci reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, _, _, _)); 631bf215546Sopenharmony_ci tctx->emit_instruction(tctx, &new_inst); 632bf215546Sopenharmony_ci } 633bf215546Sopenharmony_ci 634bf215546Sopenharmony_ci if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) { 635bf215546Sopenharmony_ci /* MOV dst.z, tmpA.y */ 636bf215546Sopenharmony_ci new_inst = tgsi_default_full_instruction(); 637bf215546Sopenharmony_ci new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 638bf215546Sopenharmony_ci new_inst.Instruction.NumDstRegs = 1; 639bf215546Sopenharmony_ci reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Z); 640bf215546Sopenharmony_ci new_inst.Instruction.NumSrcRegs = 1; 641bf215546Sopenharmony_ci reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, _, Y, _)); 642bf215546Sopenharmony_ci tctx->emit_instruction(tctx, &new_inst); 643bf215546Sopenharmony_ci } 644bf215546Sopenharmony_ci 645bf215546Sopenharmony_ci if (dst->Register.WriteMask & TGSI_WRITEMASK_W) { 646bf215546Sopenharmony_ci /* MOV dst.w, imm{1.0} */ 647bf215546Sopenharmony_ci new_inst = tgsi_default_full_instruction(); 648bf215546Sopenharmony_ci new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 649bf215546Sopenharmony_ci new_inst.Instruction.NumDstRegs = 1; 650bf215546Sopenharmony_ci reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W); 651bf215546Sopenharmony_ci new_inst.Instruction.NumSrcRegs = 1; 652bf215546Sopenharmony_ci reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y)); 653bf215546Sopenharmony_ci tctx->emit_instruction(tctx, &new_inst); 654bf215546Sopenharmony_ci } 655bf215546Sopenharmony_ci} 656bf215546Sopenharmony_ci 657bf215546Sopenharmony_ci/* LOG - Approximate Logarithm Base 2 658bf215546Sopenharmony_ci * dst.x = \lfloor\log_2{|src.x|}\rfloor 659bf215546Sopenharmony_ci * dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}} 660bf215546Sopenharmony_ci * dst.z = \log_2{|src.x|} 661bf215546Sopenharmony_ci * dst.w = 1.0 662bf215546Sopenharmony_ci * 663bf215546Sopenharmony_ci * ; needs: 1 tmp, imm{1.0} 664bf215546Sopenharmony_ci * LG2 tmpA.x, |src.x| 665bf215546Sopenharmony_ci * if (lowering FLR) { 666bf215546Sopenharmony_ci * FRC tmpA.y, tmpA.x 667bf215546Sopenharmony_ci * SUB tmpA.y, tmpA.x, tmpA.y 668bf215546Sopenharmony_ci * } else { 669bf215546Sopenharmony_ci * FLR tmpA.y, tmpA.x 670bf215546Sopenharmony_ci * } 671bf215546Sopenharmony_ci * EX2 tmpA.z, tmpA.y 672bf215546Sopenharmony_ci * RCP tmpA.z, tmpA.z 673bf215546Sopenharmony_ci * MUL dst.y, |src.x|, tmpA.z 674bf215546Sopenharmony_ci * MOV dst.xz, tmpA.yx 675bf215546Sopenharmony_ci * MOV dst.w, imm{1.0} 676bf215546Sopenharmony_ci */ 677bf215546Sopenharmony_ci#define LOG_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + NINST(1) + \ 678bf215546Sopenharmony_ci NINST(2) + NINST(1) + NINST(1) - OINST(1)) 679bf215546Sopenharmony_ci#define LOG_TMP 1 680bf215546Sopenharmony_cistatic void 681bf215546Sopenharmony_citransform_log(struct tgsi_transform_context *tctx, 682bf215546Sopenharmony_ci struct tgsi_full_instruction *inst) 683bf215546Sopenharmony_ci{ 684bf215546Sopenharmony_ci struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 685bf215546Sopenharmony_ci struct tgsi_full_dst_register *dst = &inst->Dst[0]; 686bf215546Sopenharmony_ci struct tgsi_full_src_register *src = &inst->Src[0]; 687bf215546Sopenharmony_ci struct tgsi_full_instruction new_inst; 688bf215546Sopenharmony_ci 689bf215546Sopenharmony_ci if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZ) { 690bf215546Sopenharmony_ci /* LG2 tmpA.x, |src.x| */ 691bf215546Sopenharmony_ci new_inst = tgsi_default_full_instruction(); 692bf215546Sopenharmony_ci new_inst.Instruction.Opcode = TGSI_OPCODE_LG2; 693bf215546Sopenharmony_ci new_inst.Instruction.NumDstRegs = 1; 694bf215546Sopenharmony_ci reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 695bf215546Sopenharmony_ci new_inst.Instruction.NumSrcRegs = 1; 696bf215546Sopenharmony_ci reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _)); 697bf215546Sopenharmony_ci new_inst.Src[0].Register.Absolute = true; 698bf215546Sopenharmony_ci tctx->emit_instruction(tctx, &new_inst); 699bf215546Sopenharmony_ci } 700bf215546Sopenharmony_ci 701bf215546Sopenharmony_ci if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) { 702bf215546Sopenharmony_ci if (ctx->config->lower_FLR) { 703bf215546Sopenharmony_ci /* FRC tmpA.y, tmpA.x */ 704bf215546Sopenharmony_ci new_inst = tgsi_default_full_instruction(); 705bf215546Sopenharmony_ci new_inst.Instruction.Opcode = TGSI_OPCODE_FRC; 706bf215546Sopenharmony_ci new_inst.Instruction.NumDstRegs = 1; 707bf215546Sopenharmony_ci reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); 708bf215546Sopenharmony_ci new_inst.Instruction.NumSrcRegs = 1; 709bf215546Sopenharmony_ci reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _)); 710bf215546Sopenharmony_ci tctx->emit_instruction(tctx, &new_inst); 711bf215546Sopenharmony_ci 712bf215546Sopenharmony_ci /* SUB tmpA.y, tmpA.x, tmpA.y */ 713bf215546Sopenharmony_ci new_inst = tgsi_default_full_instruction(); 714bf215546Sopenharmony_ci new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; 715bf215546Sopenharmony_ci new_inst.Instruction.NumDstRegs = 1; 716bf215546Sopenharmony_ci reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); 717bf215546Sopenharmony_ci new_inst.Instruction.NumSrcRegs = 2; 718bf215546Sopenharmony_ci reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _)); 719bf215546Sopenharmony_ci reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _)); 720bf215546Sopenharmony_ci new_inst.Src[1].Register.Negate = 1; 721bf215546Sopenharmony_ci tctx->emit_instruction(tctx, &new_inst); 722bf215546Sopenharmony_ci } else { 723bf215546Sopenharmony_ci /* FLR tmpA.y, tmpA.x */ 724bf215546Sopenharmony_ci new_inst = tgsi_default_full_instruction(); 725bf215546Sopenharmony_ci new_inst.Instruction.Opcode = TGSI_OPCODE_FLR; 726bf215546Sopenharmony_ci new_inst.Instruction.NumDstRegs = 1; 727bf215546Sopenharmony_ci reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y); 728bf215546Sopenharmony_ci new_inst.Instruction.NumSrcRegs = 1; 729bf215546Sopenharmony_ci reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _)); 730bf215546Sopenharmony_ci tctx->emit_instruction(tctx, &new_inst); 731bf215546Sopenharmony_ci } 732bf215546Sopenharmony_ci } 733bf215546Sopenharmony_ci 734bf215546Sopenharmony_ci if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) { 735bf215546Sopenharmony_ci /* EX2 tmpA.z, tmpA.y */ 736bf215546Sopenharmony_ci new_inst = tgsi_default_full_instruction(); 737bf215546Sopenharmony_ci new_inst.Instruction.Opcode = TGSI_OPCODE_EX2; 738bf215546Sopenharmony_ci new_inst.Instruction.NumDstRegs = 1; 739bf215546Sopenharmony_ci reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z); 740bf215546Sopenharmony_ci new_inst.Instruction.NumSrcRegs = 1; 741bf215546Sopenharmony_ci reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _)); 742bf215546Sopenharmony_ci tctx->emit_instruction(tctx, &new_inst); 743bf215546Sopenharmony_ci 744bf215546Sopenharmony_ci /* RCP tmpA.z, tmpA.z */ 745bf215546Sopenharmony_ci new_inst = tgsi_default_full_instruction(); 746bf215546Sopenharmony_ci new_inst.Instruction.Opcode = TGSI_OPCODE_RCP; 747bf215546Sopenharmony_ci new_inst.Instruction.NumDstRegs = 1; 748bf215546Sopenharmony_ci reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z); 749bf215546Sopenharmony_ci new_inst.Instruction.NumSrcRegs = 1; 750bf215546Sopenharmony_ci reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Z, _, _, _)); 751bf215546Sopenharmony_ci tctx->emit_instruction(tctx, &new_inst); 752bf215546Sopenharmony_ci 753bf215546Sopenharmony_ci /* MUL dst.y, |src.x|, tmpA.z */ 754bf215546Sopenharmony_ci new_inst = tgsi_default_full_instruction(); 755bf215546Sopenharmony_ci new_inst.Instruction.Opcode = TGSI_OPCODE_MUL; 756bf215546Sopenharmony_ci new_inst.Instruction.NumDstRegs = 1; 757bf215546Sopenharmony_ci reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y); 758bf215546Sopenharmony_ci new_inst.Instruction.NumSrcRegs = 2; 759bf215546Sopenharmony_ci reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _)); 760bf215546Sopenharmony_ci new_inst.Src[0].Register.Absolute = true; 761bf215546Sopenharmony_ci reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Z, _, _)); 762bf215546Sopenharmony_ci tctx->emit_instruction(tctx, &new_inst); 763bf215546Sopenharmony_ci } 764bf215546Sopenharmony_ci 765bf215546Sopenharmony_ci if (dst->Register.WriteMask & TGSI_WRITEMASK_XZ) { 766bf215546Sopenharmony_ci /* MOV dst.xz, tmpA.yx */ 767bf215546Sopenharmony_ci new_inst = tgsi_default_full_instruction(); 768bf215546Sopenharmony_ci new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 769bf215546Sopenharmony_ci new_inst.Instruction.NumDstRegs = 1; 770bf215546Sopenharmony_ci reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XZ); 771bf215546Sopenharmony_ci new_inst.Instruction.NumSrcRegs = 1; 772bf215546Sopenharmony_ci reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, X, _)); 773bf215546Sopenharmony_ci tctx->emit_instruction(tctx, &new_inst); 774bf215546Sopenharmony_ci } 775bf215546Sopenharmony_ci 776bf215546Sopenharmony_ci if (dst->Register.WriteMask & TGSI_WRITEMASK_W) { 777bf215546Sopenharmony_ci /* MOV dst.w, imm{1.0} */ 778bf215546Sopenharmony_ci new_inst = tgsi_default_full_instruction(); 779bf215546Sopenharmony_ci new_inst.Instruction.Opcode = TGSI_OPCODE_MOV; 780bf215546Sopenharmony_ci new_inst.Instruction.NumDstRegs = 1; 781bf215546Sopenharmony_ci reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W); 782bf215546Sopenharmony_ci new_inst.Instruction.NumSrcRegs = 1; 783bf215546Sopenharmony_ci reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y)); 784bf215546Sopenharmony_ci tctx->emit_instruction(tctx, &new_inst); 785bf215546Sopenharmony_ci } 786bf215546Sopenharmony_ci} 787bf215546Sopenharmony_ci 788bf215546Sopenharmony_ci/* DP4 - 4-component Dot Product 789bf215546Sopenharmony_ci * dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w 790bf215546Sopenharmony_ci * 791bf215546Sopenharmony_ci * DP3 - 3-component Dot Product 792bf215546Sopenharmony_ci * dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z 793bf215546Sopenharmony_ci * 794bf215546Sopenharmony_ci * DP2 - 2-component Dot Product 795bf215546Sopenharmony_ci * dst = src0.x \times src1.x + src0.y \times src1.y 796bf215546Sopenharmony_ci * 797bf215546Sopenharmony_ci * NOTE: these are translated into sequence of MUL/MAD(/ADD) scalar 798bf215546Sopenharmony_ci * operations, which is what you'd prefer for a ISA that is natively 799bf215546Sopenharmony_ci * scalar. Probably a native vector ISA would at least already have 800bf215546Sopenharmony_ci * DP4/DP3 instructions, but perhaps there is room for an alternative 801bf215546Sopenharmony_ci * translation for DP2 using vector instructions. 802bf215546Sopenharmony_ci * 803bf215546Sopenharmony_ci * ; needs: 1 tmp 804bf215546Sopenharmony_ci * MUL tmpA.x, src0.x, src1.x 805bf215546Sopenharmony_ci * MAD tmpA.x, src0.y, src1.y, tmpA.x 806bf215546Sopenharmony_ci * if (DP3 || DP4) { 807bf215546Sopenharmony_ci * MAD tmpA.x, src0.z, src1.z, tmpA.x 808bf215546Sopenharmony_ci * if (DP4) { 809bf215546Sopenharmony_ci * MAD tmpA.x, src0.w, src1.w, tmpA.x 810bf215546Sopenharmony_ci * } 811bf215546Sopenharmony_ci * } 812bf215546Sopenharmony_ci * ; fixup last instruction to replicate into dst 813bf215546Sopenharmony_ci */ 814bf215546Sopenharmony_ci#define DP4_GROW (NINST(2) + NINST(3) + NINST(3) + NINST(3) - OINST(2)) 815bf215546Sopenharmony_ci#define DP3_GROW (NINST(2) + NINST(3) + NINST(3) - OINST(2)) 816bf215546Sopenharmony_ci#define DP2_GROW (NINST(2) + NINST(3) - OINST(2)) 817bf215546Sopenharmony_ci#define DOTP_TMP 1 818bf215546Sopenharmony_cistatic void 819bf215546Sopenharmony_citransform_dotp(struct tgsi_transform_context *tctx, 820bf215546Sopenharmony_ci struct tgsi_full_instruction *inst) 821bf215546Sopenharmony_ci{ 822bf215546Sopenharmony_ci struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 823bf215546Sopenharmony_ci struct tgsi_full_dst_register *dst = &inst->Dst[0]; 824bf215546Sopenharmony_ci struct tgsi_full_src_register *src0 = &inst->Src[0]; 825bf215546Sopenharmony_ci struct tgsi_full_src_register *src1 = &inst->Src[1]; 826bf215546Sopenharmony_ci struct tgsi_full_instruction new_inst; 827bf215546Sopenharmony_ci enum tgsi_opcode opcode = inst->Instruction.Opcode; 828bf215546Sopenharmony_ci 829bf215546Sopenharmony_ci /* NOTE: any potential last instruction must replicate src on all 830bf215546Sopenharmony_ci * components (since it could be re-written to write to final dst) 831bf215546Sopenharmony_ci */ 832bf215546Sopenharmony_ci 833bf215546Sopenharmony_ci if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) { 834bf215546Sopenharmony_ci /* MUL tmpA.x, src0.x, src1.x */ 835bf215546Sopenharmony_ci new_inst = tgsi_default_full_instruction(); 836bf215546Sopenharmony_ci new_inst.Instruction.Opcode = TGSI_OPCODE_MUL; 837bf215546Sopenharmony_ci new_inst.Instruction.NumDstRegs = 1; 838bf215546Sopenharmony_ci reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 839bf215546Sopenharmony_ci new_inst.Instruction.NumSrcRegs = 2; 840bf215546Sopenharmony_ci reg_src(&new_inst.Src[0], src0, SWIZ(X, _, _, _)); 841bf215546Sopenharmony_ci reg_src(&new_inst.Src[1], src1, SWIZ(X, _, _, _)); 842bf215546Sopenharmony_ci tctx->emit_instruction(tctx, &new_inst); 843bf215546Sopenharmony_ci 844bf215546Sopenharmony_ci /* MAD tmpA.x, src0.y, src1.y, tmpA.x */ 845bf215546Sopenharmony_ci new_inst = tgsi_default_full_instruction(); 846bf215546Sopenharmony_ci new_inst.Instruction.Opcode = TGSI_OPCODE_MAD; 847bf215546Sopenharmony_ci new_inst.Instruction.NumDstRegs = 1; 848bf215546Sopenharmony_ci reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 849bf215546Sopenharmony_ci new_inst.Instruction.NumSrcRegs = 3; 850bf215546Sopenharmony_ci reg_src(&new_inst.Src[0], src0, SWIZ(Y, Y, Y, Y)); 851bf215546Sopenharmony_ci reg_src(&new_inst.Src[1], src1, SWIZ(Y, Y, Y, Y)); 852bf215546Sopenharmony_ci reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X)); 853bf215546Sopenharmony_ci 854bf215546Sopenharmony_ci if ((opcode == TGSI_OPCODE_DP3) || 855bf215546Sopenharmony_ci (opcode == TGSI_OPCODE_DP4)) { 856bf215546Sopenharmony_ci tctx->emit_instruction(tctx, &new_inst); 857bf215546Sopenharmony_ci 858bf215546Sopenharmony_ci /* MAD tmpA.x, src0.z, src1.z, tmpA.x */ 859bf215546Sopenharmony_ci new_inst = tgsi_default_full_instruction(); 860bf215546Sopenharmony_ci new_inst.Instruction.Opcode = TGSI_OPCODE_MAD; 861bf215546Sopenharmony_ci new_inst.Instruction.NumDstRegs = 1; 862bf215546Sopenharmony_ci reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 863bf215546Sopenharmony_ci new_inst.Instruction.NumSrcRegs = 3; 864bf215546Sopenharmony_ci reg_src(&new_inst.Src[0], src0, SWIZ(Z, Z, Z, Z)); 865bf215546Sopenharmony_ci reg_src(&new_inst.Src[1], src1, SWIZ(Z, Z, Z, Z)); 866bf215546Sopenharmony_ci reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X)); 867bf215546Sopenharmony_ci 868bf215546Sopenharmony_ci if (opcode == TGSI_OPCODE_DP4) { 869bf215546Sopenharmony_ci tctx->emit_instruction(tctx, &new_inst); 870bf215546Sopenharmony_ci 871bf215546Sopenharmony_ci /* MAD tmpA.x, src0.w, src1.w, tmpA.x */ 872bf215546Sopenharmony_ci new_inst = tgsi_default_full_instruction(); 873bf215546Sopenharmony_ci new_inst.Instruction.Opcode = TGSI_OPCODE_MAD; 874bf215546Sopenharmony_ci new_inst.Instruction.NumDstRegs = 1; 875bf215546Sopenharmony_ci reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X); 876bf215546Sopenharmony_ci new_inst.Instruction.NumSrcRegs = 3; 877bf215546Sopenharmony_ci reg_src(&new_inst.Src[0], src0, SWIZ(W, W, W, W)); 878bf215546Sopenharmony_ci reg_src(&new_inst.Src[1], src1, SWIZ(W, W, W, W)); 879bf215546Sopenharmony_ci reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X)); 880bf215546Sopenharmony_ci } 881bf215546Sopenharmony_ci } 882bf215546Sopenharmony_ci 883bf215546Sopenharmony_ci /* fixup last instruction to write to dst: */ 884bf215546Sopenharmony_ci reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW); 885bf215546Sopenharmony_ci 886bf215546Sopenharmony_ci tctx->emit_instruction(tctx, &new_inst); 887bf215546Sopenharmony_ci } 888bf215546Sopenharmony_ci} 889bf215546Sopenharmony_ci 890bf215546Sopenharmony_ci/* FLR - floor, CEIL - ceil 891bf215546Sopenharmony_ci * ; needs: 1 tmp 892bf215546Sopenharmony_ci * if (CEIL) { 893bf215546Sopenharmony_ci * FRC tmpA, -src 894bf215546Sopenharmony_ci * ADD dst, src, tmpA 895bf215546Sopenharmony_ci * } else { 896bf215546Sopenharmony_ci * FRC tmpA, src 897bf215546Sopenharmony_ci * SUB dst, src, tmpA 898bf215546Sopenharmony_ci * } 899bf215546Sopenharmony_ci */ 900bf215546Sopenharmony_ci#define FLR_GROW (NINST(1) + NINST(2) - OINST(1)) 901bf215546Sopenharmony_ci#define CEIL_GROW (NINST(1) + NINST(2) - OINST(1)) 902bf215546Sopenharmony_ci#define FLR_TMP 1 903bf215546Sopenharmony_ci#define CEIL_TMP 1 904bf215546Sopenharmony_cistatic void 905bf215546Sopenharmony_citransform_flr_ceil(struct tgsi_transform_context *tctx, 906bf215546Sopenharmony_ci struct tgsi_full_instruction *inst) 907bf215546Sopenharmony_ci{ 908bf215546Sopenharmony_ci struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 909bf215546Sopenharmony_ci struct tgsi_full_dst_register *dst = &inst->Dst[0]; 910bf215546Sopenharmony_ci struct tgsi_full_src_register *src0 = &inst->Src[0]; 911bf215546Sopenharmony_ci struct tgsi_full_instruction new_inst; 912bf215546Sopenharmony_ci enum tgsi_opcode opcode = inst->Instruction.Opcode; 913bf215546Sopenharmony_ci 914bf215546Sopenharmony_ci if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) { 915bf215546Sopenharmony_ci /* FLR: FRC tmpA, src CEIL: FRC tmpA, -src */ 916bf215546Sopenharmony_ci new_inst = tgsi_default_full_instruction(); 917bf215546Sopenharmony_ci new_inst.Instruction.Opcode = TGSI_OPCODE_FRC; 918bf215546Sopenharmony_ci new_inst.Instruction.NumDstRegs = 1; 919bf215546Sopenharmony_ci reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW); 920bf215546Sopenharmony_ci new_inst.Instruction.NumSrcRegs = 1; 921bf215546Sopenharmony_ci reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); 922bf215546Sopenharmony_ci 923bf215546Sopenharmony_ci if (opcode == TGSI_OPCODE_CEIL) 924bf215546Sopenharmony_ci new_inst.Src[0].Register.Negate = !new_inst.Src[0].Register.Negate; 925bf215546Sopenharmony_ci tctx->emit_instruction(tctx, &new_inst); 926bf215546Sopenharmony_ci 927bf215546Sopenharmony_ci /* FLR: SUB dst, src, tmpA CEIL: ADD dst, src, tmpA */ 928bf215546Sopenharmony_ci new_inst = tgsi_default_full_instruction(); 929bf215546Sopenharmony_ci new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; 930bf215546Sopenharmony_ci new_inst.Instruction.NumDstRegs = 1; 931bf215546Sopenharmony_ci reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW); 932bf215546Sopenharmony_ci new_inst.Instruction.NumSrcRegs = 2; 933bf215546Sopenharmony_ci reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); 934bf215546Sopenharmony_ci reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); 935bf215546Sopenharmony_ci if (opcode == TGSI_OPCODE_FLR) 936bf215546Sopenharmony_ci new_inst.Src[1].Register.Negate = 1; 937bf215546Sopenharmony_ci tctx->emit_instruction(tctx, &new_inst); 938bf215546Sopenharmony_ci } 939bf215546Sopenharmony_ci} 940bf215546Sopenharmony_ci 941bf215546Sopenharmony_ci/* TRUNC - truncate off fractional part 942bf215546Sopenharmony_ci * dst.x = trunc(src.x) 943bf215546Sopenharmony_ci * dst.y = trunc(src.y) 944bf215546Sopenharmony_ci * dst.z = trunc(src.z) 945bf215546Sopenharmony_ci * dst.w = trunc(src.w) 946bf215546Sopenharmony_ci * 947bf215546Sopenharmony_ci * ; needs: 1 tmp 948bf215546Sopenharmony_ci * if (lower FLR) { 949bf215546Sopenharmony_ci * FRC tmpA, |src| 950bf215546Sopenharmony_ci * SUB tmpA, |src|, tmpA 951bf215546Sopenharmony_ci * } else { 952bf215546Sopenharmony_ci * FLR tmpA, |src| 953bf215546Sopenharmony_ci * } 954bf215546Sopenharmony_ci * CMP dst, src, -tmpA, tmpA 955bf215546Sopenharmony_ci */ 956bf215546Sopenharmony_ci#define TRUNC_GROW (NINST(1) + NINST(2) + NINST(3) - OINST(1)) 957bf215546Sopenharmony_ci#define TRUNC_TMP 1 958bf215546Sopenharmony_cistatic void 959bf215546Sopenharmony_citransform_trunc(struct tgsi_transform_context *tctx, 960bf215546Sopenharmony_ci struct tgsi_full_instruction *inst) 961bf215546Sopenharmony_ci{ 962bf215546Sopenharmony_ci struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 963bf215546Sopenharmony_ci struct tgsi_full_dst_register *dst = &inst->Dst[0]; 964bf215546Sopenharmony_ci struct tgsi_full_src_register *src0 = &inst->Src[0]; 965bf215546Sopenharmony_ci struct tgsi_full_instruction new_inst; 966bf215546Sopenharmony_ci 967bf215546Sopenharmony_ci if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) { 968bf215546Sopenharmony_ci if (ctx->config->lower_FLR) { 969bf215546Sopenharmony_ci new_inst = tgsi_default_full_instruction(); 970bf215546Sopenharmony_ci new_inst.Instruction.Opcode = TGSI_OPCODE_FRC; 971bf215546Sopenharmony_ci new_inst.Instruction.NumDstRegs = 1; 972bf215546Sopenharmony_ci reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW); 973bf215546Sopenharmony_ci new_inst.Instruction.NumSrcRegs = 1; 974bf215546Sopenharmony_ci reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); 975bf215546Sopenharmony_ci new_inst.Src[0].Register.Absolute = true; 976bf215546Sopenharmony_ci new_inst.Src[0].Register.Negate = false; 977bf215546Sopenharmony_ci tctx->emit_instruction(tctx, &new_inst); 978bf215546Sopenharmony_ci 979bf215546Sopenharmony_ci new_inst = tgsi_default_full_instruction(); 980bf215546Sopenharmony_ci new_inst.Instruction.Opcode = TGSI_OPCODE_ADD; 981bf215546Sopenharmony_ci new_inst.Instruction.NumDstRegs = 1; 982bf215546Sopenharmony_ci reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW); 983bf215546Sopenharmony_ci new_inst.Instruction.NumSrcRegs = 2; 984bf215546Sopenharmony_ci reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); 985bf215546Sopenharmony_ci new_inst.Src[0].Register.Absolute = true; 986bf215546Sopenharmony_ci new_inst.Src[0].Register.Negate = false; 987bf215546Sopenharmony_ci reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); 988bf215546Sopenharmony_ci new_inst.Src[1].Register.Negate = 1; 989bf215546Sopenharmony_ci tctx->emit_instruction(tctx, &new_inst); 990bf215546Sopenharmony_ci } else { 991bf215546Sopenharmony_ci new_inst = tgsi_default_full_instruction(); 992bf215546Sopenharmony_ci new_inst.Instruction.Opcode = TGSI_OPCODE_FLR; 993bf215546Sopenharmony_ci new_inst.Instruction.NumDstRegs = 1; 994bf215546Sopenharmony_ci reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW); 995bf215546Sopenharmony_ci new_inst.Instruction.NumSrcRegs = 1; 996bf215546Sopenharmony_ci reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); 997bf215546Sopenharmony_ci new_inst.Src[0].Register.Absolute = true; 998bf215546Sopenharmony_ci new_inst.Src[0].Register.Negate = false; 999bf215546Sopenharmony_ci tctx->emit_instruction(tctx, &new_inst); 1000bf215546Sopenharmony_ci } 1001bf215546Sopenharmony_ci 1002bf215546Sopenharmony_ci new_inst = tgsi_default_full_instruction(); 1003bf215546Sopenharmony_ci new_inst.Instruction.Opcode = TGSI_OPCODE_CMP; 1004bf215546Sopenharmony_ci new_inst.Instruction.NumDstRegs = 1; 1005bf215546Sopenharmony_ci reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW); 1006bf215546Sopenharmony_ci new_inst.Instruction.NumSrcRegs = 3; 1007bf215546Sopenharmony_ci reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W)); 1008bf215546Sopenharmony_ci reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); 1009bf215546Sopenharmony_ci new_inst.Src[1].Register.Negate = true; 1010bf215546Sopenharmony_ci reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); 1011bf215546Sopenharmony_ci tctx->emit_instruction(tctx, &new_inst); 1012bf215546Sopenharmony_ci } 1013bf215546Sopenharmony_ci} 1014bf215546Sopenharmony_ci 1015bf215546Sopenharmony_ci/* Inserts a MOV_SAT for the needed components of tex coord. Note that 1016bf215546Sopenharmony_ci * in the case of TXP, the clamping must happen *after* projection, so 1017bf215546Sopenharmony_ci * we need to lower TXP to TEX. 1018bf215546Sopenharmony_ci * 1019bf215546Sopenharmony_ci * MOV tmpA, src0 1020bf215546Sopenharmony_ci * if (opc == TXP) { 1021bf215546Sopenharmony_ci * ; do perspective division manually before clamping: 1022bf215546Sopenharmony_ci * RCP tmpB, tmpA.w 1023bf215546Sopenharmony_ci * MUL tmpB.<pmask>, tmpA, tmpB.xxxx 1024bf215546Sopenharmony_ci * opc = TEX; 1025bf215546Sopenharmony_ci * } 1026bf215546Sopenharmony_ci * MOV_SAT tmpA.<mask>, tmpA ; <mask> is the clamped s/t/r coords 1027bf215546Sopenharmony_ci * <opc> dst, tmpA, ... 1028bf215546Sopenharmony_ci */ 1029bf215546Sopenharmony_ci#define SAMP_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1)) 1030bf215546Sopenharmony_ci#define SAMP_TMP 2 1031bf215546Sopenharmony_cistatic int 1032bf215546Sopenharmony_citransform_samp(struct tgsi_transform_context *tctx, 1033bf215546Sopenharmony_ci struct tgsi_full_instruction *inst) 1034bf215546Sopenharmony_ci{ 1035bf215546Sopenharmony_ci struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 1036bf215546Sopenharmony_ci struct tgsi_full_src_register *coord = &inst->Src[0]; 1037bf215546Sopenharmony_ci struct tgsi_full_src_register *samp; 1038bf215546Sopenharmony_ci struct tgsi_full_instruction new_inst; 1039bf215546Sopenharmony_ci /* mask is clamped coords, pmask is all coords (for projection): */ 1040bf215546Sopenharmony_ci unsigned mask = 0, pmask = 0, smask; 1041bf215546Sopenharmony_ci unsigned tex = inst->Texture.Texture; 1042bf215546Sopenharmony_ci enum tgsi_opcode opcode = inst->Instruction.Opcode; 1043bf215546Sopenharmony_ci bool lower_txp = (opcode == TGSI_OPCODE_TXP) && 1044bf215546Sopenharmony_ci (ctx->config->lower_TXP & (1 << tex)); 1045bf215546Sopenharmony_ci 1046bf215546Sopenharmony_ci if (opcode == TGSI_OPCODE_TXB2) { 1047bf215546Sopenharmony_ci samp = &inst->Src[2]; 1048bf215546Sopenharmony_ci } else { 1049bf215546Sopenharmony_ci samp = &inst->Src[1]; 1050bf215546Sopenharmony_ci } 1051bf215546Sopenharmony_ci 1052bf215546Sopenharmony_ci /* convert sampler # to bitmask to test: */ 1053bf215546Sopenharmony_ci smask = 1 << samp->Register.Index; 1054bf215546Sopenharmony_ci 1055bf215546Sopenharmony_ci /* check if we actually need to lower this one: */ 1056bf215546Sopenharmony_ci if (!(ctx->saturate & smask) && !lower_txp) 1057bf215546Sopenharmony_ci return -1; 1058bf215546Sopenharmony_ci 1059bf215546Sopenharmony_ci /* figure out which coordinates need saturating: 1060bf215546Sopenharmony_ci * - RECT textures should not get saturated 1061bf215546Sopenharmony_ci * - array index coords should not get saturated 1062bf215546Sopenharmony_ci */ 1063bf215546Sopenharmony_ci switch (tex) { 1064bf215546Sopenharmony_ci case TGSI_TEXTURE_3D: 1065bf215546Sopenharmony_ci case TGSI_TEXTURE_CUBE: 1066bf215546Sopenharmony_ci case TGSI_TEXTURE_CUBE_ARRAY: 1067bf215546Sopenharmony_ci case TGSI_TEXTURE_SHADOWCUBE: 1068bf215546Sopenharmony_ci case TGSI_TEXTURE_SHADOWCUBE_ARRAY: 1069bf215546Sopenharmony_ci if (ctx->config->saturate_r & smask) 1070bf215546Sopenharmony_ci mask |= TGSI_WRITEMASK_Z; 1071bf215546Sopenharmony_ci pmask |= TGSI_WRITEMASK_Z; 1072bf215546Sopenharmony_ci FALLTHROUGH; 1073bf215546Sopenharmony_ci 1074bf215546Sopenharmony_ci case TGSI_TEXTURE_2D: 1075bf215546Sopenharmony_ci case TGSI_TEXTURE_2D_ARRAY: 1076bf215546Sopenharmony_ci case TGSI_TEXTURE_SHADOW2D: 1077bf215546Sopenharmony_ci case TGSI_TEXTURE_SHADOW2D_ARRAY: 1078bf215546Sopenharmony_ci case TGSI_TEXTURE_2D_MSAA: 1079bf215546Sopenharmony_ci case TGSI_TEXTURE_2D_ARRAY_MSAA: 1080bf215546Sopenharmony_ci if (ctx->config->saturate_t & smask) 1081bf215546Sopenharmony_ci mask |= TGSI_WRITEMASK_Y; 1082bf215546Sopenharmony_ci pmask |= TGSI_WRITEMASK_Y; 1083bf215546Sopenharmony_ci FALLTHROUGH; 1084bf215546Sopenharmony_ci 1085bf215546Sopenharmony_ci case TGSI_TEXTURE_1D: 1086bf215546Sopenharmony_ci case TGSI_TEXTURE_1D_ARRAY: 1087bf215546Sopenharmony_ci case TGSI_TEXTURE_SHADOW1D: 1088bf215546Sopenharmony_ci case TGSI_TEXTURE_SHADOW1D_ARRAY: 1089bf215546Sopenharmony_ci if (ctx->config->saturate_s & smask) 1090bf215546Sopenharmony_ci mask |= TGSI_WRITEMASK_X; 1091bf215546Sopenharmony_ci pmask |= TGSI_WRITEMASK_X; 1092bf215546Sopenharmony_ci break; 1093bf215546Sopenharmony_ci 1094bf215546Sopenharmony_ci case TGSI_TEXTURE_RECT: 1095bf215546Sopenharmony_ci case TGSI_TEXTURE_SHADOWRECT: 1096bf215546Sopenharmony_ci /* we don't saturate, but in case of lower_txp we 1097bf215546Sopenharmony_ci * still need to do the perspective divide: 1098bf215546Sopenharmony_ci */ 1099bf215546Sopenharmony_ci pmask = TGSI_WRITEMASK_XY; 1100bf215546Sopenharmony_ci break; 1101bf215546Sopenharmony_ci } 1102bf215546Sopenharmony_ci 1103bf215546Sopenharmony_ci /* sanity check.. driver could be asking to saturate a non- 1104bf215546Sopenharmony_ci * existent coordinate component: 1105bf215546Sopenharmony_ci */ 1106bf215546Sopenharmony_ci if (!mask && !lower_txp) 1107bf215546Sopenharmony_ci return -1; 1108bf215546Sopenharmony_ci 1109bf215546Sopenharmony_ci /* MOV tmpA, src0 */ 1110bf215546Sopenharmony_ci create_mov(tctx, &ctx->tmp[A].dst, coord, TGSI_WRITEMASK_XYZW, 0); 1111bf215546Sopenharmony_ci 1112bf215546Sopenharmony_ci /* This is a bit sad.. we need to clamp *after* the coords 1113bf215546Sopenharmony_ci * are projected, which means lowering TXP to TEX and doing 1114bf215546Sopenharmony_ci * the projection ourself. But since I haven't figured out 1115bf215546Sopenharmony_ci * how to make the lowering code deliver an electric shock 1116bf215546Sopenharmony_ci * to anyone using GL_CLAMP, we must do this instead: 1117bf215546Sopenharmony_ci */ 1118bf215546Sopenharmony_ci if (opcode == TGSI_OPCODE_TXP) { 1119bf215546Sopenharmony_ci /* RCP tmpB.x tmpA.w */ 1120bf215546Sopenharmony_ci new_inst = tgsi_default_full_instruction(); 1121bf215546Sopenharmony_ci new_inst.Instruction.Opcode = TGSI_OPCODE_RCP; 1122bf215546Sopenharmony_ci new_inst.Instruction.NumDstRegs = 1; 1123bf215546Sopenharmony_ci reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_X); 1124bf215546Sopenharmony_ci new_inst.Instruction.NumSrcRegs = 1; 1125bf215546Sopenharmony_ci reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(W, _, _, _)); 1126bf215546Sopenharmony_ci tctx->emit_instruction(tctx, &new_inst); 1127bf215546Sopenharmony_ci 1128bf215546Sopenharmony_ci /* MUL tmpA.mask, tmpA, tmpB.xxxx */ 1129bf215546Sopenharmony_ci new_inst = tgsi_default_full_instruction(); 1130bf215546Sopenharmony_ci new_inst.Instruction.Opcode = TGSI_OPCODE_MUL; 1131bf215546Sopenharmony_ci new_inst.Instruction.NumDstRegs = 1; 1132bf215546Sopenharmony_ci reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, pmask); 1133bf215546Sopenharmony_ci new_inst.Instruction.NumSrcRegs = 2; 1134bf215546Sopenharmony_ci reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W)); 1135bf215546Sopenharmony_ci reg_src(&new_inst.Src[1], &ctx->tmp[B].src, SWIZ(X, X, X, X)); 1136bf215546Sopenharmony_ci tctx->emit_instruction(tctx, &new_inst); 1137bf215546Sopenharmony_ci 1138bf215546Sopenharmony_ci opcode = TGSI_OPCODE_TEX; 1139bf215546Sopenharmony_ci } 1140bf215546Sopenharmony_ci 1141bf215546Sopenharmony_ci /* MOV_SAT tmpA.<mask>, tmpA */ 1142bf215546Sopenharmony_ci if (mask) { 1143bf215546Sopenharmony_ci create_mov(tctx, &ctx->tmp[A].dst, &ctx->tmp[A].src, mask, 1); 1144bf215546Sopenharmony_ci } 1145bf215546Sopenharmony_ci 1146bf215546Sopenharmony_ci /* modify the texture samp instruction to take fixed up coord: */ 1147bf215546Sopenharmony_ci new_inst = *inst; 1148bf215546Sopenharmony_ci new_inst.Instruction.Opcode = opcode; 1149bf215546Sopenharmony_ci new_inst.Src[0] = ctx->tmp[A].src; 1150bf215546Sopenharmony_ci tctx->emit_instruction(tctx, &new_inst); 1151bf215546Sopenharmony_ci 1152bf215546Sopenharmony_ci return 0; 1153bf215546Sopenharmony_ci} 1154bf215546Sopenharmony_ci 1155bf215546Sopenharmony_ci/* Two-sided color emulation: 1156bf215546Sopenharmony_ci * For each COLOR input, create a corresponding BCOLOR input, plus 1157bf215546Sopenharmony_ci * CMP instruction to select front or back color based on FACE 1158bf215546Sopenharmony_ci */ 1159bf215546Sopenharmony_ci#define TWOSIDE_GROW(n) ( \ 1160bf215546Sopenharmony_ci 2 + /* FACE */ \ 1161bf215546Sopenharmony_ci ((n) * 3) + /* IN[], BCOLOR[n], <intrp> */\ 1162bf215546Sopenharmony_ci ((n) * 1) + /* TEMP[] */ \ 1163bf215546Sopenharmony_ci ((n) * NINST(3)) /* CMP instr */ \ 1164bf215546Sopenharmony_ci ) 1165bf215546Sopenharmony_ci 1166bf215546Sopenharmony_cistatic void 1167bf215546Sopenharmony_ciemit_twoside(struct tgsi_transform_context *tctx) 1168bf215546Sopenharmony_ci{ 1169bf215546Sopenharmony_ci struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 1170bf215546Sopenharmony_ci struct tgsi_shader_info *info = ctx->info; 1171bf215546Sopenharmony_ci struct tgsi_full_declaration decl; 1172bf215546Sopenharmony_ci struct tgsi_full_instruction new_inst; 1173bf215546Sopenharmony_ci unsigned inbase, tmpbase; 1174bf215546Sopenharmony_ci unsigned i; 1175bf215546Sopenharmony_ci 1176bf215546Sopenharmony_ci inbase = info->file_max[TGSI_FILE_INPUT] + 1; 1177bf215546Sopenharmony_ci tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1; 1178bf215546Sopenharmony_ci 1179bf215546Sopenharmony_ci /* additional inputs for BCOLOR's */ 1180bf215546Sopenharmony_ci for (i = 0; i < ctx->two_side_colors; i++) { 1181bf215546Sopenharmony_ci unsigned in_idx = ctx->two_side_idx[i]; 1182bf215546Sopenharmony_ci decl = tgsi_default_full_declaration(); 1183bf215546Sopenharmony_ci decl.Declaration.File = TGSI_FILE_INPUT; 1184bf215546Sopenharmony_ci decl.Declaration.Semantic = true; 1185bf215546Sopenharmony_ci decl.Range.First = decl.Range.Last = inbase + i; 1186bf215546Sopenharmony_ci decl.Semantic.Name = TGSI_SEMANTIC_BCOLOR; 1187bf215546Sopenharmony_ci decl.Semantic.Index = info->input_semantic_index[in_idx]; 1188bf215546Sopenharmony_ci decl.Declaration.Interpolate = true; 1189bf215546Sopenharmony_ci decl.Interp.Interpolate = info->input_interpolate[in_idx]; 1190bf215546Sopenharmony_ci decl.Interp.Location = info->input_interpolate_loc[in_idx]; 1191bf215546Sopenharmony_ci tctx->emit_declaration(tctx, &decl); 1192bf215546Sopenharmony_ci } 1193bf215546Sopenharmony_ci 1194bf215546Sopenharmony_ci /* additional input for FACE */ 1195bf215546Sopenharmony_ci if (ctx->two_side_colors && (ctx->face_idx == -1)) { 1196bf215546Sopenharmony_ci decl = tgsi_default_full_declaration(); 1197bf215546Sopenharmony_ci decl.Declaration.File = TGSI_FILE_INPUT; 1198bf215546Sopenharmony_ci decl.Declaration.Semantic = true; 1199bf215546Sopenharmony_ci decl.Range.First = decl.Range.Last = inbase + ctx->two_side_colors; 1200bf215546Sopenharmony_ci decl.Semantic.Name = TGSI_SEMANTIC_FACE; 1201bf215546Sopenharmony_ci decl.Semantic.Index = 0; 1202bf215546Sopenharmony_ci tctx->emit_declaration(tctx, &decl); 1203bf215546Sopenharmony_ci 1204bf215546Sopenharmony_ci ctx->face_idx = decl.Range.First; 1205bf215546Sopenharmony_ci } 1206bf215546Sopenharmony_ci 1207bf215546Sopenharmony_ci /* additional temps for COLOR/BCOLOR selection: */ 1208bf215546Sopenharmony_ci for (i = 0; i < ctx->two_side_colors; i++) { 1209bf215546Sopenharmony_ci decl = tgsi_default_full_declaration(); 1210bf215546Sopenharmony_ci decl.Declaration.File = TGSI_FILE_TEMPORARY; 1211bf215546Sopenharmony_ci decl.Range.First = decl.Range.Last = tmpbase + ctx->numtmp + i; 1212bf215546Sopenharmony_ci tctx->emit_declaration(tctx, &decl); 1213bf215546Sopenharmony_ci } 1214bf215546Sopenharmony_ci 1215bf215546Sopenharmony_ci /* and finally additional instructions to select COLOR/BCOLOR: */ 1216bf215546Sopenharmony_ci for (i = 0; i < ctx->two_side_colors; i++) { 1217bf215546Sopenharmony_ci new_inst = tgsi_default_full_instruction(); 1218bf215546Sopenharmony_ci new_inst.Instruction.Opcode = TGSI_OPCODE_CMP; 1219bf215546Sopenharmony_ci 1220bf215546Sopenharmony_ci new_inst.Instruction.NumDstRegs = 1; 1221bf215546Sopenharmony_ci new_inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY; 1222bf215546Sopenharmony_ci new_inst.Dst[0].Register.Index = tmpbase + ctx->numtmp + i; 1223bf215546Sopenharmony_ci new_inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW; 1224bf215546Sopenharmony_ci 1225bf215546Sopenharmony_ci new_inst.Instruction.NumSrcRegs = 3; 1226bf215546Sopenharmony_ci new_inst.Src[0].Register.File = TGSI_FILE_INPUT; 1227bf215546Sopenharmony_ci new_inst.Src[0].Register.Index = ctx->face_idx; 1228bf215546Sopenharmony_ci new_inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X; 1229bf215546Sopenharmony_ci new_inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X; 1230bf215546Sopenharmony_ci new_inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X; 1231bf215546Sopenharmony_ci new_inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_X; 1232bf215546Sopenharmony_ci new_inst.Src[1].Register.File = TGSI_FILE_INPUT; 1233bf215546Sopenharmony_ci new_inst.Src[1].Register.Index = inbase + i; 1234bf215546Sopenharmony_ci new_inst.Src[1].Register.SwizzleX = TGSI_SWIZZLE_X; 1235bf215546Sopenharmony_ci new_inst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_Y; 1236bf215546Sopenharmony_ci new_inst.Src[1].Register.SwizzleZ = TGSI_SWIZZLE_Z; 1237bf215546Sopenharmony_ci new_inst.Src[1].Register.SwizzleW = TGSI_SWIZZLE_W; 1238bf215546Sopenharmony_ci new_inst.Src[2].Register.File = TGSI_FILE_INPUT; 1239bf215546Sopenharmony_ci new_inst.Src[2].Register.Index = ctx->two_side_idx[i]; 1240bf215546Sopenharmony_ci new_inst.Src[2].Register.SwizzleX = TGSI_SWIZZLE_X; 1241bf215546Sopenharmony_ci new_inst.Src[2].Register.SwizzleY = TGSI_SWIZZLE_Y; 1242bf215546Sopenharmony_ci new_inst.Src[2].Register.SwizzleZ = TGSI_SWIZZLE_Z; 1243bf215546Sopenharmony_ci new_inst.Src[2].Register.SwizzleW = TGSI_SWIZZLE_W; 1244bf215546Sopenharmony_ci 1245bf215546Sopenharmony_ci tctx->emit_instruction(tctx, &new_inst); 1246bf215546Sopenharmony_ci } 1247bf215546Sopenharmony_ci} 1248bf215546Sopenharmony_ci 1249bf215546Sopenharmony_cistatic void 1250bf215546Sopenharmony_ciemit_decls(struct tgsi_transform_context *tctx) 1251bf215546Sopenharmony_ci{ 1252bf215546Sopenharmony_ci struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 1253bf215546Sopenharmony_ci struct tgsi_shader_info *info = ctx->info; 1254bf215546Sopenharmony_ci struct tgsi_full_declaration decl; 1255bf215546Sopenharmony_ci struct tgsi_full_immediate immed; 1256bf215546Sopenharmony_ci unsigned tmpbase; 1257bf215546Sopenharmony_ci unsigned i; 1258bf215546Sopenharmony_ci 1259bf215546Sopenharmony_ci tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1; 1260bf215546Sopenharmony_ci 1261bf215546Sopenharmony_ci ctx->color_base = tmpbase + ctx->numtmp; 1262bf215546Sopenharmony_ci 1263bf215546Sopenharmony_ci /* declare immediate: */ 1264bf215546Sopenharmony_ci immed = tgsi_default_full_immediate(); 1265bf215546Sopenharmony_ci immed.Immediate.NrTokens = 1 + 4; /* one for the token itself */ 1266bf215546Sopenharmony_ci immed.u[0].Float = 0.0; 1267bf215546Sopenharmony_ci immed.u[1].Float = 1.0; 1268bf215546Sopenharmony_ci immed.u[2].Float = 128.0; 1269bf215546Sopenharmony_ci immed.u[3].Float = 0.0; 1270bf215546Sopenharmony_ci tctx->emit_immediate(tctx, &immed); 1271bf215546Sopenharmony_ci 1272bf215546Sopenharmony_ci ctx->imm.Register.File = TGSI_FILE_IMMEDIATE; 1273bf215546Sopenharmony_ci ctx->imm.Register.Index = info->immediate_count; 1274bf215546Sopenharmony_ci ctx->imm.Register.SwizzleX = TGSI_SWIZZLE_X; 1275bf215546Sopenharmony_ci ctx->imm.Register.SwizzleY = TGSI_SWIZZLE_Y; 1276bf215546Sopenharmony_ci ctx->imm.Register.SwizzleZ = TGSI_SWIZZLE_Z; 1277bf215546Sopenharmony_ci ctx->imm.Register.SwizzleW = TGSI_SWIZZLE_W; 1278bf215546Sopenharmony_ci 1279bf215546Sopenharmony_ci /* declare temp regs: */ 1280bf215546Sopenharmony_ci for (i = 0; i < ctx->numtmp; i++) { 1281bf215546Sopenharmony_ci decl = tgsi_default_full_declaration(); 1282bf215546Sopenharmony_ci decl.Declaration.File = TGSI_FILE_TEMPORARY; 1283bf215546Sopenharmony_ci decl.Range.First = decl.Range.Last = tmpbase + i; 1284bf215546Sopenharmony_ci tctx->emit_declaration(tctx, &decl); 1285bf215546Sopenharmony_ci 1286bf215546Sopenharmony_ci ctx->tmp[i].src.Register.File = TGSI_FILE_TEMPORARY; 1287bf215546Sopenharmony_ci ctx->tmp[i].src.Register.Index = tmpbase + i; 1288bf215546Sopenharmony_ci ctx->tmp[i].src.Register.SwizzleX = TGSI_SWIZZLE_X; 1289bf215546Sopenharmony_ci ctx->tmp[i].src.Register.SwizzleY = TGSI_SWIZZLE_Y; 1290bf215546Sopenharmony_ci ctx->tmp[i].src.Register.SwizzleZ = TGSI_SWIZZLE_Z; 1291bf215546Sopenharmony_ci ctx->tmp[i].src.Register.SwizzleW = TGSI_SWIZZLE_W; 1292bf215546Sopenharmony_ci 1293bf215546Sopenharmony_ci ctx->tmp[i].dst.Register.File = TGSI_FILE_TEMPORARY; 1294bf215546Sopenharmony_ci ctx->tmp[i].dst.Register.Index = tmpbase + i; 1295bf215546Sopenharmony_ci ctx->tmp[i].dst.Register.WriteMask = TGSI_WRITEMASK_XYZW; 1296bf215546Sopenharmony_ci } 1297bf215546Sopenharmony_ci 1298bf215546Sopenharmony_ci if (ctx->two_side_colors) 1299bf215546Sopenharmony_ci emit_twoside(tctx); 1300bf215546Sopenharmony_ci} 1301bf215546Sopenharmony_ci 1302bf215546Sopenharmony_cistatic void 1303bf215546Sopenharmony_cirename_color_inputs(struct tgsi_lowering_context *ctx, 1304bf215546Sopenharmony_ci struct tgsi_full_instruction *inst) 1305bf215546Sopenharmony_ci{ 1306bf215546Sopenharmony_ci unsigned i, j; 1307bf215546Sopenharmony_ci for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1308bf215546Sopenharmony_ci struct tgsi_src_register *src = &inst->Src[i].Register; 1309bf215546Sopenharmony_ci if (src->File == TGSI_FILE_INPUT) { 1310bf215546Sopenharmony_ci for (j = 0; j < ctx->two_side_colors; j++) { 1311bf215546Sopenharmony_ci if (src->Index == (int)ctx->two_side_idx[j]) { 1312bf215546Sopenharmony_ci src->File = TGSI_FILE_TEMPORARY; 1313bf215546Sopenharmony_ci src->Index = ctx->color_base + j; 1314bf215546Sopenharmony_ci break; 1315bf215546Sopenharmony_ci } 1316bf215546Sopenharmony_ci } 1317bf215546Sopenharmony_ci } 1318bf215546Sopenharmony_ci } 1319bf215546Sopenharmony_ci 1320bf215546Sopenharmony_ci} 1321bf215546Sopenharmony_ci 1322bf215546Sopenharmony_cistatic void 1323bf215546Sopenharmony_citransform_instr(struct tgsi_transform_context *tctx, 1324bf215546Sopenharmony_ci struct tgsi_full_instruction *inst) 1325bf215546Sopenharmony_ci{ 1326bf215546Sopenharmony_ci struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx); 1327bf215546Sopenharmony_ci 1328bf215546Sopenharmony_ci if (!ctx->emitted_decls) { 1329bf215546Sopenharmony_ci emit_decls(tctx); 1330bf215546Sopenharmony_ci ctx->emitted_decls = 1; 1331bf215546Sopenharmony_ci } 1332bf215546Sopenharmony_ci 1333bf215546Sopenharmony_ci /* if emulating two-sided-color, we need to re-write some 1334bf215546Sopenharmony_ci * src registers: 1335bf215546Sopenharmony_ci */ 1336bf215546Sopenharmony_ci if (ctx->two_side_colors) 1337bf215546Sopenharmony_ci rename_color_inputs(ctx, inst); 1338bf215546Sopenharmony_ci 1339bf215546Sopenharmony_ci switch (inst->Instruction.Opcode) { 1340bf215546Sopenharmony_ci case TGSI_OPCODE_DST: 1341bf215546Sopenharmony_ci if (!ctx->config->lower_DST) 1342bf215546Sopenharmony_ci goto skip; 1343bf215546Sopenharmony_ci transform_dst(tctx, inst); 1344bf215546Sopenharmony_ci break; 1345bf215546Sopenharmony_ci case TGSI_OPCODE_LRP: 1346bf215546Sopenharmony_ci if (!ctx->config->lower_LRP) 1347bf215546Sopenharmony_ci goto skip; 1348bf215546Sopenharmony_ci transform_lrp(tctx, inst); 1349bf215546Sopenharmony_ci break; 1350bf215546Sopenharmony_ci case TGSI_OPCODE_FRC: 1351bf215546Sopenharmony_ci if (!ctx->config->lower_FRC) 1352bf215546Sopenharmony_ci goto skip; 1353bf215546Sopenharmony_ci transform_frc(tctx, inst); 1354bf215546Sopenharmony_ci break; 1355bf215546Sopenharmony_ci case TGSI_OPCODE_POW: 1356bf215546Sopenharmony_ci if (!ctx->config->lower_POW) 1357bf215546Sopenharmony_ci goto skip; 1358bf215546Sopenharmony_ci transform_pow(tctx, inst); 1359bf215546Sopenharmony_ci break; 1360bf215546Sopenharmony_ci case TGSI_OPCODE_LIT: 1361bf215546Sopenharmony_ci if (!ctx->config->lower_LIT) 1362bf215546Sopenharmony_ci goto skip; 1363bf215546Sopenharmony_ci transform_lit(tctx, inst); 1364bf215546Sopenharmony_ci break; 1365bf215546Sopenharmony_ci case TGSI_OPCODE_EXP: 1366bf215546Sopenharmony_ci if (!ctx->config->lower_EXP) 1367bf215546Sopenharmony_ci goto skip; 1368bf215546Sopenharmony_ci transform_exp(tctx, inst); 1369bf215546Sopenharmony_ci break; 1370bf215546Sopenharmony_ci case TGSI_OPCODE_LOG: 1371bf215546Sopenharmony_ci if (!ctx->config->lower_LOG) 1372bf215546Sopenharmony_ci goto skip; 1373bf215546Sopenharmony_ci transform_log(tctx, inst); 1374bf215546Sopenharmony_ci break; 1375bf215546Sopenharmony_ci case TGSI_OPCODE_DP4: 1376bf215546Sopenharmony_ci if (!ctx->config->lower_DP4) 1377bf215546Sopenharmony_ci goto skip; 1378bf215546Sopenharmony_ci transform_dotp(tctx, inst); 1379bf215546Sopenharmony_ci break; 1380bf215546Sopenharmony_ci case TGSI_OPCODE_DP3: 1381bf215546Sopenharmony_ci if (!ctx->config->lower_DP3) 1382bf215546Sopenharmony_ci goto skip; 1383bf215546Sopenharmony_ci transform_dotp(tctx, inst); 1384bf215546Sopenharmony_ci break; 1385bf215546Sopenharmony_ci case TGSI_OPCODE_DP2: 1386bf215546Sopenharmony_ci if (!ctx->config->lower_DP2) 1387bf215546Sopenharmony_ci goto skip; 1388bf215546Sopenharmony_ci transform_dotp(tctx, inst); 1389bf215546Sopenharmony_ci break; 1390bf215546Sopenharmony_ci case TGSI_OPCODE_FLR: 1391bf215546Sopenharmony_ci if (!ctx->config->lower_FLR) 1392bf215546Sopenharmony_ci goto skip; 1393bf215546Sopenharmony_ci transform_flr_ceil(tctx, inst); 1394bf215546Sopenharmony_ci break; 1395bf215546Sopenharmony_ci case TGSI_OPCODE_CEIL: 1396bf215546Sopenharmony_ci if (!ctx->config->lower_CEIL) 1397bf215546Sopenharmony_ci goto skip; 1398bf215546Sopenharmony_ci transform_flr_ceil(tctx, inst); 1399bf215546Sopenharmony_ci break; 1400bf215546Sopenharmony_ci case TGSI_OPCODE_TRUNC: 1401bf215546Sopenharmony_ci if (!ctx->config->lower_TRUNC) 1402bf215546Sopenharmony_ci goto skip; 1403bf215546Sopenharmony_ci transform_trunc(tctx, inst); 1404bf215546Sopenharmony_ci break; 1405bf215546Sopenharmony_ci case TGSI_OPCODE_TEX: 1406bf215546Sopenharmony_ci case TGSI_OPCODE_TXP: 1407bf215546Sopenharmony_ci case TGSI_OPCODE_TXB: 1408bf215546Sopenharmony_ci case TGSI_OPCODE_TXB2: 1409bf215546Sopenharmony_ci case TGSI_OPCODE_TXL: 1410bf215546Sopenharmony_ci if (transform_samp(tctx, inst)) 1411bf215546Sopenharmony_ci goto skip; 1412bf215546Sopenharmony_ci break; 1413bf215546Sopenharmony_ci default: 1414bf215546Sopenharmony_ci skip: 1415bf215546Sopenharmony_ci tctx->emit_instruction(tctx, inst); 1416bf215546Sopenharmony_ci break; 1417bf215546Sopenharmony_ci } 1418bf215546Sopenharmony_ci} 1419bf215546Sopenharmony_ci 1420bf215546Sopenharmony_ci/* returns NULL if no lowering required, else returns the new 1421bf215546Sopenharmony_ci * tokens (which caller is required to free()). In either case 1422bf215546Sopenharmony_ci * returns the current info. 1423bf215546Sopenharmony_ci */ 1424bf215546Sopenharmony_ciconst struct tgsi_token * 1425bf215546Sopenharmony_citgsi_transform_lowering(const struct tgsi_lowering_config *config, 1426bf215546Sopenharmony_ci const struct tgsi_token *tokens, 1427bf215546Sopenharmony_ci struct tgsi_shader_info *info) 1428bf215546Sopenharmony_ci{ 1429bf215546Sopenharmony_ci struct tgsi_lowering_context ctx; 1430bf215546Sopenharmony_ci struct tgsi_token *newtoks; 1431bf215546Sopenharmony_ci int newlen, numtmp; 1432bf215546Sopenharmony_ci 1433bf215546Sopenharmony_ci /* sanity check in case limit is ever increased: */ 1434bf215546Sopenharmony_ci STATIC_ASSERT((sizeof(config->saturate_s) * 8) >= PIPE_MAX_SAMPLERS); 1435bf215546Sopenharmony_ci 1436bf215546Sopenharmony_ci /* sanity check the lowering */ 1437bf215546Sopenharmony_ci assert(!(config->lower_FRC && (config->lower_FLR || config->lower_CEIL))); 1438bf215546Sopenharmony_ci assert(!(config->lower_FRC && config->lower_TRUNC)); 1439bf215546Sopenharmony_ci 1440bf215546Sopenharmony_ci memset(&ctx, 0, sizeof(ctx)); 1441bf215546Sopenharmony_ci ctx.base.transform_instruction = transform_instr; 1442bf215546Sopenharmony_ci ctx.info = info; 1443bf215546Sopenharmony_ci ctx.config = config; 1444bf215546Sopenharmony_ci 1445bf215546Sopenharmony_ci tgsi_scan_shader(tokens, info); 1446bf215546Sopenharmony_ci 1447bf215546Sopenharmony_ci /* if we are adding fragment shader support to emulate two-sided 1448bf215546Sopenharmony_ci * color, then figure out the number of additional inputs we need 1449bf215546Sopenharmony_ci * to create for BCOLOR's.. 1450bf215546Sopenharmony_ci */ 1451bf215546Sopenharmony_ci if ((info->processor == PIPE_SHADER_FRAGMENT) && 1452bf215546Sopenharmony_ci config->color_two_side) { 1453bf215546Sopenharmony_ci int i; 1454bf215546Sopenharmony_ci ctx.face_idx = -1; 1455bf215546Sopenharmony_ci for (i = 0; i <= info->file_max[TGSI_FILE_INPUT]; i++) { 1456bf215546Sopenharmony_ci if (info->input_semantic_name[i] == TGSI_SEMANTIC_COLOR) 1457bf215546Sopenharmony_ci ctx.two_side_idx[ctx.two_side_colors++] = i; 1458bf215546Sopenharmony_ci if (info->input_semantic_name[i] == TGSI_SEMANTIC_FACE) 1459bf215546Sopenharmony_ci ctx.face_idx = i; 1460bf215546Sopenharmony_ci } 1461bf215546Sopenharmony_ci } 1462bf215546Sopenharmony_ci 1463bf215546Sopenharmony_ci ctx.saturate = config->saturate_r | config->saturate_s | config->saturate_t; 1464bf215546Sopenharmony_ci 1465bf215546Sopenharmony_ci#define OPCS(x) ((config->lower_ ## x) ? info->opcode_count[TGSI_OPCODE_ ## x] : 0) 1466bf215546Sopenharmony_ci /* if there are no instructions to lower, then we are done: */ 1467bf215546Sopenharmony_ci if (!(OPCS(DST) || 1468bf215546Sopenharmony_ci OPCS(LRP) || 1469bf215546Sopenharmony_ci OPCS(FRC) || 1470bf215546Sopenharmony_ci OPCS(POW) || 1471bf215546Sopenharmony_ci OPCS(LIT) || 1472bf215546Sopenharmony_ci OPCS(EXP) || 1473bf215546Sopenharmony_ci OPCS(LOG) || 1474bf215546Sopenharmony_ci OPCS(DP4) || 1475bf215546Sopenharmony_ci OPCS(DP3) || 1476bf215546Sopenharmony_ci OPCS(DP2) || 1477bf215546Sopenharmony_ci OPCS(FLR) || 1478bf215546Sopenharmony_ci OPCS(CEIL) || 1479bf215546Sopenharmony_ci OPCS(TRUNC) || 1480bf215546Sopenharmony_ci OPCS(TXP) || 1481bf215546Sopenharmony_ci ctx.two_side_colors || 1482bf215546Sopenharmony_ci ctx.saturate)) 1483bf215546Sopenharmony_ci return NULL; 1484bf215546Sopenharmony_ci 1485bf215546Sopenharmony_ci#if 0 /* debug */ 1486bf215546Sopenharmony_ci _debug_printf("BEFORE:"); 1487bf215546Sopenharmony_ci tgsi_dump(tokens, 0); 1488bf215546Sopenharmony_ci#endif 1489bf215546Sopenharmony_ci 1490bf215546Sopenharmony_ci numtmp = 0; 1491bf215546Sopenharmony_ci newlen = tgsi_num_tokens(tokens); 1492bf215546Sopenharmony_ci if (OPCS(DST)) { 1493bf215546Sopenharmony_ci newlen += DST_GROW * OPCS(DST); 1494bf215546Sopenharmony_ci numtmp = MAX2(numtmp, DST_TMP); 1495bf215546Sopenharmony_ci } 1496bf215546Sopenharmony_ci if (OPCS(LRP)) { 1497bf215546Sopenharmony_ci newlen += LRP_GROW * OPCS(LRP); 1498bf215546Sopenharmony_ci numtmp = MAX2(numtmp, LRP_TMP); 1499bf215546Sopenharmony_ci } 1500bf215546Sopenharmony_ci if (OPCS(FRC)) { 1501bf215546Sopenharmony_ci newlen += FRC_GROW * OPCS(FRC); 1502bf215546Sopenharmony_ci numtmp = MAX2(numtmp, FRC_TMP); 1503bf215546Sopenharmony_ci } 1504bf215546Sopenharmony_ci if (OPCS(POW)) { 1505bf215546Sopenharmony_ci newlen += POW_GROW * OPCS(POW); 1506bf215546Sopenharmony_ci numtmp = MAX2(numtmp, POW_TMP); 1507bf215546Sopenharmony_ci } 1508bf215546Sopenharmony_ci if (OPCS(LIT)) { 1509bf215546Sopenharmony_ci newlen += LIT_GROW * OPCS(LIT); 1510bf215546Sopenharmony_ci numtmp = MAX2(numtmp, LIT_TMP); 1511bf215546Sopenharmony_ci } 1512bf215546Sopenharmony_ci if (OPCS(EXP)) { 1513bf215546Sopenharmony_ci newlen += EXP_GROW * OPCS(EXP); 1514bf215546Sopenharmony_ci numtmp = MAX2(numtmp, EXP_TMP); 1515bf215546Sopenharmony_ci } 1516bf215546Sopenharmony_ci if (OPCS(LOG)) { 1517bf215546Sopenharmony_ci newlen += LOG_GROW * OPCS(LOG); 1518bf215546Sopenharmony_ci numtmp = MAX2(numtmp, LOG_TMP); 1519bf215546Sopenharmony_ci } 1520bf215546Sopenharmony_ci if (OPCS(DP4)) { 1521bf215546Sopenharmony_ci newlen += DP4_GROW * OPCS(DP4); 1522bf215546Sopenharmony_ci numtmp = MAX2(numtmp, DOTP_TMP); 1523bf215546Sopenharmony_ci } 1524bf215546Sopenharmony_ci if (OPCS(DP3)) { 1525bf215546Sopenharmony_ci newlen += DP3_GROW * OPCS(DP3); 1526bf215546Sopenharmony_ci numtmp = MAX2(numtmp, DOTP_TMP); 1527bf215546Sopenharmony_ci } 1528bf215546Sopenharmony_ci if (OPCS(DP2)) { 1529bf215546Sopenharmony_ci newlen += DP2_GROW * OPCS(DP2); 1530bf215546Sopenharmony_ci numtmp = MAX2(numtmp, DOTP_TMP); 1531bf215546Sopenharmony_ci } 1532bf215546Sopenharmony_ci if (OPCS(FLR)) { 1533bf215546Sopenharmony_ci newlen += FLR_GROW * OPCS(FLR); 1534bf215546Sopenharmony_ci numtmp = MAX2(numtmp, FLR_TMP); 1535bf215546Sopenharmony_ci } 1536bf215546Sopenharmony_ci if (OPCS(CEIL)) { 1537bf215546Sopenharmony_ci newlen += CEIL_GROW * OPCS(CEIL); 1538bf215546Sopenharmony_ci numtmp = MAX2(numtmp, CEIL_TMP); 1539bf215546Sopenharmony_ci } 1540bf215546Sopenharmony_ci if (OPCS(TRUNC)) { 1541bf215546Sopenharmony_ci newlen += TRUNC_GROW * OPCS(TRUNC); 1542bf215546Sopenharmony_ci numtmp = MAX2(numtmp, TRUNC_TMP); 1543bf215546Sopenharmony_ci } 1544bf215546Sopenharmony_ci if (ctx.saturate || config->lower_TXP) { 1545bf215546Sopenharmony_ci int n = 0; 1546bf215546Sopenharmony_ci 1547bf215546Sopenharmony_ci if (ctx.saturate) { 1548bf215546Sopenharmony_ci n = info->opcode_count[TGSI_OPCODE_TEX] + 1549bf215546Sopenharmony_ci info->opcode_count[TGSI_OPCODE_TXP] + 1550bf215546Sopenharmony_ci info->opcode_count[TGSI_OPCODE_TXB] + 1551bf215546Sopenharmony_ci info->opcode_count[TGSI_OPCODE_TXB2] + 1552bf215546Sopenharmony_ci info->opcode_count[TGSI_OPCODE_TXL]; 1553bf215546Sopenharmony_ci } else if (config->lower_TXP) { 1554bf215546Sopenharmony_ci n = info->opcode_count[TGSI_OPCODE_TXP]; 1555bf215546Sopenharmony_ci } 1556bf215546Sopenharmony_ci 1557bf215546Sopenharmony_ci newlen += SAMP_GROW * n; 1558bf215546Sopenharmony_ci numtmp = MAX2(numtmp, SAMP_TMP); 1559bf215546Sopenharmony_ci } 1560bf215546Sopenharmony_ci 1561bf215546Sopenharmony_ci /* specifically don't include two_side_colors temps in the count: */ 1562bf215546Sopenharmony_ci ctx.numtmp = numtmp; 1563bf215546Sopenharmony_ci 1564bf215546Sopenharmony_ci if (ctx.two_side_colors) { 1565bf215546Sopenharmony_ci newlen += TWOSIDE_GROW(ctx.two_side_colors); 1566bf215546Sopenharmony_ci /* note: we permanently consume temp regs, re-writing references 1567bf215546Sopenharmony_ci * to IN.COLOR[n] to TEMP[m] (holding the output of of the CMP 1568bf215546Sopenharmony_ci * instruction that selects which varying to use): 1569bf215546Sopenharmony_ci */ 1570bf215546Sopenharmony_ci numtmp += ctx.two_side_colors; 1571bf215546Sopenharmony_ci } 1572bf215546Sopenharmony_ci 1573bf215546Sopenharmony_ci newlen += 2 * numtmp; 1574bf215546Sopenharmony_ci newlen += 5; /* immediate */ 1575bf215546Sopenharmony_ci 1576bf215546Sopenharmony_ci newtoks = tgsi_transform_shader(tokens, newlen, &ctx.base); 1577bf215546Sopenharmony_ci if (!newtoks) 1578bf215546Sopenharmony_ci return NULL; 1579bf215546Sopenharmony_ci 1580bf215546Sopenharmony_ci tgsi_scan_shader(newtoks, info); 1581bf215546Sopenharmony_ci 1582bf215546Sopenharmony_ci#if 0 /* debug */ 1583bf215546Sopenharmony_ci _debug_printf("AFTER:"); 1584bf215546Sopenharmony_ci tgsi_dump(newtoks, 0); 1585bf215546Sopenharmony_ci#endif 1586bf215546Sopenharmony_ci 1587bf215546Sopenharmony_ci return newtoks; 1588bf215546Sopenharmony_ci} 1589