1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21bf215546Sopenharmony_ci * SOFTWARE.
22bf215546Sopenharmony_ci *
23bf215546Sopenharmony_ci * Authors:
24bf215546Sopenharmony_ci *    Rob Clark <robclark@freedesktop.org>
25bf215546Sopenharmony_ci */
26bf215546Sopenharmony_ci
27bf215546Sopenharmony_ci#include "tgsi/tgsi_transform.h"
28bf215546Sopenharmony_ci#include "tgsi/tgsi_scan.h"
29bf215546Sopenharmony_ci#include "tgsi/tgsi_dump.h"
30bf215546Sopenharmony_ci
31bf215546Sopenharmony_ci#include "util/compiler.h"
32bf215546Sopenharmony_ci#include "util/u_debug.h"
33bf215546Sopenharmony_ci#include "util/u_math.h"
34bf215546Sopenharmony_ci
35bf215546Sopenharmony_ci#include "tgsi_lowering.h"
36bf215546Sopenharmony_ci
37bf215546Sopenharmony_cistruct tgsi_lowering_context {
38bf215546Sopenharmony_ci   struct tgsi_transform_context base;
39bf215546Sopenharmony_ci   const struct tgsi_lowering_config *config;
40bf215546Sopenharmony_ci   struct tgsi_shader_info *info;
41bf215546Sopenharmony_ci   unsigned two_side_colors;
42bf215546Sopenharmony_ci   unsigned two_side_idx[PIPE_MAX_SHADER_INPUTS];
43bf215546Sopenharmony_ci   unsigned color_base;  /* base register for chosen COLOR/BCOLOR's */
44bf215546Sopenharmony_ci   int face_idx;
45bf215546Sopenharmony_ci   unsigned numtmp;
46bf215546Sopenharmony_ci   struct {
47bf215546Sopenharmony_ci      struct tgsi_full_src_register src;
48bf215546Sopenharmony_ci      struct tgsi_full_dst_register dst;
49bf215546Sopenharmony_ci   } tmp[2];
50bf215546Sopenharmony_ci#define A 0
51bf215546Sopenharmony_ci#define B 1
52bf215546Sopenharmony_ci   struct tgsi_full_src_register imm;
53bf215546Sopenharmony_ci   int emitted_decls;
54bf215546Sopenharmony_ci   unsigned saturate;
55bf215546Sopenharmony_ci};
56bf215546Sopenharmony_ci
/*
 * Downcast from the generic transform context to the lowering context.
 * Relies on `base` being the first member of struct tgsi_lowering_context.
 */
static inline struct tgsi_lowering_context *
tgsi_lowering_context(struct tgsi_transform_context *tctx)
{
   struct tgsi_lowering_context *ctx = (struct tgsi_lowering_context *)tctx;

   return ctx;
}
62bf215546Sopenharmony_ci
63bf215546Sopenharmony_ci/*
64bf215546Sopenharmony_ci * Utility helpers:
65bf215546Sopenharmony_ci */
66bf215546Sopenharmony_ci
67bf215546Sopenharmony_cistatic void
68bf215546Sopenharmony_cireg_dst(struct tgsi_full_dst_register *dst,
69bf215546Sopenharmony_ci	const struct tgsi_full_dst_register *orig_dst, unsigned wrmask)
70bf215546Sopenharmony_ci{
71bf215546Sopenharmony_ci   *dst = *orig_dst;
72bf215546Sopenharmony_ci   dst->Register.WriteMask &= wrmask;
73bf215546Sopenharmony_ci   assert(dst->Register.WriteMask);
74bf215546Sopenharmony_ci}
75bf215546Sopenharmony_ci
76bf215546Sopenharmony_cistatic inline void
77bf215546Sopenharmony_ciget_swiz(unsigned *swiz, const struct tgsi_src_register *src)
78bf215546Sopenharmony_ci{
79bf215546Sopenharmony_ci   swiz[0] = src->SwizzleX;
80bf215546Sopenharmony_ci   swiz[1] = src->SwizzleY;
81bf215546Sopenharmony_ci   swiz[2] = src->SwizzleZ;
82bf215546Sopenharmony_ci   swiz[3] = src->SwizzleW;
83bf215546Sopenharmony_ci}
84bf215546Sopenharmony_ci
85bf215546Sopenharmony_cistatic void
86bf215546Sopenharmony_cireg_src(struct tgsi_full_src_register *src,
87bf215546Sopenharmony_ci	const struct tgsi_full_src_register *orig_src,
88bf215546Sopenharmony_ci	unsigned sx, unsigned sy, unsigned sz, unsigned sw)
89bf215546Sopenharmony_ci{
90bf215546Sopenharmony_ci   unsigned swiz[4];
91bf215546Sopenharmony_ci   get_swiz(swiz, &orig_src->Register);
92bf215546Sopenharmony_ci   *src = *orig_src;
93bf215546Sopenharmony_ci   src->Register.SwizzleX = swiz[sx];
94bf215546Sopenharmony_ci   src->Register.SwizzleY = swiz[sy];
95bf215546Sopenharmony_ci   src->Register.SwizzleZ = swiz[sz];
96bf215546Sopenharmony_ci   src->Register.SwizzleW = swiz[sw];
97bf215546Sopenharmony_ci}
98bf215546Sopenharmony_ci
/*
 * SWIZ(x,y,z,w) expands to four comma-separated TGSI_SWIZZLE_* selectors.
 * A channel written as "_" is a don't-care value; it expands to
 * TGSI_SWIZZLE_X.
 */
#define TGSI_SWIZZLE__ TGSI_SWIZZLE_X
#define SWIZ(x,y,z,w)   \
   TGSI_SWIZZLE_ ## x,  \
   TGSI_SWIZZLE_ ## y,  \
   TGSI_SWIZZLE_ ## z,  \
   TGSI_SWIZZLE_ ## w
102bf215546Sopenharmony_ci
103bf215546Sopenharmony_ci/*
104bf215546Sopenharmony_ci * if (dst.x aliases src.x) {
105bf215546Sopenharmony_ci *   MOV tmpA.x, src.x
106bf215546Sopenharmony_ci *   src = tmpA
107bf215546Sopenharmony_ci * }
108bf215546Sopenharmony_ci * COS dst.x, src.x
109bf215546Sopenharmony_ci * SIN dst.y, src.x
110bf215546Sopenharmony_ci * MOV dst.zw, imm{0.0, 1.0}
111bf215546Sopenharmony_ci */
112bf215546Sopenharmony_cistatic bool
113bf215546Sopenharmony_cialiases(const struct tgsi_full_dst_register *dst, unsigned dst_mask,
114bf215546Sopenharmony_ci	const struct tgsi_full_src_register *src, unsigned src_mask)
115bf215546Sopenharmony_ci{
116bf215546Sopenharmony_ci   if ((dst->Register.File == src->Register.File) &&
117bf215546Sopenharmony_ci       (dst->Register.Index == src->Register.Index)) {
118bf215546Sopenharmony_ci      unsigned i, actual_mask = 0;
119bf215546Sopenharmony_ci      unsigned swiz[4];
120bf215546Sopenharmony_ci      get_swiz(swiz, &src->Register);
121bf215546Sopenharmony_ci      for (i = 0; i < 4; i++)
122bf215546Sopenharmony_ci         if (src_mask & (1 << i))
123bf215546Sopenharmony_ci            actual_mask |= (1 << swiz[i]);
124bf215546Sopenharmony_ci      if (actual_mask & dst_mask)
125bf215546Sopenharmony_ci         return true;
126bf215546Sopenharmony_ci   }
127bf215546Sopenharmony_ci   return false;
128bf215546Sopenharmony_ci}
129bf215546Sopenharmony_ci
130bf215546Sopenharmony_cistatic void
131bf215546Sopenharmony_cicreate_mov(struct tgsi_transform_context *tctx,
132bf215546Sopenharmony_ci           const struct tgsi_full_dst_register *dst,
133bf215546Sopenharmony_ci           const struct tgsi_full_src_register *src,
134bf215546Sopenharmony_ci           unsigned mask, unsigned saturate)
135bf215546Sopenharmony_ci{
136bf215546Sopenharmony_ci   struct tgsi_full_instruction new_inst;
137bf215546Sopenharmony_ci
138bf215546Sopenharmony_ci   new_inst = tgsi_default_full_instruction();
139bf215546Sopenharmony_ci   new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
140bf215546Sopenharmony_ci   new_inst.Instruction.Saturate = saturate;
141bf215546Sopenharmony_ci   new_inst.Instruction.NumDstRegs = 1;
142bf215546Sopenharmony_ci   reg_dst(&new_inst.Dst[0], dst, mask);
143bf215546Sopenharmony_ci   new_inst.Instruction.NumSrcRegs = 1;
144bf215546Sopenharmony_ci   reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W));
145bf215546Sopenharmony_ci   tctx->emit_instruction(tctx, &new_inst);
146bf215546Sopenharmony_ci}
147bf215546Sopenharmony_ci
/* Token accounting used to size the output of a lowering.  The estimate is
 * deliberately pessimistic: a removed instruction is assumed to carry no
 * ADDR[] or anything else that increases the # of tokens per src/dst,
 * while every inserted instruction is assumed to carry it.
 *
 * OINST(nargs) - tokens freed by an old (removed) instruction
 *    1         : instruction itself
 *    1         : dst
 *    1 * nargs : srcN
 *
 * NINST(nargs) - tokens consumed by a new (inserted) instruction
 *    1         : instruction itself
 *    2         : dst
 *    2 * nargs : srcN
 */

#define OINST(nargs)  (2 + (nargs))
#define NINST(nargs)  (3 + 2 * (nargs))
166bf215546Sopenharmony_ci
167bf215546Sopenharmony_ci/*
168bf215546Sopenharmony_ci * Lowering Translators:
169bf215546Sopenharmony_ci */
170bf215546Sopenharmony_ci
171bf215546Sopenharmony_ci/* DST - Distance Vector
172bf215546Sopenharmony_ci *   dst.x = 1.0
173bf215546Sopenharmony_ci *   dst.y = src0.y \times src1.y
174bf215546Sopenharmony_ci *   dst.z = src0.z
175bf215546Sopenharmony_ci *   dst.w = src1.w
176bf215546Sopenharmony_ci *
177bf215546Sopenharmony_ci * ; note: could be more clever and use just a single temp
178bf215546Sopenharmony_ci * ;       if I was clever enough to re-write the swizzles.
179bf215546Sopenharmony_ci * ; needs: 2 tmp, imm{1.0}
180bf215546Sopenharmony_ci * if (dst.y aliases src0.z) {
181bf215546Sopenharmony_ci *   MOV tmpA.yz, src0.yz
182bf215546Sopenharmony_ci *   src0 = tmpA
183bf215546Sopenharmony_ci * }
184bf215546Sopenharmony_ci * if (dst.yz aliases src1.w) {
185bf215546Sopenharmony_ci *   MOV tmpB.yw, src1.yw
186bf215546Sopenharmony_ci *   src1 = tmpB
187bf215546Sopenharmony_ci * }
188bf215546Sopenharmony_ci * MUL dst.y, src0.y, src1.y
189bf215546Sopenharmony_ci * MOV dst.z, src0.z
190bf215546Sopenharmony_ci * MOV dst.w, src1.w
191bf215546Sopenharmony_ci * MOV dst.x, imm{1.0}
192bf215546Sopenharmony_ci */
193bf215546Sopenharmony_ci#define DST_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + \
194bf215546Sopenharmony_ci		NINST(1) + NINST(1) - OINST(2))
195bf215546Sopenharmony_ci#define DST_TMP  2
196bf215546Sopenharmony_cistatic void
197bf215546Sopenharmony_citransform_dst(struct tgsi_transform_context *tctx,
198bf215546Sopenharmony_ci              struct tgsi_full_instruction *inst)
199bf215546Sopenharmony_ci{
200bf215546Sopenharmony_ci   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
201bf215546Sopenharmony_ci   struct tgsi_full_dst_register *dst  = &inst->Dst[0];
202bf215546Sopenharmony_ci   struct tgsi_full_src_register *src0 = &inst->Src[0];
203bf215546Sopenharmony_ci   struct tgsi_full_src_register *src1 = &inst->Src[1];
204bf215546Sopenharmony_ci   struct tgsi_full_instruction new_inst;
205bf215546Sopenharmony_ci
206bf215546Sopenharmony_ci   if (aliases(dst, TGSI_WRITEMASK_Y, src0, TGSI_WRITEMASK_Z)) {
207bf215546Sopenharmony_ci      create_mov(tctx, &ctx->tmp[A].dst, src0, TGSI_WRITEMASK_YZ, 0);
208bf215546Sopenharmony_ci      src0 = &ctx->tmp[A].src;
209bf215546Sopenharmony_ci   }
210bf215546Sopenharmony_ci
211bf215546Sopenharmony_ci   if (aliases(dst, TGSI_WRITEMASK_YZ, src1, TGSI_WRITEMASK_W)) {
212bf215546Sopenharmony_ci      create_mov(tctx, &ctx->tmp[B].dst, src1, TGSI_WRITEMASK_YW, 0);
213bf215546Sopenharmony_ci      src1 = &ctx->tmp[B].src;
214bf215546Sopenharmony_ci   }
215bf215546Sopenharmony_ci
216bf215546Sopenharmony_ci   if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
217bf215546Sopenharmony_ci      /* MUL dst.y, src0.y, src1.y */
218bf215546Sopenharmony_ci      new_inst = tgsi_default_full_instruction();
219bf215546Sopenharmony_ci      new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
220bf215546Sopenharmony_ci      new_inst.Instruction.NumDstRegs = 1;
221bf215546Sopenharmony_ci      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
222bf215546Sopenharmony_ci      new_inst.Instruction.NumSrcRegs = 2;
223bf215546Sopenharmony_ci      reg_src(&new_inst.Src[0], src0, SWIZ(_, Y, _, _));
224bf215546Sopenharmony_ci      reg_src(&new_inst.Src[1], src1, SWIZ(_, Y, _, _));
225bf215546Sopenharmony_ci      tctx->emit_instruction(tctx, &new_inst);
226bf215546Sopenharmony_ci   }
227bf215546Sopenharmony_ci
228bf215546Sopenharmony_ci   if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
229bf215546Sopenharmony_ci      /* MOV dst.z, src0.z */
230bf215546Sopenharmony_ci      new_inst = tgsi_default_full_instruction();
231bf215546Sopenharmony_ci      new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
232bf215546Sopenharmony_ci      new_inst.Instruction.NumDstRegs = 1;
233bf215546Sopenharmony_ci      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Z);
234bf215546Sopenharmony_ci      new_inst.Instruction.NumSrcRegs = 1;
235bf215546Sopenharmony_ci      reg_src(&new_inst.Src[0], src0, SWIZ(_, _, Z, _));
236bf215546Sopenharmony_ci      tctx->emit_instruction(tctx, &new_inst);
237bf215546Sopenharmony_ci   }
238bf215546Sopenharmony_ci
239bf215546Sopenharmony_ci   if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
240bf215546Sopenharmony_ci      /* MOV dst.w, src1.w */
241bf215546Sopenharmony_ci      new_inst = tgsi_default_full_instruction();
242bf215546Sopenharmony_ci      new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
243bf215546Sopenharmony_ci      new_inst.Instruction.NumDstRegs = 1;
244bf215546Sopenharmony_ci      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
245bf215546Sopenharmony_ci      new_inst.Instruction.NumSrcRegs = 1;
246bf215546Sopenharmony_ci      reg_src(&new_inst.Src[0], src1, SWIZ(_, _, _, W));
247bf215546Sopenharmony_ci      tctx->emit_instruction(tctx, &new_inst);
248bf215546Sopenharmony_ci   }
249bf215546Sopenharmony_ci
250bf215546Sopenharmony_ci   if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
251bf215546Sopenharmony_ci      /* MOV dst.x, imm{1.0} */
252bf215546Sopenharmony_ci      new_inst = tgsi_default_full_instruction();
253bf215546Sopenharmony_ci      new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
254bf215546Sopenharmony_ci      new_inst.Instruction.NumDstRegs = 1;
255bf215546Sopenharmony_ci      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
256bf215546Sopenharmony_ci      new_inst.Instruction.NumSrcRegs = 1;
257bf215546Sopenharmony_ci      reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, _, _, _));
258bf215546Sopenharmony_ci      tctx->emit_instruction(tctx, &new_inst);
259bf215546Sopenharmony_ci   }
260bf215546Sopenharmony_ci}
261bf215546Sopenharmony_ci
262bf215546Sopenharmony_ci/* LRP - Linear Interpolate
263bf215546Sopenharmony_ci *  dst.x = src0.x \times src1.x + (1.0 - src0.x) \times src2.x
264bf215546Sopenharmony_ci *  dst.y = src0.y \times src1.y + (1.0 - src0.y) \times src2.y
265bf215546Sopenharmony_ci *  dst.z = src0.z \times src1.z + (1.0 - src0.z) \times src2.z
266bf215546Sopenharmony_ci *  dst.w = src0.w \times src1.w + (1.0 - src0.w) \times src2.w
267bf215546Sopenharmony_ci *
268bf215546Sopenharmony_ci * This becomes: src0 \times src1 + src2 - src0 \times src2, which
269bf215546Sopenharmony_ci * can then become: src0 \times src1 - (src0 \times src2 - src2)
270bf215546Sopenharmony_ci *
271bf215546Sopenharmony_ci * ; needs: 1 tmp
272bf215546Sopenharmony_ci * MAD tmpA, src0, src2, -src2
273bf215546Sopenharmony_ci * MAD dst, src0, src1, -tmpA
274bf215546Sopenharmony_ci */
275bf215546Sopenharmony_ci#define LRP_GROW (NINST(3) + NINST(3) - OINST(3))
276bf215546Sopenharmony_ci#define LRP_TMP  1
277bf215546Sopenharmony_cistatic void
278bf215546Sopenharmony_citransform_lrp(struct tgsi_transform_context *tctx,
279bf215546Sopenharmony_ci              struct tgsi_full_instruction *inst)
280bf215546Sopenharmony_ci{
281bf215546Sopenharmony_ci   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
282bf215546Sopenharmony_ci   struct tgsi_full_dst_register *dst  = &inst->Dst[0];
283bf215546Sopenharmony_ci   struct tgsi_full_src_register *src0 = &inst->Src[0];
284bf215546Sopenharmony_ci   struct tgsi_full_src_register *src1 = &inst->Src[1];
285bf215546Sopenharmony_ci   struct tgsi_full_src_register *src2 = &inst->Src[2];
286bf215546Sopenharmony_ci   struct tgsi_full_instruction new_inst;
287bf215546Sopenharmony_ci
288bf215546Sopenharmony_ci   if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
289bf215546Sopenharmony_ci      /* MAD tmpA, src0, src2, -src2 */
290bf215546Sopenharmony_ci      new_inst = tgsi_default_full_instruction();
291bf215546Sopenharmony_ci      new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
292bf215546Sopenharmony_ci      new_inst.Instruction.NumDstRegs = 1;
293bf215546Sopenharmony_ci      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
294bf215546Sopenharmony_ci      new_inst.Instruction.NumSrcRegs = 3;
295bf215546Sopenharmony_ci      reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
296bf215546Sopenharmony_ci      reg_src(&new_inst.Src[1], src2, SWIZ(X, Y, Z, W));
297bf215546Sopenharmony_ci      reg_src(&new_inst.Src[2], src2, SWIZ(X, Y, Z, W));
298bf215546Sopenharmony_ci      new_inst.Src[2].Register.Negate = !new_inst.Src[2].Register.Negate;
299bf215546Sopenharmony_ci      tctx->emit_instruction(tctx, &new_inst);
300bf215546Sopenharmony_ci
301bf215546Sopenharmony_ci      /* MAD dst, src0, src1, -tmpA */
302bf215546Sopenharmony_ci      new_inst = tgsi_default_full_instruction();
303bf215546Sopenharmony_ci      new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
304bf215546Sopenharmony_ci      new_inst.Instruction.NumDstRegs = 1;
305bf215546Sopenharmony_ci      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
306bf215546Sopenharmony_ci      new_inst.Instruction.NumSrcRegs = 3;
307bf215546Sopenharmony_ci      reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
308bf215546Sopenharmony_ci      reg_src(&new_inst.Src[1], src1, SWIZ(X, Y, Z, W));
309bf215546Sopenharmony_ci      reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
310bf215546Sopenharmony_ci      new_inst.Src[2].Register.Negate = true;
311bf215546Sopenharmony_ci      tctx->emit_instruction(tctx, &new_inst);
312bf215546Sopenharmony_ci   }
313bf215546Sopenharmony_ci}
314bf215546Sopenharmony_ci
315bf215546Sopenharmony_ci/* FRC - Fraction
316bf215546Sopenharmony_ci *  dst.x = src.x - \lfloor src.x\rfloor
317bf215546Sopenharmony_ci *  dst.y = src.y - \lfloor src.y\rfloor
318bf215546Sopenharmony_ci *  dst.z = src.z - \lfloor src.z\rfloor
319bf215546Sopenharmony_ci *  dst.w = src.w - \lfloor src.w\rfloor
320bf215546Sopenharmony_ci *
321bf215546Sopenharmony_ci * ; needs: 1 tmp
322bf215546Sopenharmony_ci * FLR tmpA, src
323bf215546Sopenharmony_ci * SUB dst, src, tmpA
324bf215546Sopenharmony_ci */
325bf215546Sopenharmony_ci#define FRC_GROW (NINST(1) + NINST(2) - OINST(1))
326bf215546Sopenharmony_ci#define FRC_TMP  1
327bf215546Sopenharmony_cistatic void
328bf215546Sopenharmony_citransform_frc(struct tgsi_transform_context *tctx,
329bf215546Sopenharmony_ci              struct tgsi_full_instruction *inst)
330bf215546Sopenharmony_ci{
331bf215546Sopenharmony_ci   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
332bf215546Sopenharmony_ci   struct tgsi_full_dst_register *dst = &inst->Dst[0];
333bf215546Sopenharmony_ci   struct tgsi_full_src_register *src = &inst->Src[0];
334bf215546Sopenharmony_ci   struct tgsi_full_instruction new_inst;
335bf215546Sopenharmony_ci
336bf215546Sopenharmony_ci   if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
337bf215546Sopenharmony_ci      /* FLR tmpA, src */
338bf215546Sopenharmony_ci      new_inst = tgsi_default_full_instruction();
339bf215546Sopenharmony_ci      new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
340bf215546Sopenharmony_ci      new_inst.Instruction.NumDstRegs = 1;
341bf215546Sopenharmony_ci      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
342bf215546Sopenharmony_ci      new_inst.Instruction.NumSrcRegs = 1;
343bf215546Sopenharmony_ci      reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W));
344bf215546Sopenharmony_ci      tctx->emit_instruction(tctx, &new_inst);
345bf215546Sopenharmony_ci
346bf215546Sopenharmony_ci      /* SUB dst, src, tmpA */
347bf215546Sopenharmony_ci      new_inst = tgsi_default_full_instruction();
348bf215546Sopenharmony_ci      new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
349bf215546Sopenharmony_ci      new_inst.Instruction.NumDstRegs = 1;
350bf215546Sopenharmony_ci      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
351bf215546Sopenharmony_ci      new_inst.Instruction.NumSrcRegs = 2;
352bf215546Sopenharmony_ci      reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W));
353bf215546Sopenharmony_ci      reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
354bf215546Sopenharmony_ci      new_inst.Src[1].Register.Negate = 1;
355bf215546Sopenharmony_ci      tctx->emit_instruction(tctx, &new_inst);
356bf215546Sopenharmony_ci   }
357bf215546Sopenharmony_ci}
358bf215546Sopenharmony_ci
359bf215546Sopenharmony_ci/* POW - Power
360bf215546Sopenharmony_ci *  dst.x = src0.x^{src1.x}
361bf215546Sopenharmony_ci *  dst.y = src0.x^{src1.x}
362bf215546Sopenharmony_ci *  dst.z = src0.x^{src1.x}
363bf215546Sopenharmony_ci *  dst.w = src0.x^{src1.x}
364bf215546Sopenharmony_ci *
365bf215546Sopenharmony_ci * ; needs: 1 tmp
366bf215546Sopenharmony_ci * LG2 tmpA.x, src0.x
367bf215546Sopenharmony_ci * MUL tmpA.x, src1.x, tmpA.x
368bf215546Sopenharmony_ci * EX2 dst, tmpA.x
369bf215546Sopenharmony_ci */
370bf215546Sopenharmony_ci#define POW_GROW (NINST(1) + NINST(2) + NINST(1) - OINST(2))
371bf215546Sopenharmony_ci#define POW_TMP  1
372bf215546Sopenharmony_cistatic void
373bf215546Sopenharmony_citransform_pow(struct tgsi_transform_context *tctx,
374bf215546Sopenharmony_ci              struct tgsi_full_instruction *inst)
375bf215546Sopenharmony_ci{
376bf215546Sopenharmony_ci   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
377bf215546Sopenharmony_ci   struct tgsi_full_dst_register *dst  = &inst->Dst[0];
378bf215546Sopenharmony_ci   struct tgsi_full_src_register *src0 = &inst->Src[0];
379bf215546Sopenharmony_ci   struct tgsi_full_src_register *src1 = &inst->Src[1];
380bf215546Sopenharmony_ci   struct tgsi_full_instruction new_inst;
381bf215546Sopenharmony_ci
382bf215546Sopenharmony_ci   if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
383bf215546Sopenharmony_ci      /* LG2 tmpA.x, src0.x */
384bf215546Sopenharmony_ci      new_inst = tgsi_default_full_instruction();
385bf215546Sopenharmony_ci      new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
386bf215546Sopenharmony_ci      new_inst.Instruction.NumDstRegs = 1;
387bf215546Sopenharmony_ci      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
388bf215546Sopenharmony_ci      new_inst.Instruction.NumSrcRegs = 1;
389bf215546Sopenharmony_ci      reg_src(&new_inst.Src[0], src0, SWIZ(X, _, _, _));
390bf215546Sopenharmony_ci      tctx->emit_instruction(tctx, &new_inst);
391bf215546Sopenharmony_ci
392bf215546Sopenharmony_ci      /* MUL tmpA.x, src1.x, tmpA.x */
393bf215546Sopenharmony_ci      new_inst = tgsi_default_full_instruction();
394bf215546Sopenharmony_ci      new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
395bf215546Sopenharmony_ci      new_inst.Instruction.NumDstRegs = 1;
396bf215546Sopenharmony_ci      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
397bf215546Sopenharmony_ci      new_inst.Instruction.NumSrcRegs = 2;
398bf215546Sopenharmony_ci      reg_src(&new_inst.Src[0], src1, SWIZ(X, _, _, _));
399bf215546Sopenharmony_ci      reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _));
400bf215546Sopenharmony_ci      tctx->emit_instruction(tctx, &new_inst);
401bf215546Sopenharmony_ci
402bf215546Sopenharmony_ci      /* EX2 dst, tmpA.x */
403bf215546Sopenharmony_ci      new_inst = tgsi_default_full_instruction();
404bf215546Sopenharmony_ci      new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
405bf215546Sopenharmony_ci      new_inst.Instruction.NumDstRegs = 1;
406bf215546Sopenharmony_ci      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
407bf215546Sopenharmony_ci      new_inst.Instruction.NumSrcRegs = 1;
408bf215546Sopenharmony_ci      reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, _, _, _));
409bf215546Sopenharmony_ci      tctx->emit_instruction(tctx, &new_inst);
410bf215546Sopenharmony_ci   }
411bf215546Sopenharmony_ci}
412bf215546Sopenharmony_ci
413bf215546Sopenharmony_ci/* LIT - Light Coefficients
414bf215546Sopenharmony_ci *  dst.x = 1.0
415bf215546Sopenharmony_ci *  dst.y = max(src.x, 0.0)
416bf215546Sopenharmony_ci *  dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
417bf215546Sopenharmony_ci *  dst.w = 1.0
418bf215546Sopenharmony_ci *
419bf215546Sopenharmony_ci * ; needs: 1 tmp, imm{0.0}, imm{1.0}, imm{128.0}
420bf215546Sopenharmony_ci * MAX tmpA.xy, src.xy, imm{0.0}
421bf215546Sopenharmony_ci * CLAMP tmpA.z, src.w, -imm{128.0}, imm{128.0}
422bf215546Sopenharmony_ci * LG2 tmpA.y, tmpA.y
423bf215546Sopenharmony_ci * MUL tmpA.y, tmpA.z, tmpA.y
424bf215546Sopenharmony_ci * EX2 tmpA.y, tmpA.y
425bf215546Sopenharmony_ci * CMP tmpA.y, -src.x, tmpA.y, imm{0.0}
426bf215546Sopenharmony_ci * MOV dst.yz, tmpA.xy
427bf215546Sopenharmony_ci * MOV dst.xw, imm{1.0}
428bf215546Sopenharmony_ci */
429bf215546Sopenharmony_ci#define LIT_GROW (NINST(1) + NINST(3) + NINST(1) + NINST(2) + \
430bf215546Sopenharmony_ci		NINST(1) + NINST(3) + NINST(1) + NINST(1) - OINST(1))
431bf215546Sopenharmony_ci#define LIT_TMP  1
432bf215546Sopenharmony_cistatic void
433bf215546Sopenharmony_citransform_lit(struct tgsi_transform_context *tctx,
434bf215546Sopenharmony_ci              struct tgsi_full_instruction *inst)
435bf215546Sopenharmony_ci{
436bf215546Sopenharmony_ci   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
437bf215546Sopenharmony_ci   struct tgsi_full_dst_register *dst = &inst->Dst[0];
438bf215546Sopenharmony_ci   struct tgsi_full_src_register *src = &inst->Src[0];
439bf215546Sopenharmony_ci   struct tgsi_full_instruction new_inst;
440bf215546Sopenharmony_ci
441bf215546Sopenharmony_ci   if (dst->Register.WriteMask & TGSI_WRITEMASK_YZ) {
442bf215546Sopenharmony_ci      /* MAX tmpA.xy, src.xy, imm{0.0} */
443bf215546Sopenharmony_ci      new_inst = tgsi_default_full_instruction();
444bf215546Sopenharmony_ci      new_inst.Instruction.Opcode = TGSI_OPCODE_MAX;
445bf215546Sopenharmony_ci      new_inst.Instruction.NumDstRegs = 1;
446bf215546Sopenharmony_ci      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XY);
447bf215546Sopenharmony_ci      new_inst.Instruction.NumSrcRegs = 2;
448bf215546Sopenharmony_ci      reg_src(&new_inst.Src[0], src, SWIZ(X, Y, _, _));
449bf215546Sopenharmony_ci      reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(X, X, _, _));
450bf215546Sopenharmony_ci      tctx->emit_instruction(tctx, &new_inst);
451bf215546Sopenharmony_ci
452bf215546Sopenharmony_ci      /* MIN tmpA.z, src.w, imm{128.0} */
453bf215546Sopenharmony_ci      new_inst = tgsi_default_full_instruction();
454bf215546Sopenharmony_ci      new_inst.Instruction.Opcode = TGSI_OPCODE_MIN;
455bf215546Sopenharmony_ci      new_inst.Instruction.NumDstRegs = 1;
456bf215546Sopenharmony_ci      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
457bf215546Sopenharmony_ci      new_inst.Instruction.NumSrcRegs = 2;
458bf215546Sopenharmony_ci      reg_src(&new_inst.Src[0], src, SWIZ(_, _, W, _));
459bf215546Sopenharmony_ci      reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(_, _, Z, _));
460bf215546Sopenharmony_ci      tctx->emit_instruction(tctx, &new_inst);
461bf215546Sopenharmony_ci
462bf215546Sopenharmony_ci      /* MAX tmpA.z, tmpA.z, -imm{128.0} */
463bf215546Sopenharmony_ci      new_inst = tgsi_default_full_instruction();
464bf215546Sopenharmony_ci      new_inst.Instruction.Opcode = TGSI_OPCODE_MAX;
465bf215546Sopenharmony_ci      new_inst.Instruction.NumDstRegs = 1;
466bf215546Sopenharmony_ci      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
467bf215546Sopenharmony_ci      new_inst.Instruction.NumSrcRegs = 2;
468bf215546Sopenharmony_ci      reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, _, Z, _));
469bf215546Sopenharmony_ci      reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(_, _, Z, _));
470bf215546Sopenharmony_ci      new_inst.Src[1].Register.Negate = true;
471bf215546Sopenharmony_ci      tctx->emit_instruction(tctx, &new_inst);
472bf215546Sopenharmony_ci
473bf215546Sopenharmony_ci      /* LG2 tmpA.y, tmpA.y */
474bf215546Sopenharmony_ci      new_inst = tgsi_default_full_instruction();
475bf215546Sopenharmony_ci      new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
476bf215546Sopenharmony_ci      new_inst.Instruction.NumDstRegs = 1;
477bf215546Sopenharmony_ci      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
478bf215546Sopenharmony_ci      new_inst.Instruction.NumSrcRegs = 1;
479bf215546Sopenharmony_ci      reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _));
480bf215546Sopenharmony_ci      tctx->emit_instruction(tctx, &new_inst);
481bf215546Sopenharmony_ci
482bf215546Sopenharmony_ci      /* MUL tmpA.y, tmpA.z, tmpA.y */
483bf215546Sopenharmony_ci      new_inst = tgsi_default_full_instruction();
484bf215546Sopenharmony_ci      new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
485bf215546Sopenharmony_ci      new_inst.Instruction.NumDstRegs = 1;
486bf215546Sopenharmony_ci      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
487bf215546Sopenharmony_ci      new_inst.Instruction.NumSrcRegs = 2;
488bf215546Sopenharmony_ci      reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, Z, _, _));
489bf215546Sopenharmony_ci      reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _));
490bf215546Sopenharmony_ci      tctx->emit_instruction(tctx, &new_inst);
491bf215546Sopenharmony_ci
492bf215546Sopenharmony_ci      /* EX2 tmpA.y, tmpA.y */
493bf215546Sopenharmony_ci      new_inst = tgsi_default_full_instruction();
494bf215546Sopenharmony_ci      new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
495bf215546Sopenharmony_ci      new_inst.Instruction.NumDstRegs = 1;
496bf215546Sopenharmony_ci      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
497bf215546Sopenharmony_ci      new_inst.Instruction.NumSrcRegs = 1;
498bf215546Sopenharmony_ci      reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _));
499bf215546Sopenharmony_ci      tctx->emit_instruction(tctx, &new_inst);
500bf215546Sopenharmony_ci
501bf215546Sopenharmony_ci      /* CMP tmpA.y, -src.x, tmpA.y, imm{0.0} */
502bf215546Sopenharmony_ci      new_inst = tgsi_default_full_instruction();
503bf215546Sopenharmony_ci      new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
504bf215546Sopenharmony_ci      new_inst.Instruction.NumDstRegs = 1;
505bf215546Sopenharmony_ci      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
506bf215546Sopenharmony_ci      new_inst.Instruction.NumSrcRegs = 3;
507bf215546Sopenharmony_ci      reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _));
508bf215546Sopenharmony_ci      new_inst.Src[0].Register.Negate = true;
509bf215546Sopenharmony_ci      reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _));
510bf215546Sopenharmony_ci      reg_src(&new_inst.Src[2], &ctx->imm, SWIZ(_, X, _, _));
511bf215546Sopenharmony_ci      tctx->emit_instruction(tctx, &new_inst);
512bf215546Sopenharmony_ci
513bf215546Sopenharmony_ci      /* MOV dst.yz, tmpA.xy */
514bf215546Sopenharmony_ci      new_inst = tgsi_default_full_instruction();
515bf215546Sopenharmony_ci      new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
516bf215546Sopenharmony_ci      new_inst.Instruction.NumDstRegs = 1;
517bf215546Sopenharmony_ci      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_YZ);
518bf215546Sopenharmony_ci      new_inst.Instruction.NumSrcRegs = 1;
519bf215546Sopenharmony_ci      reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, Y, _));
520bf215546Sopenharmony_ci      tctx->emit_instruction(tctx, &new_inst);
521bf215546Sopenharmony_ci   }
522bf215546Sopenharmony_ci
523bf215546Sopenharmony_ci   if (dst->Register.WriteMask & TGSI_WRITEMASK_XW) {
524bf215546Sopenharmony_ci      /* MOV dst.xw, imm{1.0} */
525bf215546Sopenharmony_ci      new_inst = tgsi_default_full_instruction();
526bf215546Sopenharmony_ci      new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
527bf215546Sopenharmony_ci      new_inst.Instruction.NumDstRegs = 1;
528bf215546Sopenharmony_ci      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XW);
529bf215546Sopenharmony_ci      new_inst.Instruction.NumSrcRegs = 1;
530bf215546Sopenharmony_ci      reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, _, _, Y));
531bf215546Sopenharmony_ci      tctx->emit_instruction(tctx, &new_inst);
532bf215546Sopenharmony_ci   }
533bf215546Sopenharmony_ci}
534bf215546Sopenharmony_ci
535bf215546Sopenharmony_ci/* EXP - Approximate Exponential Base 2
536bf215546Sopenharmony_ci *  dst.x = 2^{\lfloor src.x\rfloor}
537bf215546Sopenharmony_ci *  dst.y = src.x - \lfloor src.x\rfloor
538bf215546Sopenharmony_ci *  dst.z = 2^{src.x}
539bf215546Sopenharmony_ci *  dst.w = 1.0
540bf215546Sopenharmony_ci *
541bf215546Sopenharmony_ci * ; needs: 1 tmp, imm{1.0}
542bf215546Sopenharmony_ci * if (lowering FLR) {
543bf215546Sopenharmony_ci *   FRC tmpA.x, src.x
544bf215546Sopenharmony_ci *   SUB tmpA.x, src.x, tmpA.x
545bf215546Sopenharmony_ci * } else {
546bf215546Sopenharmony_ci *   FLR tmpA.x, src.x
547bf215546Sopenharmony_ci * }
548bf215546Sopenharmony_ci * EX2 tmpA.y, src.x
549bf215546Sopenharmony_ci * SUB dst.y, src.x, tmpA.x
550bf215546Sopenharmony_ci * EX2 dst.x, tmpA.x
551bf215546Sopenharmony_ci * MOV dst.z, tmpA.y
552bf215546Sopenharmony_ci * MOV dst.w, imm{1.0}
553bf215546Sopenharmony_ci */
554bf215546Sopenharmony_ci#define EXP_GROW (NINST(1) + NINST(2) + NINST(1) + NINST(2) + NINST(1) + \
555bf215546Sopenharmony_ci		NINST(1)+ NINST(1) - OINST(1))
556bf215546Sopenharmony_ci#define EXP_TMP  1
557bf215546Sopenharmony_cistatic void
558bf215546Sopenharmony_citransform_exp(struct tgsi_transform_context *tctx,
559bf215546Sopenharmony_ci              struct tgsi_full_instruction *inst)
560bf215546Sopenharmony_ci{
561bf215546Sopenharmony_ci   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
562bf215546Sopenharmony_ci   struct tgsi_full_dst_register *dst = &inst->Dst[0];
563bf215546Sopenharmony_ci   struct tgsi_full_src_register *src = &inst->Src[0];
564bf215546Sopenharmony_ci   struct tgsi_full_instruction new_inst;
565bf215546Sopenharmony_ci
566bf215546Sopenharmony_ci   if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) {
567bf215546Sopenharmony_ci      if (ctx->config->lower_FLR) {
568bf215546Sopenharmony_ci         /* FRC tmpA.x, src.x */
569bf215546Sopenharmony_ci         new_inst = tgsi_default_full_instruction();
570bf215546Sopenharmony_ci         new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
571bf215546Sopenharmony_ci         new_inst.Instruction.NumDstRegs = 1;
572bf215546Sopenharmony_ci         reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
573bf215546Sopenharmony_ci         new_inst.Instruction.NumSrcRegs = 1;
574bf215546Sopenharmony_ci         reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
575bf215546Sopenharmony_ci         tctx->emit_instruction(tctx, &new_inst);
576bf215546Sopenharmony_ci
577bf215546Sopenharmony_ci         /* SUB tmpA.x, src.x, tmpA.x */
578bf215546Sopenharmony_ci         new_inst = tgsi_default_full_instruction();
579bf215546Sopenharmony_ci         new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
580bf215546Sopenharmony_ci         new_inst.Instruction.NumDstRegs = 1;
581bf215546Sopenharmony_ci         reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
582bf215546Sopenharmony_ci         new_inst.Instruction.NumSrcRegs = 2;
583bf215546Sopenharmony_ci         reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
584bf215546Sopenharmony_ci         reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _));
585bf215546Sopenharmony_ci         new_inst.Src[1].Register.Negate = 1;
586bf215546Sopenharmony_ci         tctx->emit_instruction(tctx, &new_inst);
587bf215546Sopenharmony_ci     } else {
588bf215546Sopenharmony_ci         /* FLR tmpA.x, src.x */
589bf215546Sopenharmony_ci         new_inst = tgsi_default_full_instruction();
590bf215546Sopenharmony_ci         new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
591bf215546Sopenharmony_ci         new_inst.Instruction.NumDstRegs = 1;
592bf215546Sopenharmony_ci         reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
593bf215546Sopenharmony_ci         new_inst.Instruction.NumSrcRegs = 1;
594bf215546Sopenharmony_ci         reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
595bf215546Sopenharmony_ci         tctx->emit_instruction(tctx, &new_inst);
596bf215546Sopenharmony_ci      }
597bf215546Sopenharmony_ci   }
598bf215546Sopenharmony_ci
599bf215546Sopenharmony_ci   if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
600bf215546Sopenharmony_ci      /* EX2 tmpA.y, src.x */
601bf215546Sopenharmony_ci      new_inst = tgsi_default_full_instruction();
602bf215546Sopenharmony_ci      new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
603bf215546Sopenharmony_ci      new_inst.Instruction.NumDstRegs = 1;
604bf215546Sopenharmony_ci      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
605bf215546Sopenharmony_ci      new_inst.Instruction.NumSrcRegs = 1;
606bf215546Sopenharmony_ci      reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
607bf215546Sopenharmony_ci      tctx->emit_instruction(tctx, &new_inst);
608bf215546Sopenharmony_ci   }
609bf215546Sopenharmony_ci
610bf215546Sopenharmony_ci   if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
611bf215546Sopenharmony_ci      /* SUB dst.y, src.x, tmpA.x */
612bf215546Sopenharmony_ci      new_inst = tgsi_default_full_instruction();
613bf215546Sopenharmony_ci      new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
614bf215546Sopenharmony_ci      new_inst.Instruction.NumDstRegs = 1;
615bf215546Sopenharmony_ci      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
616bf215546Sopenharmony_ci      new_inst.Instruction.NumSrcRegs = 2;
617bf215546Sopenharmony_ci      reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _));
618bf215546Sopenharmony_ci      reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, X, _, _));
619bf215546Sopenharmony_ci      new_inst.Src[1].Register.Negate = 1;
620bf215546Sopenharmony_ci      tctx->emit_instruction(tctx, &new_inst);
621bf215546Sopenharmony_ci   }
622bf215546Sopenharmony_ci
623bf215546Sopenharmony_ci   if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
624bf215546Sopenharmony_ci      /* EX2 dst.x, tmpA.x */
625bf215546Sopenharmony_ci      new_inst = tgsi_default_full_instruction();
626bf215546Sopenharmony_ci      new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
627bf215546Sopenharmony_ci      new_inst.Instruction.NumDstRegs = 1;
628bf215546Sopenharmony_ci      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
629bf215546Sopenharmony_ci      new_inst.Instruction.NumSrcRegs = 1;
630bf215546Sopenharmony_ci      reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, _, _, _));
631bf215546Sopenharmony_ci      tctx->emit_instruction(tctx, &new_inst);
632bf215546Sopenharmony_ci   }
633bf215546Sopenharmony_ci
634bf215546Sopenharmony_ci   if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
635bf215546Sopenharmony_ci      /* MOV dst.z, tmpA.y */
636bf215546Sopenharmony_ci      new_inst = tgsi_default_full_instruction();
637bf215546Sopenharmony_ci      new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
638bf215546Sopenharmony_ci      new_inst.Instruction.NumDstRegs = 1;
639bf215546Sopenharmony_ci      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Z);
640bf215546Sopenharmony_ci      new_inst.Instruction.NumSrcRegs = 1;
641bf215546Sopenharmony_ci      reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, _, Y, _));
642bf215546Sopenharmony_ci      tctx->emit_instruction(tctx, &new_inst);
643bf215546Sopenharmony_ci   }
644bf215546Sopenharmony_ci
645bf215546Sopenharmony_ci   if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
646bf215546Sopenharmony_ci      /* MOV dst.w, imm{1.0} */
647bf215546Sopenharmony_ci      new_inst = tgsi_default_full_instruction();
648bf215546Sopenharmony_ci      new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
649bf215546Sopenharmony_ci      new_inst.Instruction.NumDstRegs = 1;
650bf215546Sopenharmony_ci      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
651bf215546Sopenharmony_ci      new_inst.Instruction.NumSrcRegs = 1;
652bf215546Sopenharmony_ci      reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y));
653bf215546Sopenharmony_ci      tctx->emit_instruction(tctx, &new_inst);
654bf215546Sopenharmony_ci   }
655bf215546Sopenharmony_ci}
656bf215546Sopenharmony_ci
657bf215546Sopenharmony_ci/* LOG - Approximate Logarithm Base 2
658bf215546Sopenharmony_ci *  dst.x = \lfloor\log_2{|src.x|}\rfloor
659bf215546Sopenharmony_ci *  dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}}
660bf215546Sopenharmony_ci *  dst.z = \log_2{|src.x|}
661bf215546Sopenharmony_ci *  dst.w = 1.0
662bf215546Sopenharmony_ci *
663bf215546Sopenharmony_ci * ; needs: 1 tmp, imm{1.0}
664bf215546Sopenharmony_ci * LG2 tmpA.x, |src.x|
665bf215546Sopenharmony_ci * if (lowering FLR) {
666bf215546Sopenharmony_ci *   FRC tmpA.y, tmpA.x
667bf215546Sopenharmony_ci *   SUB tmpA.y, tmpA.x, tmpA.y
668bf215546Sopenharmony_ci * } else {
669bf215546Sopenharmony_ci *   FLR tmpA.y, tmpA.x
670bf215546Sopenharmony_ci * }
671bf215546Sopenharmony_ci * EX2 tmpA.z, tmpA.y
672bf215546Sopenharmony_ci * RCP tmpA.z, tmpA.z
673bf215546Sopenharmony_ci * MUL dst.y, |src.x|, tmpA.z
674bf215546Sopenharmony_ci * MOV dst.xz, tmpA.yx
675bf215546Sopenharmony_ci * MOV dst.w, imm{1.0}
676bf215546Sopenharmony_ci */
677bf215546Sopenharmony_ci#define LOG_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + NINST(1) + \
678bf215546Sopenharmony_ci		NINST(2) + NINST(1) + NINST(1) - OINST(1))
679bf215546Sopenharmony_ci#define LOG_TMP  1
680bf215546Sopenharmony_cistatic void
681bf215546Sopenharmony_citransform_log(struct tgsi_transform_context *tctx,
682bf215546Sopenharmony_ci              struct tgsi_full_instruction *inst)
683bf215546Sopenharmony_ci{
684bf215546Sopenharmony_ci   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
685bf215546Sopenharmony_ci   struct tgsi_full_dst_register *dst = &inst->Dst[0];
686bf215546Sopenharmony_ci   struct tgsi_full_src_register *src = &inst->Src[0];
687bf215546Sopenharmony_ci   struct tgsi_full_instruction new_inst;
688bf215546Sopenharmony_ci
689bf215546Sopenharmony_ci   if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZ) {
690bf215546Sopenharmony_ci      /* LG2 tmpA.x, |src.x| */
691bf215546Sopenharmony_ci      new_inst = tgsi_default_full_instruction();
692bf215546Sopenharmony_ci      new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
693bf215546Sopenharmony_ci      new_inst.Instruction.NumDstRegs = 1;
694bf215546Sopenharmony_ci      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
695bf215546Sopenharmony_ci      new_inst.Instruction.NumSrcRegs = 1;
696bf215546Sopenharmony_ci      reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
697bf215546Sopenharmony_ci      new_inst.Src[0].Register.Absolute = true;
698bf215546Sopenharmony_ci      tctx->emit_instruction(tctx, &new_inst);
699bf215546Sopenharmony_ci   }
700bf215546Sopenharmony_ci
701bf215546Sopenharmony_ci   if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) {
702bf215546Sopenharmony_ci      if (ctx->config->lower_FLR) {
703bf215546Sopenharmony_ci         /* FRC tmpA.y, tmpA.x */
704bf215546Sopenharmony_ci         new_inst = tgsi_default_full_instruction();
705bf215546Sopenharmony_ci         new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
706bf215546Sopenharmony_ci         new_inst.Instruction.NumDstRegs = 1;
707bf215546Sopenharmony_ci         reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
708bf215546Sopenharmony_ci         new_inst.Instruction.NumSrcRegs = 1;
709bf215546Sopenharmony_ci         reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
710bf215546Sopenharmony_ci         tctx->emit_instruction(tctx, &new_inst);
711bf215546Sopenharmony_ci
712bf215546Sopenharmony_ci         /* SUB tmpA.y, tmpA.x, tmpA.y */
713bf215546Sopenharmony_ci         new_inst = tgsi_default_full_instruction();
714bf215546Sopenharmony_ci         new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
715bf215546Sopenharmony_ci         new_inst.Instruction.NumDstRegs = 1;
716bf215546Sopenharmony_ci         reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
717bf215546Sopenharmony_ci         new_inst.Instruction.NumSrcRegs = 2;
718bf215546Sopenharmony_ci         reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
719bf215546Sopenharmony_ci         reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _));
720bf215546Sopenharmony_ci         new_inst.Src[1].Register.Negate = 1;
721bf215546Sopenharmony_ci         tctx->emit_instruction(tctx, &new_inst);
722bf215546Sopenharmony_ci      } else {
723bf215546Sopenharmony_ci         /* FLR tmpA.y, tmpA.x */
724bf215546Sopenharmony_ci         new_inst = tgsi_default_full_instruction();
725bf215546Sopenharmony_ci         new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
726bf215546Sopenharmony_ci         new_inst.Instruction.NumDstRegs = 1;
727bf215546Sopenharmony_ci         reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
728bf215546Sopenharmony_ci         new_inst.Instruction.NumSrcRegs = 1;
729bf215546Sopenharmony_ci         reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
730bf215546Sopenharmony_ci         tctx->emit_instruction(tctx, &new_inst);
731bf215546Sopenharmony_ci      }
732bf215546Sopenharmony_ci   }
733bf215546Sopenharmony_ci
734bf215546Sopenharmony_ci   if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
735bf215546Sopenharmony_ci      /* EX2 tmpA.z, tmpA.y */
736bf215546Sopenharmony_ci      new_inst = tgsi_default_full_instruction();
737bf215546Sopenharmony_ci      new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
738bf215546Sopenharmony_ci      new_inst.Instruction.NumDstRegs = 1;
739bf215546Sopenharmony_ci      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
740bf215546Sopenharmony_ci      new_inst.Instruction.NumSrcRegs = 1;
741bf215546Sopenharmony_ci      reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _));
742bf215546Sopenharmony_ci      tctx->emit_instruction(tctx, &new_inst);
743bf215546Sopenharmony_ci
744bf215546Sopenharmony_ci      /* RCP tmpA.z, tmpA.z */
745bf215546Sopenharmony_ci      new_inst = tgsi_default_full_instruction();
746bf215546Sopenharmony_ci      new_inst.Instruction.Opcode = TGSI_OPCODE_RCP;
747bf215546Sopenharmony_ci      new_inst.Instruction.NumDstRegs = 1;
748bf215546Sopenharmony_ci      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
749bf215546Sopenharmony_ci      new_inst.Instruction.NumSrcRegs = 1;
750bf215546Sopenharmony_ci      reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Z, _, _, _));
751bf215546Sopenharmony_ci      tctx->emit_instruction(tctx, &new_inst);
752bf215546Sopenharmony_ci
753bf215546Sopenharmony_ci      /* MUL dst.y, |src.x|, tmpA.z */
754bf215546Sopenharmony_ci      new_inst = tgsi_default_full_instruction();
755bf215546Sopenharmony_ci      new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
756bf215546Sopenharmony_ci      new_inst.Instruction.NumDstRegs = 1;
757bf215546Sopenharmony_ci      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
758bf215546Sopenharmony_ci      new_inst.Instruction.NumSrcRegs = 2;
759bf215546Sopenharmony_ci      reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _));
760bf215546Sopenharmony_ci      new_inst.Src[0].Register.Absolute = true;
761bf215546Sopenharmony_ci      reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Z, _, _));
762bf215546Sopenharmony_ci      tctx->emit_instruction(tctx, &new_inst);
763bf215546Sopenharmony_ci   }
764bf215546Sopenharmony_ci
765bf215546Sopenharmony_ci   if (dst->Register.WriteMask & TGSI_WRITEMASK_XZ) {
766bf215546Sopenharmony_ci      /* MOV dst.xz, tmpA.yx */
767bf215546Sopenharmony_ci      new_inst = tgsi_default_full_instruction();
768bf215546Sopenharmony_ci      new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
769bf215546Sopenharmony_ci      new_inst.Instruction.NumDstRegs = 1;
770bf215546Sopenharmony_ci      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XZ);
771bf215546Sopenharmony_ci      new_inst.Instruction.NumSrcRegs = 1;
772bf215546Sopenharmony_ci      reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, X, _));
773bf215546Sopenharmony_ci      tctx->emit_instruction(tctx, &new_inst);
774bf215546Sopenharmony_ci   }
775bf215546Sopenharmony_ci
776bf215546Sopenharmony_ci   if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
777bf215546Sopenharmony_ci      /* MOV dst.w, imm{1.0} */
778bf215546Sopenharmony_ci      new_inst = tgsi_default_full_instruction();
779bf215546Sopenharmony_ci      new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
780bf215546Sopenharmony_ci      new_inst.Instruction.NumDstRegs = 1;
781bf215546Sopenharmony_ci      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
782bf215546Sopenharmony_ci      new_inst.Instruction.NumSrcRegs = 1;
783bf215546Sopenharmony_ci      reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y));
784bf215546Sopenharmony_ci      tctx->emit_instruction(tctx, &new_inst);
785bf215546Sopenharmony_ci   }
786bf215546Sopenharmony_ci}
787bf215546Sopenharmony_ci
788bf215546Sopenharmony_ci/* DP4 - 4-component Dot Product
789bf215546Sopenharmony_ci *   dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w
790bf215546Sopenharmony_ci *
791bf215546Sopenharmony_ci * DP3 - 3-component Dot Product
792bf215546Sopenharmony_ci *   dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
793bf215546Sopenharmony_ci *
794bf215546Sopenharmony_ci * DP2 - 2-component Dot Product
795bf215546Sopenharmony_ci *   dst = src0.x \times src1.x + src0.y \times src1.y
796bf215546Sopenharmony_ci *
797bf215546Sopenharmony_ci * NOTE: these are translated into sequence of MUL/MAD(/ADD) scalar
798bf215546Sopenharmony_ci * operations, which is what you'd prefer for a ISA that is natively
799bf215546Sopenharmony_ci * scalar.  Probably a native vector ISA would at least already have
800bf215546Sopenharmony_ci * DP4/DP3 instructions, but perhaps there is room for an alternative
801bf215546Sopenharmony_ci * translation for DP2 using vector instructions.
802bf215546Sopenharmony_ci *
803bf215546Sopenharmony_ci * ; needs: 1 tmp
804bf215546Sopenharmony_ci * MUL tmpA.x, src0.x, src1.x
805bf215546Sopenharmony_ci * MAD tmpA.x, src0.y, src1.y, tmpA.x
806bf215546Sopenharmony_ci * if (DP3 || DP4) {
807bf215546Sopenharmony_ci *   MAD tmpA.x, src0.z, src1.z, tmpA.x
808bf215546Sopenharmony_ci *   if (DP4) {
809bf215546Sopenharmony_ci *     MAD tmpA.x, src0.w, src1.w, tmpA.x
810bf215546Sopenharmony_ci *   }
811bf215546Sopenharmony_ci * }
812bf215546Sopenharmony_ci * ; fixup last instruction to replicate into dst
813bf215546Sopenharmony_ci */
814bf215546Sopenharmony_ci#define DP4_GROW  (NINST(2) + NINST(3) + NINST(3) + NINST(3) - OINST(2))
815bf215546Sopenharmony_ci#define DP3_GROW  (NINST(2) + NINST(3) + NINST(3) - OINST(2))
816bf215546Sopenharmony_ci#define DP2_GROW  (NINST(2) + NINST(3) - OINST(2))
817bf215546Sopenharmony_ci#define DOTP_TMP  1
818bf215546Sopenharmony_cistatic void
819bf215546Sopenharmony_citransform_dotp(struct tgsi_transform_context *tctx,
820bf215546Sopenharmony_ci               struct tgsi_full_instruction *inst)
821bf215546Sopenharmony_ci{
822bf215546Sopenharmony_ci   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
823bf215546Sopenharmony_ci   struct tgsi_full_dst_register *dst  = &inst->Dst[0];
824bf215546Sopenharmony_ci   struct tgsi_full_src_register *src0 = &inst->Src[0];
825bf215546Sopenharmony_ci   struct tgsi_full_src_register *src1 = &inst->Src[1];
826bf215546Sopenharmony_ci   struct tgsi_full_instruction new_inst;
827bf215546Sopenharmony_ci   enum tgsi_opcode opcode = inst->Instruction.Opcode;
828bf215546Sopenharmony_ci
829bf215546Sopenharmony_ci   /* NOTE: any potential last instruction must replicate src on all
830bf215546Sopenharmony_ci    * components (since it could be re-written to write to final dst)
831bf215546Sopenharmony_ci    */
832bf215546Sopenharmony_ci
833bf215546Sopenharmony_ci   if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
834bf215546Sopenharmony_ci      /* MUL tmpA.x, src0.x, src1.x */
835bf215546Sopenharmony_ci      new_inst = tgsi_default_full_instruction();
836bf215546Sopenharmony_ci      new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
837bf215546Sopenharmony_ci      new_inst.Instruction.NumDstRegs = 1;
838bf215546Sopenharmony_ci      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
839bf215546Sopenharmony_ci      new_inst.Instruction.NumSrcRegs = 2;
840bf215546Sopenharmony_ci      reg_src(&new_inst.Src[0], src0, SWIZ(X, _, _, _));
841bf215546Sopenharmony_ci      reg_src(&new_inst.Src[1], src1, SWIZ(X, _, _, _));
842bf215546Sopenharmony_ci      tctx->emit_instruction(tctx, &new_inst);
843bf215546Sopenharmony_ci
844bf215546Sopenharmony_ci      /* MAD tmpA.x, src0.y, src1.y, tmpA.x */
845bf215546Sopenharmony_ci      new_inst = tgsi_default_full_instruction();
846bf215546Sopenharmony_ci      new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
847bf215546Sopenharmony_ci      new_inst.Instruction.NumDstRegs = 1;
848bf215546Sopenharmony_ci      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
849bf215546Sopenharmony_ci      new_inst.Instruction.NumSrcRegs = 3;
850bf215546Sopenharmony_ci      reg_src(&new_inst.Src[0], src0, SWIZ(Y, Y, Y, Y));
851bf215546Sopenharmony_ci      reg_src(&new_inst.Src[1], src1, SWIZ(Y, Y, Y, Y));
852bf215546Sopenharmony_ci      reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
853bf215546Sopenharmony_ci
854bf215546Sopenharmony_ci      if ((opcode == TGSI_OPCODE_DP3) ||
855bf215546Sopenharmony_ci          (opcode == TGSI_OPCODE_DP4)) {
856bf215546Sopenharmony_ci         tctx->emit_instruction(tctx, &new_inst);
857bf215546Sopenharmony_ci
858bf215546Sopenharmony_ci         /* MAD tmpA.x, src0.z, src1.z, tmpA.x */
859bf215546Sopenharmony_ci         new_inst = tgsi_default_full_instruction();
860bf215546Sopenharmony_ci         new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
861bf215546Sopenharmony_ci         new_inst.Instruction.NumDstRegs = 1;
862bf215546Sopenharmony_ci         reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
863bf215546Sopenharmony_ci         new_inst.Instruction.NumSrcRegs = 3;
864bf215546Sopenharmony_ci         reg_src(&new_inst.Src[0], src0, SWIZ(Z, Z, Z, Z));
865bf215546Sopenharmony_ci         reg_src(&new_inst.Src[1], src1, SWIZ(Z, Z, Z, Z));
866bf215546Sopenharmony_ci         reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
867bf215546Sopenharmony_ci
868bf215546Sopenharmony_ci         if (opcode == TGSI_OPCODE_DP4) {
869bf215546Sopenharmony_ci            tctx->emit_instruction(tctx, &new_inst);
870bf215546Sopenharmony_ci
871bf215546Sopenharmony_ci            /* MAD tmpA.x, src0.w, src1.w, tmpA.x */
872bf215546Sopenharmony_ci            new_inst = tgsi_default_full_instruction();
873bf215546Sopenharmony_ci            new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
874bf215546Sopenharmony_ci            new_inst.Instruction.NumDstRegs = 1;
875bf215546Sopenharmony_ci            reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
876bf215546Sopenharmony_ci            new_inst.Instruction.NumSrcRegs = 3;
877bf215546Sopenharmony_ci            reg_src(&new_inst.Src[0], src0, SWIZ(W, W, W, W));
878bf215546Sopenharmony_ci            reg_src(&new_inst.Src[1], src1, SWIZ(W, W, W, W));
879bf215546Sopenharmony_ci            reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
880bf215546Sopenharmony_ci         }
881bf215546Sopenharmony_ci      }
882bf215546Sopenharmony_ci
883bf215546Sopenharmony_ci      /* fixup last instruction to write to dst: */
884bf215546Sopenharmony_ci      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
885bf215546Sopenharmony_ci
886bf215546Sopenharmony_ci      tctx->emit_instruction(tctx, &new_inst);
887bf215546Sopenharmony_ci   }
888bf215546Sopenharmony_ci}
889bf215546Sopenharmony_ci
890bf215546Sopenharmony_ci/* FLR - floor, CEIL - ceil
891bf215546Sopenharmony_ci * ; needs: 1 tmp
892bf215546Sopenharmony_ci * if (CEIL) {
893bf215546Sopenharmony_ci *   FRC tmpA, -src
894bf215546Sopenharmony_ci *   ADD dst, src, tmpA
895bf215546Sopenharmony_ci * } else {
896bf215546Sopenharmony_ci *   FRC tmpA, src
897bf215546Sopenharmony_ci *   SUB dst, src, tmpA
898bf215546Sopenharmony_ci * }
899bf215546Sopenharmony_ci */
900bf215546Sopenharmony_ci#define FLR_GROW (NINST(1) + NINST(2) - OINST(1))
901bf215546Sopenharmony_ci#define CEIL_GROW (NINST(1) + NINST(2) - OINST(1))
902bf215546Sopenharmony_ci#define FLR_TMP 1
903bf215546Sopenharmony_ci#define CEIL_TMP 1
904bf215546Sopenharmony_cistatic void
905bf215546Sopenharmony_citransform_flr_ceil(struct tgsi_transform_context *tctx,
906bf215546Sopenharmony_ci                   struct tgsi_full_instruction *inst)
907bf215546Sopenharmony_ci{
908bf215546Sopenharmony_ci   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
909bf215546Sopenharmony_ci   struct tgsi_full_dst_register *dst  = &inst->Dst[0];
910bf215546Sopenharmony_ci   struct tgsi_full_src_register *src0 = &inst->Src[0];
911bf215546Sopenharmony_ci   struct tgsi_full_instruction new_inst;
912bf215546Sopenharmony_ci   enum tgsi_opcode opcode = inst->Instruction.Opcode;
913bf215546Sopenharmony_ci
914bf215546Sopenharmony_ci   if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
915bf215546Sopenharmony_ci      /* FLR: FRC tmpA, src  CEIL: FRC tmpA, -src */
916bf215546Sopenharmony_ci      new_inst = tgsi_default_full_instruction();
917bf215546Sopenharmony_ci      new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
918bf215546Sopenharmony_ci      new_inst.Instruction.NumDstRegs = 1;
919bf215546Sopenharmony_ci      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
920bf215546Sopenharmony_ci      new_inst.Instruction.NumSrcRegs = 1;
921bf215546Sopenharmony_ci      reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
922bf215546Sopenharmony_ci
923bf215546Sopenharmony_ci      if (opcode == TGSI_OPCODE_CEIL)
924bf215546Sopenharmony_ci         new_inst.Src[0].Register.Negate = !new_inst.Src[0].Register.Negate;
925bf215546Sopenharmony_ci      tctx->emit_instruction(tctx, &new_inst);
926bf215546Sopenharmony_ci
927bf215546Sopenharmony_ci      /* FLR: SUB dst, src, tmpA  CEIL: ADD dst, src, tmpA */
928bf215546Sopenharmony_ci      new_inst = tgsi_default_full_instruction();
929bf215546Sopenharmony_ci      new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
930bf215546Sopenharmony_ci      new_inst.Instruction.NumDstRegs = 1;
931bf215546Sopenharmony_ci      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
932bf215546Sopenharmony_ci      new_inst.Instruction.NumSrcRegs = 2;
933bf215546Sopenharmony_ci      reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
934bf215546Sopenharmony_ci      reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
935bf215546Sopenharmony_ci      if (opcode == TGSI_OPCODE_FLR)
936bf215546Sopenharmony_ci         new_inst.Src[1].Register.Negate = 1;
937bf215546Sopenharmony_ci      tctx->emit_instruction(tctx, &new_inst);
938bf215546Sopenharmony_ci   }
939bf215546Sopenharmony_ci}
940bf215546Sopenharmony_ci
941bf215546Sopenharmony_ci/* TRUNC - truncate off fractional part
942bf215546Sopenharmony_ci *  dst.x = trunc(src.x)
943bf215546Sopenharmony_ci *  dst.y = trunc(src.y)
944bf215546Sopenharmony_ci *  dst.z = trunc(src.z)
945bf215546Sopenharmony_ci *  dst.w = trunc(src.w)
946bf215546Sopenharmony_ci *
947bf215546Sopenharmony_ci * ; needs: 1 tmp
948bf215546Sopenharmony_ci * if (lower FLR) {
949bf215546Sopenharmony_ci *   FRC tmpA, |src|
950bf215546Sopenharmony_ci *   SUB tmpA, |src|, tmpA
951bf215546Sopenharmony_ci * } else {
952bf215546Sopenharmony_ci *   FLR tmpA, |src|
953bf215546Sopenharmony_ci * }
954bf215546Sopenharmony_ci * CMP dst, src, -tmpA, tmpA
955bf215546Sopenharmony_ci */
956bf215546Sopenharmony_ci#define TRUNC_GROW (NINST(1) + NINST(2) + NINST(3) - OINST(1))
957bf215546Sopenharmony_ci#define TRUNC_TMP 1
958bf215546Sopenharmony_cistatic void
959bf215546Sopenharmony_citransform_trunc(struct tgsi_transform_context *tctx,
960bf215546Sopenharmony_ci                struct tgsi_full_instruction *inst)
961bf215546Sopenharmony_ci{
962bf215546Sopenharmony_ci   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
963bf215546Sopenharmony_ci   struct tgsi_full_dst_register *dst  = &inst->Dst[0];
964bf215546Sopenharmony_ci   struct tgsi_full_src_register *src0 = &inst->Src[0];
965bf215546Sopenharmony_ci   struct tgsi_full_instruction new_inst;
966bf215546Sopenharmony_ci
967bf215546Sopenharmony_ci   if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
968bf215546Sopenharmony_ci      if (ctx->config->lower_FLR) {
969bf215546Sopenharmony_ci         new_inst = tgsi_default_full_instruction();
970bf215546Sopenharmony_ci         new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
971bf215546Sopenharmony_ci         new_inst.Instruction.NumDstRegs = 1;
972bf215546Sopenharmony_ci         reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
973bf215546Sopenharmony_ci         new_inst.Instruction.NumSrcRegs = 1;
974bf215546Sopenharmony_ci         reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
975bf215546Sopenharmony_ci         new_inst.Src[0].Register.Absolute = true;
976bf215546Sopenharmony_ci         new_inst.Src[0].Register.Negate = false;
977bf215546Sopenharmony_ci         tctx->emit_instruction(tctx, &new_inst);
978bf215546Sopenharmony_ci
979bf215546Sopenharmony_ci         new_inst = tgsi_default_full_instruction();
980bf215546Sopenharmony_ci         new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
981bf215546Sopenharmony_ci         new_inst.Instruction.NumDstRegs = 1;
982bf215546Sopenharmony_ci         reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
983bf215546Sopenharmony_ci         new_inst.Instruction.NumSrcRegs = 2;
984bf215546Sopenharmony_ci         reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
985bf215546Sopenharmony_ci         new_inst.Src[0].Register.Absolute = true;
986bf215546Sopenharmony_ci         new_inst.Src[0].Register.Negate = false;
987bf215546Sopenharmony_ci         reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
988bf215546Sopenharmony_ci         new_inst.Src[1].Register.Negate = 1;
989bf215546Sopenharmony_ci         tctx->emit_instruction(tctx, &new_inst);
990bf215546Sopenharmony_ci      } else {
991bf215546Sopenharmony_ci         new_inst = tgsi_default_full_instruction();
992bf215546Sopenharmony_ci         new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
993bf215546Sopenharmony_ci         new_inst.Instruction.NumDstRegs = 1;
994bf215546Sopenharmony_ci         reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
995bf215546Sopenharmony_ci         new_inst.Instruction.NumSrcRegs = 1;
996bf215546Sopenharmony_ci         reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
997bf215546Sopenharmony_ci         new_inst.Src[0].Register.Absolute = true;
998bf215546Sopenharmony_ci         new_inst.Src[0].Register.Negate = false;
999bf215546Sopenharmony_ci         tctx->emit_instruction(tctx, &new_inst);
1000bf215546Sopenharmony_ci      }
1001bf215546Sopenharmony_ci
1002bf215546Sopenharmony_ci      new_inst = tgsi_default_full_instruction();
1003bf215546Sopenharmony_ci      new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
1004bf215546Sopenharmony_ci      new_inst.Instruction.NumDstRegs = 1;
1005bf215546Sopenharmony_ci      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
1006bf215546Sopenharmony_ci      new_inst.Instruction.NumSrcRegs = 3;
1007bf215546Sopenharmony_ci      reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
1008bf215546Sopenharmony_ci      reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
1009bf215546Sopenharmony_ci      new_inst.Src[1].Register.Negate = true;
1010bf215546Sopenharmony_ci      reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
1011bf215546Sopenharmony_ci      tctx->emit_instruction(tctx, &new_inst);
1012bf215546Sopenharmony_ci   }
1013bf215546Sopenharmony_ci}
1014bf215546Sopenharmony_ci
1015bf215546Sopenharmony_ci/* Inserts a MOV_SAT for the needed components of tex coord.  Note that
1016bf215546Sopenharmony_ci * in the case of TXP, the clamping must happen *after* projection, so
1017bf215546Sopenharmony_ci * we need to lower TXP to TEX.
1018bf215546Sopenharmony_ci *
1019bf215546Sopenharmony_ci *   MOV tmpA, src0
1020bf215546Sopenharmony_ci *   if (opc == TXP) {
1021bf215546Sopenharmony_ci *     ; do perspective division manually before clamping:
1022bf215546Sopenharmony_ci *     RCP tmpB, tmpA.w
1023bf215546Sopenharmony_ci *     MUL tmpB.<pmask>, tmpA, tmpB.xxxx
1024bf215546Sopenharmony_ci *     opc = TEX;
1025bf215546Sopenharmony_ci *   }
1026bf215546Sopenharmony_ci *   MOV_SAT tmpA.<mask>, tmpA  ; <mask> is the clamped s/t/r coords
1027bf215546Sopenharmony_ci *   <opc> dst, tmpA, ...
1028bf215546Sopenharmony_ci */
1029bf215546Sopenharmony_ci#define SAMP_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1))
1030bf215546Sopenharmony_ci#define SAMP_TMP  2
1031bf215546Sopenharmony_cistatic int
1032bf215546Sopenharmony_citransform_samp(struct tgsi_transform_context *tctx,
1033bf215546Sopenharmony_ci               struct tgsi_full_instruction *inst)
1034bf215546Sopenharmony_ci{
1035bf215546Sopenharmony_ci   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1036bf215546Sopenharmony_ci   struct tgsi_full_src_register *coord = &inst->Src[0];
1037bf215546Sopenharmony_ci   struct tgsi_full_src_register *samp;
1038bf215546Sopenharmony_ci   struct tgsi_full_instruction new_inst;
1039bf215546Sopenharmony_ci   /* mask is clamped coords, pmask is all coords (for projection): */
1040bf215546Sopenharmony_ci   unsigned mask = 0, pmask = 0, smask;
1041bf215546Sopenharmony_ci   unsigned tex = inst->Texture.Texture;
1042bf215546Sopenharmony_ci   enum tgsi_opcode opcode = inst->Instruction.Opcode;
1043bf215546Sopenharmony_ci   bool lower_txp = (opcode == TGSI_OPCODE_TXP) &&
1044bf215546Sopenharmony_ci		   (ctx->config->lower_TXP & (1 << tex));
1045bf215546Sopenharmony_ci
1046bf215546Sopenharmony_ci   if (opcode == TGSI_OPCODE_TXB2) {
1047bf215546Sopenharmony_ci      samp = &inst->Src[2];
1048bf215546Sopenharmony_ci   } else {
1049bf215546Sopenharmony_ci      samp = &inst->Src[1];
1050bf215546Sopenharmony_ci   }
1051bf215546Sopenharmony_ci
1052bf215546Sopenharmony_ci   /* convert sampler # to bitmask to test: */
1053bf215546Sopenharmony_ci   smask = 1 << samp->Register.Index;
1054bf215546Sopenharmony_ci
1055bf215546Sopenharmony_ci   /* check if we actually need to lower this one: */
1056bf215546Sopenharmony_ci   if (!(ctx->saturate & smask) && !lower_txp)
1057bf215546Sopenharmony_ci      return -1;
1058bf215546Sopenharmony_ci
1059bf215546Sopenharmony_ci   /* figure out which coordinates need saturating:
1060bf215546Sopenharmony_ci    *   - RECT textures should not get saturated
1061bf215546Sopenharmony_ci    *   - array index coords should not get saturated
1062bf215546Sopenharmony_ci    */
1063bf215546Sopenharmony_ci   switch (tex) {
1064bf215546Sopenharmony_ci   case TGSI_TEXTURE_3D:
1065bf215546Sopenharmony_ci   case TGSI_TEXTURE_CUBE:
1066bf215546Sopenharmony_ci   case TGSI_TEXTURE_CUBE_ARRAY:
1067bf215546Sopenharmony_ci   case TGSI_TEXTURE_SHADOWCUBE:
1068bf215546Sopenharmony_ci   case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
1069bf215546Sopenharmony_ci      if (ctx->config->saturate_r & smask)
1070bf215546Sopenharmony_ci         mask |= TGSI_WRITEMASK_Z;
1071bf215546Sopenharmony_ci      pmask |= TGSI_WRITEMASK_Z;
1072bf215546Sopenharmony_ci      FALLTHROUGH;
1073bf215546Sopenharmony_ci
1074bf215546Sopenharmony_ci   case TGSI_TEXTURE_2D:
1075bf215546Sopenharmony_ci   case TGSI_TEXTURE_2D_ARRAY:
1076bf215546Sopenharmony_ci   case TGSI_TEXTURE_SHADOW2D:
1077bf215546Sopenharmony_ci   case TGSI_TEXTURE_SHADOW2D_ARRAY:
1078bf215546Sopenharmony_ci   case TGSI_TEXTURE_2D_MSAA:
1079bf215546Sopenharmony_ci   case TGSI_TEXTURE_2D_ARRAY_MSAA:
1080bf215546Sopenharmony_ci      if (ctx->config->saturate_t & smask)
1081bf215546Sopenharmony_ci         mask |= TGSI_WRITEMASK_Y;
1082bf215546Sopenharmony_ci      pmask |= TGSI_WRITEMASK_Y;
1083bf215546Sopenharmony_ci      FALLTHROUGH;
1084bf215546Sopenharmony_ci
1085bf215546Sopenharmony_ci   case TGSI_TEXTURE_1D:
1086bf215546Sopenharmony_ci   case TGSI_TEXTURE_1D_ARRAY:
1087bf215546Sopenharmony_ci   case TGSI_TEXTURE_SHADOW1D:
1088bf215546Sopenharmony_ci   case TGSI_TEXTURE_SHADOW1D_ARRAY:
1089bf215546Sopenharmony_ci      if (ctx->config->saturate_s & smask)
1090bf215546Sopenharmony_ci         mask |= TGSI_WRITEMASK_X;
1091bf215546Sopenharmony_ci      pmask |= TGSI_WRITEMASK_X;
1092bf215546Sopenharmony_ci      break;
1093bf215546Sopenharmony_ci
1094bf215546Sopenharmony_ci   case TGSI_TEXTURE_RECT:
1095bf215546Sopenharmony_ci   case TGSI_TEXTURE_SHADOWRECT:
1096bf215546Sopenharmony_ci      /* we don't saturate, but in case of lower_txp we
1097bf215546Sopenharmony_ci       * still need to do the perspective divide:
1098bf215546Sopenharmony_ci       */
1099bf215546Sopenharmony_ci       pmask = TGSI_WRITEMASK_XY;
1100bf215546Sopenharmony_ci       break;
1101bf215546Sopenharmony_ci   }
1102bf215546Sopenharmony_ci
1103bf215546Sopenharmony_ci   /* sanity check.. driver could be asking to saturate a non-
1104bf215546Sopenharmony_ci    * existent coordinate component:
1105bf215546Sopenharmony_ci    */
1106bf215546Sopenharmony_ci   if (!mask && !lower_txp)
1107bf215546Sopenharmony_ci      return -1;
1108bf215546Sopenharmony_ci
1109bf215546Sopenharmony_ci   /* MOV tmpA, src0 */
1110bf215546Sopenharmony_ci   create_mov(tctx, &ctx->tmp[A].dst, coord, TGSI_WRITEMASK_XYZW, 0);
1111bf215546Sopenharmony_ci
1112bf215546Sopenharmony_ci   /* This is a bit sad.. we need to clamp *after* the coords
1113bf215546Sopenharmony_ci    * are projected, which means lowering TXP to TEX and doing
1114bf215546Sopenharmony_ci    * the projection ourself.  But since I haven't figured out
1115bf215546Sopenharmony_ci    * how to make the lowering code deliver an electric shock
1116bf215546Sopenharmony_ci    * to anyone using GL_CLAMP, we must do this instead:
1117bf215546Sopenharmony_ci    */
1118bf215546Sopenharmony_ci   if (opcode == TGSI_OPCODE_TXP) {
1119bf215546Sopenharmony_ci      /* RCP tmpB.x tmpA.w */
1120bf215546Sopenharmony_ci      new_inst = tgsi_default_full_instruction();
1121bf215546Sopenharmony_ci      new_inst.Instruction.Opcode = TGSI_OPCODE_RCP;
1122bf215546Sopenharmony_ci      new_inst.Instruction.NumDstRegs = 1;
1123bf215546Sopenharmony_ci      reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_X);
1124bf215546Sopenharmony_ci      new_inst.Instruction.NumSrcRegs = 1;
1125bf215546Sopenharmony_ci      reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(W, _, _, _));
1126bf215546Sopenharmony_ci      tctx->emit_instruction(tctx, &new_inst);
1127bf215546Sopenharmony_ci
1128bf215546Sopenharmony_ci      /* MUL tmpA.mask, tmpA, tmpB.xxxx */
1129bf215546Sopenharmony_ci      new_inst = tgsi_default_full_instruction();
1130bf215546Sopenharmony_ci      new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
1131bf215546Sopenharmony_ci      new_inst.Instruction.NumDstRegs = 1;
1132bf215546Sopenharmony_ci      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, pmask);
1133bf215546Sopenharmony_ci      new_inst.Instruction.NumSrcRegs = 2;
1134bf215546Sopenharmony_ci      reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
1135bf215546Sopenharmony_ci      reg_src(&new_inst.Src[1], &ctx->tmp[B].src, SWIZ(X, X, X, X));
1136bf215546Sopenharmony_ci      tctx->emit_instruction(tctx, &new_inst);
1137bf215546Sopenharmony_ci
1138bf215546Sopenharmony_ci      opcode = TGSI_OPCODE_TEX;
1139bf215546Sopenharmony_ci   }
1140bf215546Sopenharmony_ci
1141bf215546Sopenharmony_ci   /* MOV_SAT tmpA.<mask>, tmpA */
1142bf215546Sopenharmony_ci   if (mask) {
1143bf215546Sopenharmony_ci      create_mov(tctx, &ctx->tmp[A].dst, &ctx->tmp[A].src, mask, 1);
1144bf215546Sopenharmony_ci   }
1145bf215546Sopenharmony_ci
1146bf215546Sopenharmony_ci   /* modify the texture samp instruction to take fixed up coord: */
1147bf215546Sopenharmony_ci   new_inst = *inst;
1148bf215546Sopenharmony_ci   new_inst.Instruction.Opcode = opcode;
1149bf215546Sopenharmony_ci   new_inst.Src[0] = ctx->tmp[A].src;
1150bf215546Sopenharmony_ci   tctx->emit_instruction(tctx, &new_inst);
1151bf215546Sopenharmony_ci
1152bf215546Sopenharmony_ci   return 0;
1153bf215546Sopenharmony_ci}
1154bf215546Sopenharmony_ci
1155bf215546Sopenharmony_ci/* Two-sided color emulation:
1156bf215546Sopenharmony_ci * For each COLOR input, create a corresponding BCOLOR input, plus
1157bf215546Sopenharmony_ci * CMP instruction to select front or back color based on FACE
1158bf215546Sopenharmony_ci */
1159bf215546Sopenharmony_ci#define TWOSIDE_GROW(n)  (                      \
1160bf215546Sopenharmony_ci      2 +         /* FACE */                    \
1161bf215546Sopenharmony_ci      ((n) * 3) + /* IN[], BCOLOR[n], <intrp> */\
1162bf215546Sopenharmony_ci      ((n) * 1) + /* TEMP[] */                  \
1163bf215546Sopenharmony_ci      ((n) * NINST(3))   /* CMP instr */        \
1164bf215546Sopenharmony_ci      )
1165bf215546Sopenharmony_ci
1166bf215546Sopenharmony_cistatic void
1167bf215546Sopenharmony_ciemit_twoside(struct tgsi_transform_context *tctx)
1168bf215546Sopenharmony_ci{
1169bf215546Sopenharmony_ci   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1170bf215546Sopenharmony_ci   struct tgsi_shader_info *info = ctx->info;
1171bf215546Sopenharmony_ci   struct tgsi_full_declaration decl;
1172bf215546Sopenharmony_ci   struct tgsi_full_instruction new_inst;
1173bf215546Sopenharmony_ci   unsigned inbase, tmpbase;
1174bf215546Sopenharmony_ci   unsigned i;
1175bf215546Sopenharmony_ci
1176bf215546Sopenharmony_ci   inbase  = info->file_max[TGSI_FILE_INPUT] + 1;
1177bf215546Sopenharmony_ci   tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;
1178bf215546Sopenharmony_ci
1179bf215546Sopenharmony_ci   /* additional inputs for BCOLOR's */
1180bf215546Sopenharmony_ci   for (i = 0; i < ctx->two_side_colors; i++) {
1181bf215546Sopenharmony_ci      unsigned in_idx = ctx->two_side_idx[i];
1182bf215546Sopenharmony_ci      decl = tgsi_default_full_declaration();
1183bf215546Sopenharmony_ci      decl.Declaration.File = TGSI_FILE_INPUT;
1184bf215546Sopenharmony_ci      decl.Declaration.Semantic = true;
1185bf215546Sopenharmony_ci      decl.Range.First = decl.Range.Last = inbase + i;
1186bf215546Sopenharmony_ci      decl.Semantic.Name = TGSI_SEMANTIC_BCOLOR;
1187bf215546Sopenharmony_ci      decl.Semantic.Index = info->input_semantic_index[in_idx];
1188bf215546Sopenharmony_ci      decl.Declaration.Interpolate = true;
1189bf215546Sopenharmony_ci      decl.Interp.Interpolate = info->input_interpolate[in_idx];
1190bf215546Sopenharmony_ci      decl.Interp.Location = info->input_interpolate_loc[in_idx];
1191bf215546Sopenharmony_ci      tctx->emit_declaration(tctx, &decl);
1192bf215546Sopenharmony_ci   }
1193bf215546Sopenharmony_ci
1194bf215546Sopenharmony_ci   /* additional input for FACE */
1195bf215546Sopenharmony_ci   if (ctx->two_side_colors && (ctx->face_idx == -1)) {
1196bf215546Sopenharmony_ci      decl = tgsi_default_full_declaration();
1197bf215546Sopenharmony_ci      decl.Declaration.File = TGSI_FILE_INPUT;
1198bf215546Sopenharmony_ci      decl.Declaration.Semantic = true;
1199bf215546Sopenharmony_ci      decl.Range.First = decl.Range.Last = inbase + ctx->two_side_colors;
1200bf215546Sopenharmony_ci      decl.Semantic.Name = TGSI_SEMANTIC_FACE;
1201bf215546Sopenharmony_ci      decl.Semantic.Index = 0;
1202bf215546Sopenharmony_ci      tctx->emit_declaration(tctx, &decl);
1203bf215546Sopenharmony_ci
1204bf215546Sopenharmony_ci      ctx->face_idx = decl.Range.First;
1205bf215546Sopenharmony_ci   }
1206bf215546Sopenharmony_ci
1207bf215546Sopenharmony_ci   /* additional temps for COLOR/BCOLOR selection: */
1208bf215546Sopenharmony_ci   for (i = 0; i < ctx->two_side_colors; i++) {
1209bf215546Sopenharmony_ci      decl = tgsi_default_full_declaration();
1210bf215546Sopenharmony_ci      decl.Declaration.File = TGSI_FILE_TEMPORARY;
1211bf215546Sopenharmony_ci      decl.Range.First = decl.Range.Last = tmpbase + ctx->numtmp + i;
1212bf215546Sopenharmony_ci      tctx->emit_declaration(tctx, &decl);
1213bf215546Sopenharmony_ci   }
1214bf215546Sopenharmony_ci
1215bf215546Sopenharmony_ci   /* and finally additional instructions to select COLOR/BCOLOR: */
1216bf215546Sopenharmony_ci   for (i = 0; i < ctx->two_side_colors; i++) {
1217bf215546Sopenharmony_ci      new_inst = tgsi_default_full_instruction();
1218bf215546Sopenharmony_ci      new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
1219bf215546Sopenharmony_ci
1220bf215546Sopenharmony_ci      new_inst.Instruction.NumDstRegs = 1;
1221bf215546Sopenharmony_ci      new_inst.Dst[0].Register.File  = TGSI_FILE_TEMPORARY;
1222bf215546Sopenharmony_ci      new_inst.Dst[0].Register.Index = tmpbase + ctx->numtmp + i;
1223bf215546Sopenharmony_ci      new_inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
1224bf215546Sopenharmony_ci
1225bf215546Sopenharmony_ci      new_inst.Instruction.NumSrcRegs = 3;
1226bf215546Sopenharmony_ci      new_inst.Src[0].Register.File  = TGSI_FILE_INPUT;
1227bf215546Sopenharmony_ci      new_inst.Src[0].Register.Index = ctx->face_idx;
1228bf215546Sopenharmony_ci      new_inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
1229bf215546Sopenharmony_ci      new_inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
1230bf215546Sopenharmony_ci      new_inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X;
1231bf215546Sopenharmony_ci      new_inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_X;
1232bf215546Sopenharmony_ci      new_inst.Src[1].Register.File  = TGSI_FILE_INPUT;
1233bf215546Sopenharmony_ci      new_inst.Src[1].Register.Index = inbase + i;
1234bf215546Sopenharmony_ci      new_inst.Src[1].Register.SwizzleX = TGSI_SWIZZLE_X;
1235bf215546Sopenharmony_ci      new_inst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_Y;
1236bf215546Sopenharmony_ci      new_inst.Src[1].Register.SwizzleZ = TGSI_SWIZZLE_Z;
1237bf215546Sopenharmony_ci      new_inst.Src[1].Register.SwizzleW = TGSI_SWIZZLE_W;
1238bf215546Sopenharmony_ci      new_inst.Src[2].Register.File  = TGSI_FILE_INPUT;
1239bf215546Sopenharmony_ci      new_inst.Src[2].Register.Index = ctx->two_side_idx[i];
1240bf215546Sopenharmony_ci      new_inst.Src[2].Register.SwizzleX = TGSI_SWIZZLE_X;
1241bf215546Sopenharmony_ci      new_inst.Src[2].Register.SwizzleY = TGSI_SWIZZLE_Y;
1242bf215546Sopenharmony_ci      new_inst.Src[2].Register.SwizzleZ = TGSI_SWIZZLE_Z;
1243bf215546Sopenharmony_ci      new_inst.Src[2].Register.SwizzleW = TGSI_SWIZZLE_W;
1244bf215546Sopenharmony_ci
1245bf215546Sopenharmony_ci      tctx->emit_instruction(tctx, &new_inst);
1246bf215546Sopenharmony_ci   }
1247bf215546Sopenharmony_ci}
1248bf215546Sopenharmony_ci
1249bf215546Sopenharmony_cistatic void
1250bf215546Sopenharmony_ciemit_decls(struct tgsi_transform_context *tctx)
1251bf215546Sopenharmony_ci{
1252bf215546Sopenharmony_ci   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1253bf215546Sopenharmony_ci   struct tgsi_shader_info *info = ctx->info;
1254bf215546Sopenharmony_ci   struct tgsi_full_declaration decl;
1255bf215546Sopenharmony_ci   struct tgsi_full_immediate immed;
1256bf215546Sopenharmony_ci   unsigned tmpbase;
1257bf215546Sopenharmony_ci   unsigned i;
1258bf215546Sopenharmony_ci
1259bf215546Sopenharmony_ci   tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;
1260bf215546Sopenharmony_ci
1261bf215546Sopenharmony_ci   ctx->color_base = tmpbase + ctx->numtmp;
1262bf215546Sopenharmony_ci
1263bf215546Sopenharmony_ci   /* declare immediate: */
1264bf215546Sopenharmony_ci   immed = tgsi_default_full_immediate();
1265bf215546Sopenharmony_ci   immed.Immediate.NrTokens = 1 + 4; /* one for the token itself */
1266bf215546Sopenharmony_ci   immed.u[0].Float = 0.0;
1267bf215546Sopenharmony_ci   immed.u[1].Float = 1.0;
1268bf215546Sopenharmony_ci   immed.u[2].Float = 128.0;
1269bf215546Sopenharmony_ci   immed.u[3].Float = 0.0;
1270bf215546Sopenharmony_ci   tctx->emit_immediate(tctx, &immed);
1271bf215546Sopenharmony_ci
1272bf215546Sopenharmony_ci   ctx->imm.Register.File = TGSI_FILE_IMMEDIATE;
1273bf215546Sopenharmony_ci   ctx->imm.Register.Index = info->immediate_count;
1274bf215546Sopenharmony_ci   ctx->imm.Register.SwizzleX = TGSI_SWIZZLE_X;
1275bf215546Sopenharmony_ci   ctx->imm.Register.SwizzleY = TGSI_SWIZZLE_Y;
1276bf215546Sopenharmony_ci   ctx->imm.Register.SwizzleZ = TGSI_SWIZZLE_Z;
1277bf215546Sopenharmony_ci   ctx->imm.Register.SwizzleW = TGSI_SWIZZLE_W;
1278bf215546Sopenharmony_ci
1279bf215546Sopenharmony_ci   /* declare temp regs: */
1280bf215546Sopenharmony_ci   for (i = 0; i < ctx->numtmp; i++) {
1281bf215546Sopenharmony_ci      decl = tgsi_default_full_declaration();
1282bf215546Sopenharmony_ci      decl.Declaration.File = TGSI_FILE_TEMPORARY;
1283bf215546Sopenharmony_ci      decl.Range.First = decl.Range.Last = tmpbase + i;
1284bf215546Sopenharmony_ci      tctx->emit_declaration(tctx, &decl);
1285bf215546Sopenharmony_ci
1286bf215546Sopenharmony_ci      ctx->tmp[i].src.Register.File  = TGSI_FILE_TEMPORARY;
1287bf215546Sopenharmony_ci      ctx->tmp[i].src.Register.Index = tmpbase + i;
1288bf215546Sopenharmony_ci      ctx->tmp[i].src.Register.SwizzleX = TGSI_SWIZZLE_X;
1289bf215546Sopenharmony_ci      ctx->tmp[i].src.Register.SwizzleY = TGSI_SWIZZLE_Y;
1290bf215546Sopenharmony_ci      ctx->tmp[i].src.Register.SwizzleZ = TGSI_SWIZZLE_Z;
1291bf215546Sopenharmony_ci      ctx->tmp[i].src.Register.SwizzleW = TGSI_SWIZZLE_W;
1292bf215546Sopenharmony_ci
1293bf215546Sopenharmony_ci      ctx->tmp[i].dst.Register.File  = TGSI_FILE_TEMPORARY;
1294bf215546Sopenharmony_ci      ctx->tmp[i].dst.Register.Index = tmpbase + i;
1295bf215546Sopenharmony_ci      ctx->tmp[i].dst.Register.WriteMask = TGSI_WRITEMASK_XYZW;
1296bf215546Sopenharmony_ci   }
1297bf215546Sopenharmony_ci
1298bf215546Sopenharmony_ci   if (ctx->two_side_colors)
1299bf215546Sopenharmony_ci      emit_twoside(tctx);
1300bf215546Sopenharmony_ci}
1301bf215546Sopenharmony_ci
1302bf215546Sopenharmony_cistatic void
1303bf215546Sopenharmony_cirename_color_inputs(struct tgsi_lowering_context *ctx,
1304bf215546Sopenharmony_ci                    struct tgsi_full_instruction *inst)
1305bf215546Sopenharmony_ci{
1306bf215546Sopenharmony_ci   unsigned i, j;
1307bf215546Sopenharmony_ci   for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1308bf215546Sopenharmony_ci      struct tgsi_src_register *src = &inst->Src[i].Register;
1309bf215546Sopenharmony_ci      if (src->File == TGSI_FILE_INPUT) {
1310bf215546Sopenharmony_ci         for (j = 0; j < ctx->two_side_colors; j++) {
1311bf215546Sopenharmony_ci	    if (src->Index == (int)ctx->two_side_idx[j]) {
1312bf215546Sopenharmony_ci               src->File = TGSI_FILE_TEMPORARY;
1313bf215546Sopenharmony_ci               src->Index = ctx->color_base + j;
1314bf215546Sopenharmony_ci               break;
1315bf215546Sopenharmony_ci            }
1316bf215546Sopenharmony_ci         }
1317bf215546Sopenharmony_ci      }
1318bf215546Sopenharmony_ci   }
1319bf215546Sopenharmony_ci
1320bf215546Sopenharmony_ci}
1321bf215546Sopenharmony_ci
1322bf215546Sopenharmony_cistatic void
1323bf215546Sopenharmony_citransform_instr(struct tgsi_transform_context *tctx,
1324bf215546Sopenharmony_ci		struct tgsi_full_instruction *inst)
1325bf215546Sopenharmony_ci{
1326bf215546Sopenharmony_ci   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1327bf215546Sopenharmony_ci
1328bf215546Sopenharmony_ci   if (!ctx->emitted_decls) {
1329bf215546Sopenharmony_ci      emit_decls(tctx);
1330bf215546Sopenharmony_ci      ctx->emitted_decls = 1;
1331bf215546Sopenharmony_ci   }
1332bf215546Sopenharmony_ci
1333bf215546Sopenharmony_ci   /* if emulating two-sided-color, we need to re-write some
1334bf215546Sopenharmony_ci    * src registers:
1335bf215546Sopenharmony_ci    */
1336bf215546Sopenharmony_ci   if (ctx->two_side_colors)
1337bf215546Sopenharmony_ci      rename_color_inputs(ctx, inst);
1338bf215546Sopenharmony_ci
1339bf215546Sopenharmony_ci   switch (inst->Instruction.Opcode) {
1340bf215546Sopenharmony_ci   case TGSI_OPCODE_DST:
1341bf215546Sopenharmony_ci      if (!ctx->config->lower_DST)
1342bf215546Sopenharmony_ci         goto skip;
1343bf215546Sopenharmony_ci      transform_dst(tctx, inst);
1344bf215546Sopenharmony_ci      break;
1345bf215546Sopenharmony_ci   case TGSI_OPCODE_LRP:
1346bf215546Sopenharmony_ci      if (!ctx->config->lower_LRP)
1347bf215546Sopenharmony_ci         goto skip;
1348bf215546Sopenharmony_ci      transform_lrp(tctx, inst);
1349bf215546Sopenharmony_ci      break;
1350bf215546Sopenharmony_ci   case TGSI_OPCODE_FRC:
1351bf215546Sopenharmony_ci      if (!ctx->config->lower_FRC)
1352bf215546Sopenharmony_ci         goto skip;
1353bf215546Sopenharmony_ci      transform_frc(tctx, inst);
1354bf215546Sopenharmony_ci      break;
1355bf215546Sopenharmony_ci   case TGSI_OPCODE_POW:
1356bf215546Sopenharmony_ci      if (!ctx->config->lower_POW)
1357bf215546Sopenharmony_ci         goto skip;
1358bf215546Sopenharmony_ci      transform_pow(tctx, inst);
1359bf215546Sopenharmony_ci      break;
1360bf215546Sopenharmony_ci   case TGSI_OPCODE_LIT:
1361bf215546Sopenharmony_ci      if (!ctx->config->lower_LIT)
1362bf215546Sopenharmony_ci         goto skip;
1363bf215546Sopenharmony_ci      transform_lit(tctx, inst);
1364bf215546Sopenharmony_ci      break;
1365bf215546Sopenharmony_ci   case TGSI_OPCODE_EXP:
1366bf215546Sopenharmony_ci      if (!ctx->config->lower_EXP)
1367bf215546Sopenharmony_ci         goto skip;
1368bf215546Sopenharmony_ci      transform_exp(tctx, inst);
1369bf215546Sopenharmony_ci      break;
1370bf215546Sopenharmony_ci   case TGSI_OPCODE_LOG:
1371bf215546Sopenharmony_ci      if (!ctx->config->lower_LOG)
1372bf215546Sopenharmony_ci         goto skip;
1373bf215546Sopenharmony_ci      transform_log(tctx, inst);
1374bf215546Sopenharmony_ci      break;
1375bf215546Sopenharmony_ci   case TGSI_OPCODE_DP4:
1376bf215546Sopenharmony_ci      if (!ctx->config->lower_DP4)
1377bf215546Sopenharmony_ci         goto skip;
1378bf215546Sopenharmony_ci      transform_dotp(tctx, inst);
1379bf215546Sopenharmony_ci      break;
1380bf215546Sopenharmony_ci   case TGSI_OPCODE_DP3:
1381bf215546Sopenharmony_ci      if (!ctx->config->lower_DP3)
1382bf215546Sopenharmony_ci         goto skip;
1383bf215546Sopenharmony_ci      transform_dotp(tctx, inst);
1384bf215546Sopenharmony_ci      break;
1385bf215546Sopenharmony_ci   case TGSI_OPCODE_DP2:
1386bf215546Sopenharmony_ci      if (!ctx->config->lower_DP2)
1387bf215546Sopenharmony_ci         goto skip;
1388bf215546Sopenharmony_ci      transform_dotp(tctx, inst);
1389bf215546Sopenharmony_ci      break;
1390bf215546Sopenharmony_ci   case TGSI_OPCODE_FLR:
1391bf215546Sopenharmony_ci      if (!ctx->config->lower_FLR)
1392bf215546Sopenharmony_ci         goto skip;
1393bf215546Sopenharmony_ci      transform_flr_ceil(tctx, inst);
1394bf215546Sopenharmony_ci      break;
1395bf215546Sopenharmony_ci   case TGSI_OPCODE_CEIL:
1396bf215546Sopenharmony_ci      if (!ctx->config->lower_CEIL)
1397bf215546Sopenharmony_ci         goto skip;
1398bf215546Sopenharmony_ci      transform_flr_ceil(tctx, inst);
1399bf215546Sopenharmony_ci      break;
1400bf215546Sopenharmony_ci   case TGSI_OPCODE_TRUNC:
1401bf215546Sopenharmony_ci      if (!ctx->config->lower_TRUNC)
1402bf215546Sopenharmony_ci         goto skip;
1403bf215546Sopenharmony_ci      transform_trunc(tctx, inst);
1404bf215546Sopenharmony_ci      break;
1405bf215546Sopenharmony_ci   case TGSI_OPCODE_TEX:
1406bf215546Sopenharmony_ci   case TGSI_OPCODE_TXP:
1407bf215546Sopenharmony_ci   case TGSI_OPCODE_TXB:
1408bf215546Sopenharmony_ci   case TGSI_OPCODE_TXB2:
1409bf215546Sopenharmony_ci   case TGSI_OPCODE_TXL:
1410bf215546Sopenharmony_ci      if (transform_samp(tctx, inst))
1411bf215546Sopenharmony_ci         goto skip;
1412bf215546Sopenharmony_ci      break;
1413bf215546Sopenharmony_ci   default:
1414bf215546Sopenharmony_ci   skip:
1415bf215546Sopenharmony_ci      tctx->emit_instruction(tctx, inst);
1416bf215546Sopenharmony_ci      break;
1417bf215546Sopenharmony_ci   }
1418bf215546Sopenharmony_ci}
1419bf215546Sopenharmony_ci
1420bf215546Sopenharmony_ci/* returns NULL if no lowering required, else returns the new
1421bf215546Sopenharmony_ci * tokens (which caller is required to free()).  In either case
1422bf215546Sopenharmony_ci * returns the current info.
1423bf215546Sopenharmony_ci */
1424bf215546Sopenharmony_ciconst struct tgsi_token *
1425bf215546Sopenharmony_citgsi_transform_lowering(const struct tgsi_lowering_config *config,
1426bf215546Sopenharmony_ci                        const struct tgsi_token *tokens,
1427bf215546Sopenharmony_ci                        struct tgsi_shader_info *info)
1428bf215546Sopenharmony_ci{
1429bf215546Sopenharmony_ci   struct tgsi_lowering_context ctx;
1430bf215546Sopenharmony_ci   struct tgsi_token *newtoks;
1431bf215546Sopenharmony_ci   int newlen, numtmp;
1432bf215546Sopenharmony_ci
1433bf215546Sopenharmony_ci   /* sanity check in case limit is ever increased: */
1434bf215546Sopenharmony_ci   STATIC_ASSERT((sizeof(config->saturate_s) * 8) >= PIPE_MAX_SAMPLERS);
1435bf215546Sopenharmony_ci
1436bf215546Sopenharmony_ci   /* sanity check the lowering */
1437bf215546Sopenharmony_ci   assert(!(config->lower_FRC && (config->lower_FLR || config->lower_CEIL)));
1438bf215546Sopenharmony_ci   assert(!(config->lower_FRC && config->lower_TRUNC));
1439bf215546Sopenharmony_ci
1440bf215546Sopenharmony_ci   memset(&ctx, 0, sizeof(ctx));
1441bf215546Sopenharmony_ci   ctx.base.transform_instruction = transform_instr;
1442bf215546Sopenharmony_ci   ctx.info = info;
1443bf215546Sopenharmony_ci   ctx.config = config;
1444bf215546Sopenharmony_ci
1445bf215546Sopenharmony_ci   tgsi_scan_shader(tokens, info);
1446bf215546Sopenharmony_ci
1447bf215546Sopenharmony_ci   /* if we are adding fragment shader support to emulate two-sided
1448bf215546Sopenharmony_ci    * color, then figure out the number of additional inputs we need
1449bf215546Sopenharmony_ci    * to create for BCOLOR's..
1450bf215546Sopenharmony_ci    */
1451bf215546Sopenharmony_ci   if ((info->processor == PIPE_SHADER_FRAGMENT) &&
1452bf215546Sopenharmony_ci       config->color_two_side) {
1453bf215546Sopenharmony_ci      int i;
1454bf215546Sopenharmony_ci      ctx.face_idx = -1;
1455bf215546Sopenharmony_ci      for (i = 0; i <= info->file_max[TGSI_FILE_INPUT]; i++) {
1456bf215546Sopenharmony_ci         if (info->input_semantic_name[i] == TGSI_SEMANTIC_COLOR)
1457bf215546Sopenharmony_ci            ctx.two_side_idx[ctx.two_side_colors++] = i;
1458bf215546Sopenharmony_ci         if (info->input_semantic_name[i] == TGSI_SEMANTIC_FACE)
1459bf215546Sopenharmony_ci            ctx.face_idx = i;
1460bf215546Sopenharmony_ci      }
1461bf215546Sopenharmony_ci   }
1462bf215546Sopenharmony_ci
1463bf215546Sopenharmony_ci   ctx.saturate = config->saturate_r | config->saturate_s | config->saturate_t;
1464bf215546Sopenharmony_ci
1465bf215546Sopenharmony_ci#define OPCS(x) ((config->lower_ ## x) ? info->opcode_count[TGSI_OPCODE_ ## x] : 0)
1466bf215546Sopenharmony_ci   /* if there are no instructions to lower, then we are done: */
1467bf215546Sopenharmony_ci   if (!(OPCS(DST) ||
1468bf215546Sopenharmony_ci         OPCS(LRP) ||
1469bf215546Sopenharmony_ci         OPCS(FRC) ||
1470bf215546Sopenharmony_ci         OPCS(POW) ||
1471bf215546Sopenharmony_ci         OPCS(LIT) ||
1472bf215546Sopenharmony_ci         OPCS(EXP) ||
1473bf215546Sopenharmony_ci         OPCS(LOG) ||
1474bf215546Sopenharmony_ci         OPCS(DP4) ||
1475bf215546Sopenharmony_ci         OPCS(DP3) ||
1476bf215546Sopenharmony_ci         OPCS(DP2) ||
1477bf215546Sopenharmony_ci         OPCS(FLR) ||
1478bf215546Sopenharmony_ci         OPCS(CEIL) ||
1479bf215546Sopenharmony_ci         OPCS(TRUNC) ||
1480bf215546Sopenharmony_ci         OPCS(TXP) ||
1481bf215546Sopenharmony_ci         ctx.two_side_colors ||
1482bf215546Sopenharmony_ci         ctx.saturate))
1483bf215546Sopenharmony_ci      return NULL;
1484bf215546Sopenharmony_ci
1485bf215546Sopenharmony_ci#if 0  /* debug */
1486bf215546Sopenharmony_ci   _debug_printf("BEFORE:");
1487bf215546Sopenharmony_ci   tgsi_dump(tokens, 0);
1488bf215546Sopenharmony_ci#endif
1489bf215546Sopenharmony_ci
1490bf215546Sopenharmony_ci   numtmp = 0;
1491bf215546Sopenharmony_ci   newlen = tgsi_num_tokens(tokens);
1492bf215546Sopenharmony_ci   if (OPCS(DST)) {
1493bf215546Sopenharmony_ci      newlen += DST_GROW * OPCS(DST);
1494bf215546Sopenharmony_ci      numtmp = MAX2(numtmp, DST_TMP);
1495bf215546Sopenharmony_ci   }
1496bf215546Sopenharmony_ci   if (OPCS(LRP)) {
1497bf215546Sopenharmony_ci      newlen += LRP_GROW * OPCS(LRP);
1498bf215546Sopenharmony_ci      numtmp = MAX2(numtmp, LRP_TMP);
1499bf215546Sopenharmony_ci   }
1500bf215546Sopenharmony_ci   if (OPCS(FRC)) {
1501bf215546Sopenharmony_ci      newlen += FRC_GROW * OPCS(FRC);
1502bf215546Sopenharmony_ci      numtmp = MAX2(numtmp, FRC_TMP);
1503bf215546Sopenharmony_ci   }
1504bf215546Sopenharmony_ci   if (OPCS(POW)) {
1505bf215546Sopenharmony_ci      newlen += POW_GROW * OPCS(POW);
1506bf215546Sopenharmony_ci      numtmp = MAX2(numtmp, POW_TMP);
1507bf215546Sopenharmony_ci   }
1508bf215546Sopenharmony_ci   if (OPCS(LIT)) {
1509bf215546Sopenharmony_ci      newlen += LIT_GROW * OPCS(LIT);
1510bf215546Sopenharmony_ci      numtmp = MAX2(numtmp, LIT_TMP);
1511bf215546Sopenharmony_ci   }
1512bf215546Sopenharmony_ci   if (OPCS(EXP)) {
1513bf215546Sopenharmony_ci      newlen += EXP_GROW * OPCS(EXP);
1514bf215546Sopenharmony_ci      numtmp = MAX2(numtmp, EXP_TMP);
1515bf215546Sopenharmony_ci   }
1516bf215546Sopenharmony_ci   if (OPCS(LOG)) {
1517bf215546Sopenharmony_ci      newlen += LOG_GROW * OPCS(LOG);
1518bf215546Sopenharmony_ci      numtmp = MAX2(numtmp, LOG_TMP);
1519bf215546Sopenharmony_ci   }
1520bf215546Sopenharmony_ci   if (OPCS(DP4)) {
1521bf215546Sopenharmony_ci      newlen += DP4_GROW * OPCS(DP4);
1522bf215546Sopenharmony_ci      numtmp = MAX2(numtmp, DOTP_TMP);
1523bf215546Sopenharmony_ci   }
1524bf215546Sopenharmony_ci   if (OPCS(DP3)) {
1525bf215546Sopenharmony_ci      newlen += DP3_GROW * OPCS(DP3);
1526bf215546Sopenharmony_ci      numtmp = MAX2(numtmp, DOTP_TMP);
1527bf215546Sopenharmony_ci   }
1528bf215546Sopenharmony_ci   if (OPCS(DP2)) {
1529bf215546Sopenharmony_ci      newlen += DP2_GROW * OPCS(DP2);
1530bf215546Sopenharmony_ci      numtmp = MAX2(numtmp, DOTP_TMP);
1531bf215546Sopenharmony_ci   }
1532bf215546Sopenharmony_ci   if (OPCS(FLR)) {
1533bf215546Sopenharmony_ci      newlen += FLR_GROW * OPCS(FLR);
1534bf215546Sopenharmony_ci      numtmp = MAX2(numtmp, FLR_TMP);
1535bf215546Sopenharmony_ci   }
1536bf215546Sopenharmony_ci   if (OPCS(CEIL)) {
1537bf215546Sopenharmony_ci      newlen += CEIL_GROW * OPCS(CEIL);
1538bf215546Sopenharmony_ci      numtmp = MAX2(numtmp, CEIL_TMP);
1539bf215546Sopenharmony_ci   }
1540bf215546Sopenharmony_ci   if (OPCS(TRUNC)) {
1541bf215546Sopenharmony_ci      newlen += TRUNC_GROW * OPCS(TRUNC);
1542bf215546Sopenharmony_ci      numtmp = MAX2(numtmp, TRUNC_TMP);
1543bf215546Sopenharmony_ci   }
1544bf215546Sopenharmony_ci   if (ctx.saturate || config->lower_TXP) {
1545bf215546Sopenharmony_ci      int n = 0;
1546bf215546Sopenharmony_ci
1547bf215546Sopenharmony_ci      if (ctx.saturate) {
1548bf215546Sopenharmony_ci         n = info->opcode_count[TGSI_OPCODE_TEX] +
1549bf215546Sopenharmony_ci            info->opcode_count[TGSI_OPCODE_TXP] +
1550bf215546Sopenharmony_ci            info->opcode_count[TGSI_OPCODE_TXB] +
1551bf215546Sopenharmony_ci            info->opcode_count[TGSI_OPCODE_TXB2] +
1552bf215546Sopenharmony_ci            info->opcode_count[TGSI_OPCODE_TXL];
1553bf215546Sopenharmony_ci      } else if (config->lower_TXP) {
1554bf215546Sopenharmony_ci          n = info->opcode_count[TGSI_OPCODE_TXP];
1555bf215546Sopenharmony_ci      }
1556bf215546Sopenharmony_ci
1557bf215546Sopenharmony_ci      newlen += SAMP_GROW * n;
1558bf215546Sopenharmony_ci      numtmp = MAX2(numtmp, SAMP_TMP);
1559bf215546Sopenharmony_ci   }
1560bf215546Sopenharmony_ci
1561bf215546Sopenharmony_ci   /* specifically don't include two_side_colors temps in the count: */
1562bf215546Sopenharmony_ci   ctx.numtmp = numtmp;
1563bf215546Sopenharmony_ci
1564bf215546Sopenharmony_ci   if (ctx.two_side_colors) {
1565bf215546Sopenharmony_ci      newlen += TWOSIDE_GROW(ctx.two_side_colors);
1566bf215546Sopenharmony_ci      /* note: we permanently consume temp regs, re-writing references
1567bf215546Sopenharmony_ci       * to IN.COLOR[n] to TEMP[m] (holding the output of the CMP
1568bf215546Sopenharmony_ci       * instruction that selects which varying to use):
1569bf215546Sopenharmony_ci       */
1570bf215546Sopenharmony_ci      numtmp += ctx.two_side_colors;
1571bf215546Sopenharmony_ci   }
1572bf215546Sopenharmony_ci
1573bf215546Sopenharmony_ci   newlen += 2 * numtmp;
1574bf215546Sopenharmony_ci   newlen += 5;        /* immediate */
1575bf215546Sopenharmony_ci
1576bf215546Sopenharmony_ci   newtoks = tgsi_transform_shader(tokens, newlen, &ctx.base);
1577bf215546Sopenharmony_ci   if (!newtoks)
1578bf215546Sopenharmony_ci      return NULL;
1579bf215546Sopenharmony_ci
1580bf215546Sopenharmony_ci   tgsi_scan_shader(newtoks, info);
1581bf215546Sopenharmony_ci
1582bf215546Sopenharmony_ci#if 0  /* debug */
1583bf215546Sopenharmony_ci   _debug_printf("AFTER:");
1584bf215546Sopenharmony_ci   tgsi_dump(newtoks, 0);
1585bf215546Sopenharmony_ci#endif
1586bf215546Sopenharmony_ci
1587bf215546Sopenharmony_ci   return newtoks;
1588bf215546Sopenharmony_ci}
1589