1bf215546Sopenharmony_ci
2bf215546Sopenharmony_ci/* FF is big and ugly so feel free to write lines as long as you like.
3bf215546Sopenharmony_ci * Aieeeeeeeee !
4bf215546Sopenharmony_ci *
5bf215546Sopenharmony_ci * Let me make that clearer:
6bf215546Sopenharmony_ci * Aieeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee ! !! !!!
7bf215546Sopenharmony_ci */
8bf215546Sopenharmony_ci
9bf215546Sopenharmony_ci#include "device9.h"
10bf215546Sopenharmony_ci#include "basetexture9.h"
11bf215546Sopenharmony_ci#include "vertexdeclaration9.h"
12bf215546Sopenharmony_ci#include "vertexshader9.h"
13bf215546Sopenharmony_ci#include "pixelshader9.h"
14bf215546Sopenharmony_ci#include "nine_ff.h"
15bf215546Sopenharmony_ci#include "nine_defines.h"
16bf215546Sopenharmony_ci#include "nine_helpers.h"
17bf215546Sopenharmony_ci#include "nine_pipe.h"
18bf215546Sopenharmony_ci#include "nine_dump.h"
19bf215546Sopenharmony_ci
20bf215546Sopenharmony_ci#include "pipe/p_context.h"
21bf215546Sopenharmony_ci#include "tgsi/tgsi_ureg.h"
22bf215546Sopenharmony_ci#include "tgsi/tgsi_dump.h"
23bf215546Sopenharmony_ci#include "util/u_box.h"
24bf215546Sopenharmony_ci#include "util/u_hash_table.h"
25bf215546Sopenharmony_ci#include "util/u_upload_mgr.h"
26bf215546Sopenharmony_ci
27bf215546Sopenharmony_ci#define DBG_CHANNEL DBG_FF
28bf215546Sopenharmony_ci
29bf215546Sopenharmony_ci#define NINE_FF_NUM_VS_CONST 196
30bf215546Sopenharmony_ci#define NINE_FF_NUM_PS_CONST 24
31bf215546Sopenharmony_ci
/* Plain four-component float vector (x, y, z, w in that order). */
struct fvec4
{
    float x;
    float y;
    float z;
    float w;
};
36bf215546Sopenharmony_ci
/* Key describing one fixed-function vertex shader variant.
 *
 * The same storage is viewed in three ways:
 *  - as named bitfields, used while building the shader,
 *  - as value32[], which nine_ff_vs_key_hash() XOR-folds,
 *  - as value64[], which nine_ff_vs_key_comp() memcmp()s.
 * Because hashing and comparison only look at the arrays, the bitfield
 * struct must occupy exactly the same bytes as the arrays and all pad bits
 * must be kept at a fixed value. The static assertions below enforce the
 * size part of that contract at compile time.
 */
struct nine_ff_vs_key
{
    union {
        struct {
            /* --- word 0: general state flags --- */
            uint32_t position_t : 1;
            uint32_t lighting   : 1;
            uint32_t darkness   : 1; /* lighting enabled but no active lights */
            uint32_t localviewer : 1;
            uint32_t vertexpointsize : 1;
            uint32_t pointscale : 1;
            uint32_t vertexblend : 3; /* number of blended matrices, 0 = off */
            uint32_t vertexblend_indexed : 1;
            uint32_t vertextween : 1;
            uint32_t mtl_diffuse : 2; /* 0 = material, 1 = color1, 2 = color2 */
            uint32_t mtl_ambient : 2; /* same encoding as mtl_diffuse */
            uint32_t mtl_specular : 2; /* same encoding as mtl_diffuse */
            uint32_t mtl_emissive : 2; /* same encoding as mtl_diffuse */
            uint32_t fog_mode : 2;
            uint32_t fog_range : 1;
            uint32_t color0in_one : 1;
            uint32_t color1in_zero : 1;
            uint32_t has_normal : 1;
            uint32_t fog : 1;
            uint32_t normalizenormals : 1;
            uint32_t ucp : 1;
            uint32_t pad1 : 4;
            /* --- word 1 --- */
            uint32_t tc_dim_input: 16; /* 8 * 2 bits */
            uint32_t pad2 : 16;
            /* --- word 2 --- */
            uint32_t tc_dim_output: 24; /* 8 * 3 bits */
            uint32_t pad3 : 8;
            /* --- word 3 --- */
            uint32_t tc_gen : 24; /* 8 * 3 bits */
            uint32_t pad4 : 8;
            /* --- word 4 --- */
            uint32_t tc_idx : 24; /* presumably 8 * 3 bits as well - TODO confirm */
            uint32_t pad5 : 8;
            /* --- word 5: bitmask indexed by NINE_DECLUSAGE_* --- */
            uint32_t passthrough;
        };
        uint64_t value64[3]; /* don't forget to resize VertexShader9.ff_key */
        uint32_t value32[6];
    };
};

/* Hash/compare only see value32[]/value64[]; a field that spills past them
 * would silently be ignored when looking up shader variants. */
_Static_assert(sizeof(struct nine_ff_vs_key) ==
               sizeof(((const struct nine_ff_vs_key *)0)->value64),
               "nine_ff_vs_key bitfields must fit value64[] exactly");
_Static_assert(sizeof(((const struct nine_ff_vs_key *)0)->value32) ==
               sizeof(((const struct nine_ff_vs_key *)0)->value64),
               "nine_ff_vs_key value32[] and value64[] must cover the same bytes");
77bf215546Sopenharmony_ci
78bf215546Sopenharmony_ci/* Texture stage state:
79bf215546Sopenharmony_ci *
80bf215546Sopenharmony_ci * COLOROP       D3DTOP 5 bit
81bf215546Sopenharmony_ci * ALPHAOP       D3DTOP 5 bit
82bf215546Sopenharmony_ci * COLORARG0     D3DTA  3 bit
83bf215546Sopenharmony_ci * COLORARG1     D3DTA  3 bit
84bf215546Sopenharmony_ci * COLORARG2     D3DTA  3 bit
85bf215546Sopenharmony_ci * ALPHAARG0     D3DTA  3 bit
86bf215546Sopenharmony_ci * ALPHAARG1     D3DTA  3 bit
87bf215546Sopenharmony_ci * ALPHAARG2     D3DTA  3 bit
88bf215546Sopenharmony_ci * RESULTARG     D3DTA  1 bit (CURRENT:0 or TEMP:1)
 * TEXTARGET     0 - 3  2 bit (1D/2D/3D/CUBE)
 * (padding)            1 bit
90bf215546Sopenharmony_ci * ===========================
91bf215546Sopenharmony_ci *                     32 bit per stage
92bf215546Sopenharmony_ci */
/* Key describing one fixed-function pixel shader variant.
 *
 * The named fields alias value32[] (XOR-folded by nine_ff_ps_key_hash())
 * and value64[] (memcmp()'d by nine_ff_ps_key_comp()). Since hashing and
 * comparison only look at the arrays, the named fields must cover exactly
 * the same bytes; the static assertions below check this at compile time.
 */
struct nine_ff_ps_key
{
    union {
        struct {
            /* Per texture stage state, one 32-bit word per stage
             * (words 0..7 of the key). */
            struct {
                uint32_t colorop   : 5;
                uint32_t alphaop   : 5;
                uint32_t colorarg0 : 3;
                uint32_t colorarg1 : 3;
                uint32_t colorarg2 : 3;
                uint32_t alphaarg0 : 3;
                uint32_t alphaarg1 : 3;
                uint32_t alphaarg2 : 3;
                uint32_t resultarg : 1; /* CURRENT:0 or TEMP:1 */
                uint32_t textarget : 2; /* 1D/2D/3D/CUBE */
                uint32_t pad       : 1;
                /* that's 32 bit exactly */
            } ts[8];
            /* word 8: global state */
            uint32_t projected : 16; /* presumably 2 bits per stage - TODO confirm */
            uint32_t fog : 1; /* for vFog coming from VS */
            uint32_t fog_mode : 2;
            uint32_t fog_source : 1; /* 0: Z, 1: W */
            uint32_t specular : 1;
            uint32_t pad1 : 11; /* 9 32-bit words with this */
            /* words 9..11: extra per-argument bits, one byte per argument
             * index; presumably one bit per stage - TODO confirm */
            uint8_t colorarg_b4[3];
            uint8_t colorarg_b5[3];
            uint8_t alphaarg_b4[3]; /* 11 32-bit words plus a byte */
            uint8_t pad2[3];
        };
        uint64_t value64[6]; /* don't forget to resize PixelShader9.ff_key */
        uint32_t value32[12];
    };
};

/* Hash/compare only see value32[]/value64[]; a field that spills past them
 * would silently be ignored when looking up shader variants. */
_Static_assert(sizeof(struct nine_ff_ps_key) ==
               sizeof(((const struct nine_ff_ps_key *)0)->value64),
               "nine_ff_ps_key fields must fit value64[] exactly");
_Static_assert(sizeof(((const struct nine_ff_ps_key *)0)->value32) ==
               sizeof(((const struct nine_ff_ps_key *)0)->value64),
               "nine_ff_ps_key value32[] and value64[] must cover the same bytes");
_Static_assert(sizeof(((const struct nine_ff_ps_key *)0)->ts[0]) == sizeof(uint32_t),
               "each texture stage entry must be exactly one 32-bit word");
126bf215546Sopenharmony_ci
127bf215546Sopenharmony_cistatic uint32_t nine_ff_vs_key_hash(const void *key)
128bf215546Sopenharmony_ci{
129bf215546Sopenharmony_ci    const struct nine_ff_vs_key *vs = key;
130bf215546Sopenharmony_ci    unsigned i;
131bf215546Sopenharmony_ci    uint32_t hash = vs->value32[0];
132bf215546Sopenharmony_ci    for (i = 1; i < ARRAY_SIZE(vs->value32); ++i)
133bf215546Sopenharmony_ci        hash ^= vs->value32[i];
134bf215546Sopenharmony_ci    return hash;
135bf215546Sopenharmony_ci}
136bf215546Sopenharmony_cistatic bool nine_ff_vs_key_comp(const void *key1, const void *key2)
137bf215546Sopenharmony_ci{
138bf215546Sopenharmony_ci    struct nine_ff_vs_key *a = (struct nine_ff_vs_key *)key1;
139bf215546Sopenharmony_ci    struct nine_ff_vs_key *b = (struct nine_ff_vs_key *)key2;
140bf215546Sopenharmony_ci
141bf215546Sopenharmony_ci    return memcmp(a->value64, b->value64, sizeof(a->value64)) == 0;
142bf215546Sopenharmony_ci}
143bf215546Sopenharmony_cistatic uint32_t nine_ff_ps_key_hash(const void *key)
144bf215546Sopenharmony_ci{
145bf215546Sopenharmony_ci    const struct nine_ff_ps_key *ps = key;
146bf215546Sopenharmony_ci    unsigned i;
147bf215546Sopenharmony_ci    uint32_t hash = ps->value32[0];
148bf215546Sopenharmony_ci    for (i = 1; i < ARRAY_SIZE(ps->value32); ++i)
149bf215546Sopenharmony_ci        hash ^= ps->value32[i];
150bf215546Sopenharmony_ci    return hash;
151bf215546Sopenharmony_ci}
152bf215546Sopenharmony_cistatic bool nine_ff_ps_key_comp(const void *key1, const void *key2)
153bf215546Sopenharmony_ci{
154bf215546Sopenharmony_ci    struct nine_ff_ps_key *a = (struct nine_ff_ps_key *)key1;
155bf215546Sopenharmony_ci    struct nine_ff_ps_key *b = (struct nine_ff_ps_key *)key2;
156bf215546Sopenharmony_ci
157bf215546Sopenharmony_ci    return memcmp(a->value64, b->value64, sizeof(a->value64)) == 0;
158bf215546Sopenharmony_ci}
159bf215546Sopenharmony_cistatic uint32_t nine_ff_fvf_key_hash(const void *key)
160bf215546Sopenharmony_ci{
161bf215546Sopenharmony_ci    return *(DWORD *)key;
162bf215546Sopenharmony_ci}
163bf215546Sopenharmony_cistatic bool nine_ff_fvf_key_comp(const void *key1, const void *key2)
164bf215546Sopenharmony_ci{
165bf215546Sopenharmony_ci    return *(DWORD *)key1 == *(DWORD *)key2;
166bf215546Sopenharmony_ci}
167bf215546Sopenharmony_ci
/* Forward declarations; both are defined later in this file. */
static void nine_ff_prune_vs(struct NineDevice9 *device);
static void nine_ff_prune_ps(struct NineDevice9 *device);
170bf215546Sopenharmony_ci
171bf215546Sopenharmony_cistatic void nine_ureg_tgsi_dump(struct ureg_program *ureg, boolean override)
172bf215546Sopenharmony_ci{
173bf215546Sopenharmony_ci    if (debug_get_bool_option("NINE_FF_DUMP", FALSE) || override) {
174bf215546Sopenharmony_ci        const struct tgsi_token *toks = ureg_get_tokens(ureg, NULL);
175bf215546Sopenharmony_ci        tgsi_dump(toks, 0);
176bf215546Sopenharmony_ci        ureg_free_tokens(toks);
177bf215546Sopenharmony_ci    }
178bf215546Sopenharmony_ci}
179bf215546Sopenharmony_ci
/* Constant-file register number `n`. Expects a variable named `ureg`
 * (struct ureg_program *) in scope at the expansion site. */
#define _CONST(n) ureg_DECL_constant(ureg, n)

/* Single-component read of a *destination* register `r`: converts it to a
 * source with ureg_src() and replicates the selected component. */
#define _X(r) ureg_scalar(ureg_src(r), TGSI_SWIZZLE_X)
#define _Y(r) ureg_scalar(ureg_src(r), TGSI_SWIZZLE_Y)
#define _Z(r) ureg_scalar(ureg_src(r), TGSI_SWIZZLE_Z)
#define _W(r) ureg_scalar(ureg_src(r), TGSI_SWIZZLE_W)

/* Same, but `r` is already a *source* register. */
#define _XXXX(r) ureg_scalar(r, TGSI_SWIZZLE_X)
#define _YYYY(r) ureg_scalar(r, TGSI_SWIZZLE_Y)
#define _ZZZZ(r) ureg_scalar(r, TGSI_SWIZZLE_Z)
#define _WWWW(r) ureg_scalar(r, TGSI_SWIZZLE_W)

/* Identity swizzle, for symmetry with the macros above. */
#define _XYZW(r) (r)

/* Constant `i` addressed relative to the .x component of AL.
 * AL should contain base address of lights table; a register named `AL`
 * must be in scope at the expansion site. */
#define LIGHT_CONST(i) ureg_src_indirect(_CONST(i), _X(AL))

/* Material constants start at constant register 19. */
#define MATERIAL_CONST(i) _CONST(19 + (i))
200bf215546Sopenharmony_ci
201bf215546Sopenharmony_ci/* VS FF constants layout:
202bf215546Sopenharmony_ci *
203bf215546Sopenharmony_ci * CONST[ 0.. 3] D3DTS_WORLD * D3DTS_VIEW * D3DTS_PROJECTION
204bf215546Sopenharmony_ci * CONST[ 4.. 7] D3DTS_WORLD * D3DTS_VIEW
205bf215546Sopenharmony_ci * CONST[ 8..11] D3DTS_PROJECTION
206bf215546Sopenharmony_ci * CONST[12..15] D3DTS_VIEW^(-1)
207bf215546Sopenharmony_ci * CONST[16..18] Normal matrix
208bf215546Sopenharmony_ci *
209bf215546Sopenharmony_ci * CONST[19].xyz  MATERIAL.Emissive + Material.Ambient * RS.Ambient
210bf215546Sopenharmony_ci * CONST[20]      MATERIAL.Diffuse
211bf215546Sopenharmony_ci * CONST[21]      MATERIAL.Ambient
212bf215546Sopenharmony_ci * CONST[22]      MATERIAL.Specular
213bf215546Sopenharmony_ci * CONST[23].x___ MATERIAL.Power
214bf215546Sopenharmony_ci * CONST[24]      MATERIAL.Emissive
215bf215546Sopenharmony_ci * CONST[25]      RS.Ambient
216bf215546Sopenharmony_ci *
217bf215546Sopenharmony_ci * CONST[26].x___ RS.PointSizeMin
218bf215546Sopenharmony_ci * CONST[26]._y__ RS.PointSizeMax
219bf215546Sopenharmony_ci * CONST[26].__z_ RS.PointSize
220bf215546Sopenharmony_ci * CONST[26].___w RS.PointScaleA
221bf215546Sopenharmony_ci * CONST[27].x___ RS.PointScaleB
222bf215546Sopenharmony_ci * CONST[27]._y__ RS.PointScaleC
223bf215546Sopenharmony_ci *
224bf215546Sopenharmony_ci * CONST[28].x___ RS.FogEnd
225bf215546Sopenharmony_ci * CONST[28]._y__ 1.0f / (RS.FogEnd - RS.FogStart)
226bf215546Sopenharmony_ci * CONST[28].__z_ RS.FogDensity
 *
228bf215546Sopenharmony_ci * CONST[30].x___ TWEENFACTOR
229bf215546Sopenharmony_ci *
230bf215546Sopenharmony_ci * CONST[32].x___ LIGHT[0].Type
231bf215546Sopenharmony_ci * CONST[32]._yzw LIGHT[0].Attenuation0,1,2
232bf215546Sopenharmony_ci * CONST[33]      LIGHT[0].Diffuse
233bf215546Sopenharmony_ci * CONST[34]      LIGHT[0].Specular
234bf215546Sopenharmony_ci * CONST[35]      LIGHT[0].Ambient
235bf215546Sopenharmony_ci * CONST[36].xyz_ LIGHT[0].Position
236bf215546Sopenharmony_ci * CONST[36].___w LIGHT[0].Range
237bf215546Sopenharmony_ci * CONST[37].xyz_ LIGHT[0].Direction
238bf215546Sopenharmony_ci * CONST[37].___w LIGHT[0].Falloff
239bf215546Sopenharmony_ci * CONST[38].x___ cos(LIGHT[0].Theta / 2)
240bf215546Sopenharmony_ci * CONST[38]._y__ cos(LIGHT[0].Phi / 2)
 * CONST[38].__z_ 1.0f / (cos(LIGHT[0].Theta / 2) - cos(LIGHT[0].Phi / 2))
242bf215546Sopenharmony_ci * CONST[39].xyz_ LIGHT[0].HalfVector (for directional lights)
243bf215546Sopenharmony_ci * CONST[39].___w 1 if this is the last active light, 0 if not
244bf215546Sopenharmony_ci * CONST[40]      LIGHT[1]
245bf215546Sopenharmony_ci * CONST[48]      LIGHT[2]
246bf215546Sopenharmony_ci * CONST[56]      LIGHT[3]
247bf215546Sopenharmony_ci * CONST[64]      LIGHT[4]
248bf215546Sopenharmony_ci * CONST[72]      LIGHT[5]
249bf215546Sopenharmony_ci * CONST[80]      LIGHT[6]
250bf215546Sopenharmony_ci * CONST[88]      LIGHT[7]
251bf215546Sopenharmony_ci * NOTE: no lighting code is generated if there are no active lights
252bf215546Sopenharmony_ci *
253bf215546Sopenharmony_ci * CONST[100].x___ Viewport 2/width
254bf215546Sopenharmony_ci * CONST[100]._y__ Viewport 2/height
255bf215546Sopenharmony_ci * CONST[100].__z_ Viewport 1/(zmax - zmin)
256bf215546Sopenharmony_ci * CONST[100].___w Viewport width
257bf215546Sopenharmony_ci * CONST[101].x___ Viewport x0
258bf215546Sopenharmony_ci * CONST[101]._y__ Viewport y0
259bf215546Sopenharmony_ci * CONST[101].__z_ Viewport z0
260bf215546Sopenharmony_ci *
261bf215546Sopenharmony_ci * CONST[128..131] D3DTS_TEXTURE0
262bf215546Sopenharmony_ci * CONST[132..135] D3DTS_TEXTURE1
263bf215546Sopenharmony_ci * CONST[136..139] D3DTS_TEXTURE2
264bf215546Sopenharmony_ci * CONST[140..143] D3DTS_TEXTURE3
265bf215546Sopenharmony_ci * CONST[144..147] D3DTS_TEXTURE4
266bf215546Sopenharmony_ci * CONST[148..151] D3DTS_TEXTURE5
267bf215546Sopenharmony_ci * CONST[152..155] D3DTS_TEXTURE6
268bf215546Sopenharmony_ci * CONST[156..159] D3DTS_TEXTURE7
269bf215546Sopenharmony_ci *
270bf215546Sopenharmony_ci * CONST[160] D3DTS_WORLDMATRIX[0] * D3DTS_VIEW
271bf215546Sopenharmony_ci * CONST[164] D3DTS_WORLDMATRIX[1] * D3DTS_VIEW
272bf215546Sopenharmony_ci * ...
273bf215546Sopenharmony_ci * CONST[192] D3DTS_WORLDMATRIX[8] * D3DTS_VIEW
274bf215546Sopenharmony_ci */
275bf215546Sopenharmony_cistruct vs_build_ctx
276bf215546Sopenharmony_ci{
277bf215546Sopenharmony_ci    struct ureg_program *ureg;
278bf215546Sopenharmony_ci    const struct nine_ff_vs_key *key;
279bf215546Sopenharmony_ci
280bf215546Sopenharmony_ci    uint16_t input[PIPE_MAX_ATTRIBS];
281bf215546Sopenharmony_ci    unsigned num_inputs;
282bf215546Sopenharmony_ci
283bf215546Sopenharmony_ci    struct ureg_src aVtx;
284bf215546Sopenharmony_ci    struct ureg_src aNrm;
285bf215546Sopenharmony_ci    struct ureg_src aCol[2];
286bf215546Sopenharmony_ci    struct ureg_src aTex[8];
287bf215546Sopenharmony_ci    struct ureg_src aPsz;
288bf215546Sopenharmony_ci    struct ureg_src aInd;
289bf215546Sopenharmony_ci    struct ureg_src aWgt;
290bf215546Sopenharmony_ci
291bf215546Sopenharmony_ci    struct ureg_src aVtx1; /* tweening */
292bf215546Sopenharmony_ci    struct ureg_src aNrm1;
293bf215546Sopenharmony_ci
294bf215546Sopenharmony_ci    struct ureg_src mtlA;
295bf215546Sopenharmony_ci    struct ureg_src mtlD;
296bf215546Sopenharmony_ci    struct ureg_src mtlS;
297bf215546Sopenharmony_ci    struct ureg_src mtlE;
298bf215546Sopenharmony_ci};
299bf215546Sopenharmony_ci
300bf215546Sopenharmony_cistatic inline unsigned
301bf215546Sopenharmony_ciget_texcoord_sn(struct pipe_screen *screen)
302bf215546Sopenharmony_ci{
303bf215546Sopenharmony_ci    if (screen->get_param(screen, PIPE_CAP_TGSI_TEXCOORD))
304bf215546Sopenharmony_ci        return TGSI_SEMANTIC_TEXCOORD;
305bf215546Sopenharmony_ci    return TGSI_SEMANTIC_GENERIC;
306bf215546Sopenharmony_ci}
307bf215546Sopenharmony_ci
308bf215546Sopenharmony_cistatic inline struct ureg_src
309bf215546Sopenharmony_cibuild_vs_add_input(struct vs_build_ctx *vs, uint16_t ndecl)
310bf215546Sopenharmony_ci{
311bf215546Sopenharmony_ci    const unsigned i = vs->num_inputs++;
312bf215546Sopenharmony_ci    assert(i < PIPE_MAX_ATTRIBS);
313bf215546Sopenharmony_ci    vs->input[i] = ndecl;
314bf215546Sopenharmony_ci    return ureg_DECL_vs_input(vs->ureg, i);
315bf215546Sopenharmony_ci}
316bf215546Sopenharmony_ci
317bf215546Sopenharmony_ci/* NOTE: dst may alias src */
318bf215546Sopenharmony_cistatic inline void
319bf215546Sopenharmony_ciureg_normalize3(struct ureg_program *ureg,
320bf215546Sopenharmony_ci                struct ureg_dst dst, struct ureg_src src)
321bf215546Sopenharmony_ci{
322bf215546Sopenharmony_ci    struct ureg_dst tmp = ureg_DECL_temporary(ureg);
323bf215546Sopenharmony_ci    struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
324bf215546Sopenharmony_ci
325bf215546Sopenharmony_ci    ureg_DP3(ureg, tmp_x, src, src);
326bf215546Sopenharmony_ci    ureg_RSQ(ureg, tmp_x, _X(tmp));
327bf215546Sopenharmony_ci    ureg_MUL(ureg, dst, src, _X(tmp));
328bf215546Sopenharmony_ci    ureg_release_temporary(ureg, tmp);
329bf215546Sopenharmony_ci}
330bf215546Sopenharmony_ci
331bf215546Sopenharmony_cistatic void *
332bf215546Sopenharmony_cinine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
333bf215546Sopenharmony_ci{
334bf215546Sopenharmony_ci    const struct nine_ff_vs_key *key = vs->key;
335bf215546Sopenharmony_ci    struct ureg_program *ureg = ureg_create(PIPE_SHADER_VERTEX);
336bf215546Sopenharmony_ci    struct ureg_dst oPos, oCol[2], oPsz, oFog;
337bf215546Sopenharmony_ci    struct ureg_dst AR;
338bf215546Sopenharmony_ci    unsigned i, c;
339bf215546Sopenharmony_ci    unsigned label[32], l = 0;
340bf215546Sopenharmony_ci    boolean need_aNrm = key->lighting || key->passthrough & (1 << NINE_DECLUSAGE_NORMAL);
341bf215546Sopenharmony_ci    boolean has_aNrm;
342bf215546Sopenharmony_ci    boolean need_aVtx = key->lighting || key->fog_mode || key->pointscale || key->ucp;
343bf215546Sopenharmony_ci    const unsigned texcoord_sn = get_texcoord_sn(device->screen);
344bf215546Sopenharmony_ci
345bf215546Sopenharmony_ci    vs->ureg = ureg;
346bf215546Sopenharmony_ci
347bf215546Sopenharmony_ci    /* Check which inputs we should transform. */
348bf215546Sopenharmony_ci    for (i = 0; i < 8 * 3; i += 3) {
349bf215546Sopenharmony_ci        switch ((key->tc_gen >> i) & 0x7) {
350bf215546Sopenharmony_ci        case NINED3DTSS_TCI_CAMERASPACENORMAL:
351bf215546Sopenharmony_ci            need_aNrm = TRUE;
352bf215546Sopenharmony_ci            break;
353bf215546Sopenharmony_ci        case NINED3DTSS_TCI_CAMERASPACEPOSITION:
354bf215546Sopenharmony_ci            need_aVtx = TRUE;
355bf215546Sopenharmony_ci            break;
356bf215546Sopenharmony_ci        case NINED3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR:
357bf215546Sopenharmony_ci            need_aVtx = need_aNrm = TRUE;
358bf215546Sopenharmony_ci            break;
359bf215546Sopenharmony_ci        case NINED3DTSS_TCI_SPHEREMAP:
360bf215546Sopenharmony_ci            need_aVtx = need_aNrm = TRUE;
361bf215546Sopenharmony_ci            break;
362bf215546Sopenharmony_ci        default:
363bf215546Sopenharmony_ci            break;
364bf215546Sopenharmony_ci        }
365bf215546Sopenharmony_ci    }
366bf215546Sopenharmony_ci
367bf215546Sopenharmony_ci    has_aNrm = need_aNrm && key->has_normal;
368bf215546Sopenharmony_ci
369bf215546Sopenharmony_ci    /* Declare and record used inputs (needed for linkage with vertex format):
370bf215546Sopenharmony_ci     * (texture coordinates handled later)
371bf215546Sopenharmony_ci     */
372bf215546Sopenharmony_ci    vs->aVtx = build_vs_add_input(vs,
373bf215546Sopenharmony_ci        key->position_t ? NINE_DECLUSAGE_POSITIONT : NINE_DECLUSAGE_POSITION);
374bf215546Sopenharmony_ci
375bf215546Sopenharmony_ci    vs->aNrm = ureg_imm1f(ureg, 0.0f);
376bf215546Sopenharmony_ci    if (has_aNrm)
377bf215546Sopenharmony_ci        vs->aNrm = build_vs_add_input(vs, NINE_DECLUSAGE_NORMAL);
378bf215546Sopenharmony_ci
379bf215546Sopenharmony_ci    vs->aCol[0] = ureg_imm1f(ureg, 1.0f);
380bf215546Sopenharmony_ci    vs->aCol[1] = ureg_imm1f(ureg, 0.0f);
381bf215546Sopenharmony_ci
382bf215546Sopenharmony_ci    if (key->lighting || key->darkness) {
383bf215546Sopenharmony_ci        const unsigned mask = key->mtl_diffuse | key->mtl_specular |
384bf215546Sopenharmony_ci                              key->mtl_ambient | key->mtl_emissive;
385bf215546Sopenharmony_ci        if ((mask & 0x1) && !key->color0in_one)
386bf215546Sopenharmony_ci            vs->aCol[0] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 0));
387bf215546Sopenharmony_ci        if ((mask & 0x2) && !key->color1in_zero)
388bf215546Sopenharmony_ci            vs->aCol[1] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 1));
389bf215546Sopenharmony_ci
390bf215546Sopenharmony_ci        vs->mtlD = MATERIAL_CONST(1);
391bf215546Sopenharmony_ci        vs->mtlA = MATERIAL_CONST(2);
392bf215546Sopenharmony_ci        vs->mtlS = MATERIAL_CONST(3);
393bf215546Sopenharmony_ci        vs->mtlE = MATERIAL_CONST(5);
394bf215546Sopenharmony_ci        if (key->mtl_diffuse  == 1) vs->mtlD = vs->aCol[0]; else
395bf215546Sopenharmony_ci        if (key->mtl_diffuse  == 2) vs->mtlD = vs->aCol[1];
396bf215546Sopenharmony_ci        if (key->mtl_ambient  == 1) vs->mtlA = vs->aCol[0]; else
397bf215546Sopenharmony_ci        if (key->mtl_ambient  == 2) vs->mtlA = vs->aCol[1];
398bf215546Sopenharmony_ci        if (key->mtl_specular == 1) vs->mtlS = vs->aCol[0]; else
399bf215546Sopenharmony_ci        if (key->mtl_specular == 2) vs->mtlS = vs->aCol[1];
400bf215546Sopenharmony_ci        if (key->mtl_emissive == 1) vs->mtlE = vs->aCol[0]; else
401bf215546Sopenharmony_ci        if (key->mtl_emissive == 2) vs->mtlE = vs->aCol[1];
402bf215546Sopenharmony_ci    } else {
403bf215546Sopenharmony_ci        if (!key->color0in_one) vs->aCol[0] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 0));
404bf215546Sopenharmony_ci        if (!key->color1in_zero) vs->aCol[1] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 1));
405bf215546Sopenharmony_ci    }
406bf215546Sopenharmony_ci
407bf215546Sopenharmony_ci    if (key->vertexpointsize)
408bf215546Sopenharmony_ci        vs->aPsz = build_vs_add_input(vs, NINE_DECLUSAGE_PSIZE);
409bf215546Sopenharmony_ci
410bf215546Sopenharmony_ci    if (key->vertexblend_indexed || key->passthrough & (1 << NINE_DECLUSAGE_BLENDINDICES))
411bf215546Sopenharmony_ci        vs->aInd = build_vs_add_input(vs, NINE_DECLUSAGE_BLENDINDICES);
412bf215546Sopenharmony_ci    if (key->vertexblend || key->passthrough & (1 << NINE_DECLUSAGE_BLENDWEIGHT))
413bf215546Sopenharmony_ci        vs->aWgt = build_vs_add_input(vs, NINE_DECLUSAGE_BLENDWEIGHT);
414bf215546Sopenharmony_ci    if (key->vertextween) {
415bf215546Sopenharmony_ci        vs->aVtx1 = build_vs_add_input(vs, NINE_DECLUSAGE_i(POSITION,1));
416bf215546Sopenharmony_ci        vs->aNrm1 = build_vs_add_input(vs, NINE_DECLUSAGE_i(NORMAL,1));
417bf215546Sopenharmony_ci    }
418bf215546Sopenharmony_ci
419bf215546Sopenharmony_ci    /* Declare outputs:
420bf215546Sopenharmony_ci     */
421bf215546Sopenharmony_ci    oPos = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0); /* HPOS */
422bf215546Sopenharmony_ci    oCol[0] = ureg_saturate(ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0));
423bf215546Sopenharmony_ci    oCol[1] = ureg_saturate(ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 1));
424bf215546Sopenharmony_ci    if (key->fog || key->passthrough & (1 << NINE_DECLUSAGE_FOG)) {
425bf215546Sopenharmony_ci        oFog = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 16);
426bf215546Sopenharmony_ci        oFog = ureg_writemask(oFog, TGSI_WRITEMASK_X);
427bf215546Sopenharmony_ci    }
428bf215546Sopenharmony_ci
429bf215546Sopenharmony_ci    if (key->vertexpointsize || key->pointscale) {
430bf215546Sopenharmony_ci        oPsz = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_PSIZE, 0,
431bf215546Sopenharmony_ci                                       TGSI_WRITEMASK_X, 0, 1);
432bf215546Sopenharmony_ci        oPsz = ureg_writemask(oPsz, TGSI_WRITEMASK_X);
433bf215546Sopenharmony_ci    }
434bf215546Sopenharmony_ci
435bf215546Sopenharmony_ci    if (key->lighting || key->vertexblend)
436bf215546Sopenharmony_ci        AR = ureg_DECL_address(ureg);
437bf215546Sopenharmony_ci
438bf215546Sopenharmony_ci    /* === Vertex transformation / vertex blending:
439bf215546Sopenharmony_ci     */
440bf215546Sopenharmony_ci
441bf215546Sopenharmony_ci    if (key->position_t) {
442bf215546Sopenharmony_ci        if (device->driver_caps.window_space_position_support) {
443bf215546Sopenharmony_ci            ureg_MOV(ureg, oPos, vs->aVtx);
444bf215546Sopenharmony_ci        } else {
445bf215546Sopenharmony_ci            struct ureg_dst tmp = ureg_DECL_temporary(ureg);
446bf215546Sopenharmony_ci            /* vs->aVtx contains the coordinates buffer wise.
447bf215546Sopenharmony_ci            * later in the pipeline, clipping, viewport and division
448bf215546Sopenharmony_ci            * by w (rhw = 1/w) are going to be applied, so do the reverse
449bf215546Sopenharmony_ci            * of these transformations (except clipping) to have the good
450bf215546Sopenharmony_ci            * position at the end.*/
451bf215546Sopenharmony_ci            ureg_MOV(ureg, tmp, vs->aVtx);
452bf215546Sopenharmony_ci            /* X from [X_min, X_min + width] to [-1, 1], same for Y. Z to [0, 1] */
453bf215546Sopenharmony_ci            ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), ureg_negate(_CONST(101)));
454bf215546Sopenharmony_ci            ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), _CONST(100));
455bf215546Sopenharmony_ci            ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XY), ureg_src(tmp), ureg_imm1f(ureg, -1.0f));
456bf215546Sopenharmony_ci            /* Y needs to be reversed */
457bf215546Sopenharmony_ci            ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_negate(ureg_src(tmp)));
458bf215546Sopenharmony_ci            /* Replace w by 1 if it equals to 0 */
459bf215546Sopenharmony_ci            ureg_CMP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), ureg_negate(ureg_abs(ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_W))),
460bf215546Sopenharmony_ci                     ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_W), ureg_imm1f(ureg, 1.0f));
461bf215546Sopenharmony_ci            /* inverse rhw */
462bf215546Sopenharmony_ci            ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), _W(tmp));
463bf215546Sopenharmony_ci            /* multiply X, Y, Z by w */
464bf215546Sopenharmony_ci            ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), _W(tmp));
465bf215546Sopenharmony_ci            ureg_MOV(ureg, oPos, ureg_src(tmp));
466bf215546Sopenharmony_ci            ureg_release_temporary(ureg, tmp);
467bf215546Sopenharmony_ci        }
468bf215546Sopenharmony_ci    } else if (key->vertexblend) {
469bf215546Sopenharmony_ci        struct ureg_dst tmp = ureg_DECL_temporary(ureg);
470bf215546Sopenharmony_ci        struct ureg_dst tmp2 = ureg_DECL_temporary(ureg);
471bf215546Sopenharmony_ci        struct ureg_dst aVtx_dst = ureg_DECL_temporary(ureg);
472bf215546Sopenharmony_ci        struct ureg_dst aNrm_dst = ureg_DECL_temporary(ureg);
473bf215546Sopenharmony_ci        struct ureg_dst sum_blendweights = ureg_DECL_temporary(ureg);
474bf215546Sopenharmony_ci        struct ureg_src cWM[4];
475bf215546Sopenharmony_ci
476bf215546Sopenharmony_ci        for (i = 160; i <= 195; ++i)
477bf215546Sopenharmony_ci            ureg_DECL_constant(ureg, i);
478bf215546Sopenharmony_ci
479bf215546Sopenharmony_ci        /* translate world matrix index to constant file index */
480bf215546Sopenharmony_ci        if (key->vertexblend_indexed) {
481bf215546Sopenharmony_ci            ureg_MAD(ureg, tmp, vs->aInd, ureg_imm1f(ureg, 4.0f), ureg_imm1f(ureg, 160.0f));
482bf215546Sopenharmony_ci            ureg_ARL(ureg, AR, ureg_src(tmp));
483bf215546Sopenharmony_ci        }
484bf215546Sopenharmony_ci
485bf215546Sopenharmony_ci        ureg_MOV(ureg, aVtx_dst, ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 0.0f));
486bf215546Sopenharmony_ci        ureg_MOV(ureg, aNrm_dst, ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 0.0f));
487bf215546Sopenharmony_ci        ureg_MOV(ureg, sum_blendweights, ureg_imm4f(ureg, 1.0f, 1.0f, 1.0f, 1.0f));
488bf215546Sopenharmony_ci
489bf215546Sopenharmony_ci        for (i = 0; i < key->vertexblend; ++i) {
490bf215546Sopenharmony_ci            for (c = 0; c < 4; ++c) {
491bf215546Sopenharmony_ci                cWM[c] = ureg_src_dimension(ureg_src_register(TGSI_FILE_CONSTANT, (160 + i * 4) * !key->vertexblend_indexed + c), 0);
492bf215546Sopenharmony_ci                if (key->vertexblend_indexed)
493bf215546Sopenharmony_ci                    cWM[c] = ureg_src_indirect(cWM[c], ureg_scalar(ureg_src(AR), i));
494bf215546Sopenharmony_ci            }
495bf215546Sopenharmony_ci
496bf215546Sopenharmony_ci            /* multiply by WORLD(index) */
497bf215546Sopenharmony_ci            ureg_MUL(ureg, tmp, _XXXX(vs->aVtx), cWM[0]);
498bf215546Sopenharmony_ci            ureg_MAD(ureg, tmp, _YYYY(vs->aVtx), cWM[1], ureg_src(tmp));
499bf215546Sopenharmony_ci            ureg_MAD(ureg, tmp, _ZZZZ(vs->aVtx), cWM[2], ureg_src(tmp));
500bf215546Sopenharmony_ci            ureg_MAD(ureg, tmp, _WWWW(vs->aVtx), cWM[3], ureg_src(tmp));
501bf215546Sopenharmony_ci
502bf215546Sopenharmony_ci            if (has_aNrm) {
503bf215546Sopenharmony_ci                /* Note: the spec says the transpose of the inverse of the
504bf215546Sopenharmony_ci                 * WorldView matrices should be used, but all tests show
505bf215546Sopenharmony_ci                 * otherwise.
506bf215546Sopenharmony_ci                 * Only case unknown: D3DVBF_0WEIGHTS */
507bf215546Sopenharmony_ci                ureg_MUL(ureg, tmp2, _XXXX(vs->aNrm), cWM[0]);
508bf215546Sopenharmony_ci                ureg_MAD(ureg, tmp2, _YYYY(vs->aNrm), cWM[1], ureg_src(tmp2));
509bf215546Sopenharmony_ci                ureg_MAD(ureg, tmp2, _ZZZZ(vs->aNrm), cWM[2], ureg_src(tmp2));
510bf215546Sopenharmony_ci            }
511bf215546Sopenharmony_ci
512bf215546Sopenharmony_ci            if (i < (key->vertexblend - 1)) {
513bf215546Sopenharmony_ci                /* accumulate weighted position value */
514bf215546Sopenharmony_ci                ureg_MAD(ureg, aVtx_dst, ureg_src(tmp), ureg_scalar(vs->aWgt, i), ureg_src(aVtx_dst));
515bf215546Sopenharmony_ci                if (has_aNrm)
516bf215546Sopenharmony_ci                    ureg_MAD(ureg, aNrm_dst, ureg_src(tmp2), ureg_scalar(vs->aWgt, i), ureg_src(aNrm_dst));
517bf215546Sopenharmony_ci                /* subtract this weight from the remaining weight used for the last matrix */
518bf215546Sopenharmony_ci                ureg_ADD(ureg, sum_blendweights, ureg_src(sum_blendweights), ureg_negate(ureg_scalar(vs->aWgt, i)));
519bf215546Sopenharmony_ci            }
520bf215546Sopenharmony_ci        }
521bf215546Sopenharmony_ci
522bf215546Sopenharmony_ci        /* the last weighted position is always 1 - sum_of_previous_weights */
523bf215546Sopenharmony_ci        ureg_MAD(ureg, aVtx_dst, ureg_src(tmp), ureg_scalar(ureg_src(sum_blendweights), key->vertexblend - 1), ureg_src(aVtx_dst));
524bf215546Sopenharmony_ci        if (has_aNrm)
525bf215546Sopenharmony_ci            ureg_MAD(ureg, aNrm_dst, ureg_src(tmp2), ureg_scalar(ureg_src(sum_blendweights), key->vertexblend - 1), ureg_src(aNrm_dst));
526bf215546Sopenharmony_ci
527bf215546Sopenharmony_ci        /* multiply by VIEW_PROJ */
528bf215546Sopenharmony_ci        ureg_MUL(ureg, tmp, _X(aVtx_dst), _CONST(8));
529bf215546Sopenharmony_ci        ureg_MAD(ureg, tmp, _Y(aVtx_dst), _CONST(9),  ureg_src(tmp));
530bf215546Sopenharmony_ci        ureg_MAD(ureg, tmp, _Z(aVtx_dst), _CONST(10), ureg_src(tmp));
531bf215546Sopenharmony_ci        ureg_MAD(ureg, oPos, _W(aVtx_dst), _CONST(11), ureg_src(tmp));
532bf215546Sopenharmony_ci
533bf215546Sopenharmony_ci        if (need_aVtx)
534bf215546Sopenharmony_ci            vs->aVtx = ureg_src(aVtx_dst);
535bf215546Sopenharmony_ci
536bf215546Sopenharmony_ci        ureg_release_temporary(ureg, tmp);
537bf215546Sopenharmony_ci        ureg_release_temporary(ureg, tmp2);
538bf215546Sopenharmony_ci        ureg_release_temporary(ureg, sum_blendweights);
539bf215546Sopenharmony_ci        if (!need_aVtx)
540bf215546Sopenharmony_ci            ureg_release_temporary(ureg, aVtx_dst);
541bf215546Sopenharmony_ci
542bf215546Sopenharmony_ci        if (has_aNrm) {
543bf215546Sopenharmony_ci            if (key->normalizenormals)
544bf215546Sopenharmony_ci               ureg_normalize3(ureg, aNrm_dst, ureg_src(aNrm_dst));
545bf215546Sopenharmony_ci            vs->aNrm = ureg_src(aNrm_dst);
546bf215546Sopenharmony_ci        } else
547bf215546Sopenharmony_ci            ureg_release_temporary(ureg, aNrm_dst);
548bf215546Sopenharmony_ci    } else {
549bf215546Sopenharmony_ci        struct ureg_dst tmp = ureg_DECL_temporary(ureg);
550bf215546Sopenharmony_ci
551bf215546Sopenharmony_ci        if (key->vertextween) {
552bf215546Sopenharmony_ci            struct ureg_dst aVtx_dst = ureg_DECL_temporary(ureg);
553bf215546Sopenharmony_ci            ureg_LRP(ureg, aVtx_dst, _XXXX(_CONST(30)), vs->aVtx1, vs->aVtx);
554bf215546Sopenharmony_ci            vs->aVtx = ureg_src(aVtx_dst);
555bf215546Sopenharmony_ci            if (has_aNrm) {
556bf215546Sopenharmony_ci                struct ureg_dst aNrm_dst = ureg_DECL_temporary(ureg);
557bf215546Sopenharmony_ci                ureg_LRP(ureg, aNrm_dst, _XXXX(_CONST(30)), vs->aNrm1, vs->aNrm);
558bf215546Sopenharmony_ci                vs->aNrm = ureg_src(aNrm_dst);
559bf215546Sopenharmony_ci            }
560bf215546Sopenharmony_ci        }
561bf215546Sopenharmony_ci
562bf215546Sopenharmony_ci        /* position = vertex * WORLD_VIEW_PROJ */
563bf215546Sopenharmony_ci        ureg_MUL(ureg, tmp, _XXXX(vs->aVtx), _CONST(0));
564bf215546Sopenharmony_ci        ureg_MAD(ureg, tmp, _YYYY(vs->aVtx), _CONST(1), ureg_src(tmp));
565bf215546Sopenharmony_ci        ureg_MAD(ureg, tmp, _ZZZZ(vs->aVtx), _CONST(2), ureg_src(tmp));
566bf215546Sopenharmony_ci        ureg_MAD(ureg, oPos, _WWWW(vs->aVtx), _CONST(3), ureg_src(tmp));
567bf215546Sopenharmony_ci        ureg_release_temporary(ureg, tmp);
568bf215546Sopenharmony_ci
569bf215546Sopenharmony_ci        if (need_aVtx) {
570bf215546Sopenharmony_ci            struct ureg_dst aVtx_dst = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);
571bf215546Sopenharmony_ci            ureg_MUL(ureg, aVtx_dst, _XXXX(vs->aVtx), _CONST(4));
572bf215546Sopenharmony_ci            ureg_MAD(ureg, aVtx_dst, _YYYY(vs->aVtx), _CONST(5), ureg_src(aVtx_dst));
573bf215546Sopenharmony_ci            ureg_MAD(ureg, aVtx_dst, _ZZZZ(vs->aVtx), _CONST(6), ureg_src(aVtx_dst));
574bf215546Sopenharmony_ci            ureg_MAD(ureg, aVtx_dst, _WWWW(vs->aVtx), _CONST(7), ureg_src(aVtx_dst));
575bf215546Sopenharmony_ci            vs->aVtx = ureg_src(aVtx_dst);
576bf215546Sopenharmony_ci        }
577bf215546Sopenharmony_ci        if (has_aNrm) {
578bf215546Sopenharmony_ci            struct ureg_dst aNrm_dst = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);
579bf215546Sopenharmony_ci            ureg_MUL(ureg, aNrm_dst, _XXXX(vs->aNrm), _CONST(16));
580bf215546Sopenharmony_ci            ureg_MAD(ureg, aNrm_dst, _YYYY(vs->aNrm), _CONST(17), ureg_src(aNrm_dst));
581bf215546Sopenharmony_ci            ureg_MAD(ureg, aNrm_dst, _ZZZZ(vs->aNrm), _CONST(18), ureg_src(aNrm_dst));
582bf215546Sopenharmony_ci            if (key->normalizenormals)
583bf215546Sopenharmony_ci               ureg_normalize3(ureg, aNrm_dst, ureg_src(aNrm_dst));
584bf215546Sopenharmony_ci            vs->aNrm = ureg_src(aNrm_dst);
585bf215546Sopenharmony_ci        }
586bf215546Sopenharmony_ci    }
587bf215546Sopenharmony_ci
588bf215546Sopenharmony_ci    /* === Process point size:
589bf215546Sopenharmony_ci     */
590bf215546Sopenharmony_ci    if (key->vertexpointsize || key->pointscale) {
591bf215546Sopenharmony_ci        struct ureg_dst tmp = ureg_DECL_temporary(ureg);
592bf215546Sopenharmony_ci        struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
593bf215546Sopenharmony_ci        struct ureg_dst tmp_y = ureg_writemask(tmp, TGSI_WRITEMASK_Y);
594bf215546Sopenharmony_ci        struct ureg_dst tmp_z = ureg_writemask(tmp, TGSI_WRITEMASK_Z);
595bf215546Sopenharmony_ci        if (key->vertexpointsize) {
596bf215546Sopenharmony_ci            struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26);
597bf215546Sopenharmony_ci            ureg_MAX(ureg, tmp_z, _XXXX(vs->aPsz), _XXXX(cPsz1));
598bf215546Sopenharmony_ci            ureg_MIN(ureg, tmp_z, _Z(tmp), _YYYY(cPsz1));
599bf215546Sopenharmony_ci        } else {
600bf215546Sopenharmony_ci            struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26);
601bf215546Sopenharmony_ci            ureg_MOV(ureg, tmp_z, _ZZZZ(cPsz1));
602bf215546Sopenharmony_ci        }
603bf215546Sopenharmony_ci
604bf215546Sopenharmony_ci        if (key->pointscale) {
605bf215546Sopenharmony_ci            struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26);
606bf215546Sopenharmony_ci            struct ureg_src cPsz2 = ureg_DECL_constant(ureg, 27);
607bf215546Sopenharmony_ci
608bf215546Sopenharmony_ci            ureg_DP3(ureg, tmp_x, vs->aVtx, vs->aVtx);
609bf215546Sopenharmony_ci            ureg_RSQ(ureg, tmp_y, _X(tmp));
610bf215546Sopenharmony_ci            ureg_MUL(ureg, tmp_y, _Y(tmp), _X(tmp));
611bf215546Sopenharmony_ci            ureg_CMP(ureg, tmp_y, ureg_negate(_Y(tmp)), _Y(tmp), ureg_imm1f(ureg, 0.0f));
612bf215546Sopenharmony_ci            ureg_MAD(ureg, tmp_x, _Y(tmp), _YYYY(cPsz2), _XXXX(cPsz2));
613bf215546Sopenharmony_ci            ureg_MAD(ureg, tmp_x, _Y(tmp), _X(tmp), _WWWW(cPsz1));
614bf215546Sopenharmony_ci            ureg_RSQ(ureg, tmp_x, _X(tmp));
615bf215546Sopenharmony_ci            ureg_MUL(ureg, tmp_x, _X(tmp), _Z(tmp));
616bf215546Sopenharmony_ci            ureg_MUL(ureg, tmp_x, _X(tmp), _WWWW(_CONST(100)));
617bf215546Sopenharmony_ci            ureg_MAX(ureg, tmp_x, _X(tmp), _XXXX(cPsz1));
618bf215546Sopenharmony_ci            ureg_MIN(ureg, tmp_z, _X(tmp), _YYYY(cPsz1));
619bf215546Sopenharmony_ci        }
620bf215546Sopenharmony_ci
621bf215546Sopenharmony_ci        ureg_MOV(ureg, oPsz, _Z(tmp));
622bf215546Sopenharmony_ci        ureg_release_temporary(ureg, tmp);
623bf215546Sopenharmony_ci    }
624bf215546Sopenharmony_ci
625bf215546Sopenharmony_ci    for (i = 0; i < 8; ++i) {
626bf215546Sopenharmony_ci        struct ureg_dst tmp, tmp_x, tmp2;
627bf215546Sopenharmony_ci        struct ureg_dst oTex, input_coord, transformed, t, aVtx_normed;
628bf215546Sopenharmony_ci        unsigned c, writemask;
629bf215546Sopenharmony_ci        const unsigned tci = (key->tc_gen >> (i * 3)) & 0x7;
630bf215546Sopenharmony_ci        const unsigned idx = (key->tc_idx >> (i * 3)) & 0x7;
631bf215546Sopenharmony_ci        unsigned dim_input = 1 + ((key->tc_dim_input >> (i * 2)) & 0x3);
632bf215546Sopenharmony_ci        const unsigned dim_output = (key->tc_dim_output >> (i * 3)) & 0x7;
633bf215546Sopenharmony_ci
634bf215546Sopenharmony_ci        /* No texture coordinate output for index i */
635bf215546Sopenharmony_ci        if (tci == NINED3DTSS_TCI_DISABLE)
636bf215546Sopenharmony_ci            continue;
637bf215546Sopenharmony_ci        oTex = ureg_DECL_output(ureg, texcoord_sn, i);
638bf215546Sopenharmony_ci        tmp = ureg_DECL_temporary(ureg);
639bf215546Sopenharmony_ci        tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
640bf215546Sopenharmony_ci        input_coord = ureg_DECL_temporary(ureg);
641bf215546Sopenharmony_ci        transformed = ureg_DECL_temporary(ureg);
642bf215546Sopenharmony_ci
643bf215546Sopenharmony_ci        /* Get the coordinate */
644bf215546Sopenharmony_ci        switch (tci) {
645bf215546Sopenharmony_ci        case NINED3DTSS_TCI_PASSTHRU:
646bf215546Sopenharmony_ci            /* NINED3DTSS_TCI_PASSTHRU => Use texcoord coming from index idx *
647bf215546Sopenharmony_ci             * Else the idx is used only to determine wrapping mode. */
648bf215546Sopenharmony_ci            vs->aTex[idx] = build_vs_add_input(vs, NINE_DECLUSAGE_i(TEXCOORD,idx));
649bf215546Sopenharmony_ci            ureg_MOV(ureg, input_coord, vs->aTex[idx]);
650bf215546Sopenharmony_ci            break;
651bf215546Sopenharmony_ci        case NINED3DTSS_TCI_CAMERASPACENORMAL:
652bf215546Sopenharmony_ci            ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), vs->aNrm);
653bf215546Sopenharmony_ci            ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
654bf215546Sopenharmony_ci            dim_input = 4;
655bf215546Sopenharmony_ci            break;
656bf215546Sopenharmony_ci        case NINED3DTSS_TCI_CAMERASPACEPOSITION:
657bf215546Sopenharmony_ci            ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), vs->aVtx);
658bf215546Sopenharmony_ci            ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
659bf215546Sopenharmony_ci            dim_input = 4;
660bf215546Sopenharmony_ci            break;
661bf215546Sopenharmony_ci        case NINED3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR:
662bf215546Sopenharmony_ci            tmp.WriteMask = TGSI_WRITEMASK_XYZ;
663bf215546Sopenharmony_ci            aVtx_normed = ureg_DECL_temporary(ureg);
664bf215546Sopenharmony_ci            ureg_normalize3(ureg, aVtx_normed, vs->aVtx);
665bf215546Sopenharmony_ci            ureg_DP3(ureg, tmp_x, ureg_src(aVtx_normed), vs->aNrm);
666bf215546Sopenharmony_ci            ureg_MUL(ureg, tmp, vs->aNrm, _X(tmp));
667bf215546Sopenharmony_ci            ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_src(tmp));
668bf215546Sopenharmony_ci            ureg_ADD(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), ureg_src(aVtx_normed), ureg_negate(ureg_src(tmp)));
669bf215546Sopenharmony_ci            ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
670bf215546Sopenharmony_ci            ureg_release_temporary(ureg, aVtx_normed);
671bf215546Sopenharmony_ci            dim_input = 4;
672bf215546Sopenharmony_ci            tmp.WriteMask = TGSI_WRITEMASK_XYZW;
673bf215546Sopenharmony_ci            break;
674bf215546Sopenharmony_ci        case NINED3DTSS_TCI_SPHEREMAP:
675bf215546Sopenharmony_ci            /* Implement the formula of GL_SPHERE_MAP */
676bf215546Sopenharmony_ci            tmp.WriteMask = TGSI_WRITEMASK_XYZ;
677bf215546Sopenharmony_ci            aVtx_normed = ureg_DECL_temporary(ureg);
678bf215546Sopenharmony_ci            tmp2 = ureg_DECL_temporary(ureg);
679bf215546Sopenharmony_ci            ureg_normalize3(ureg, aVtx_normed, vs->aVtx);
680bf215546Sopenharmony_ci            ureg_DP3(ureg, tmp_x, ureg_src(aVtx_normed), vs->aNrm);
681bf215546Sopenharmony_ci            ureg_MUL(ureg, tmp, vs->aNrm, _X(tmp));
682bf215546Sopenharmony_ci            ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_src(tmp));
683bf215546Sopenharmony_ci            ureg_ADD(ureg, tmp, ureg_src(aVtx_normed), ureg_negate(ureg_src(tmp)));
684bf215546Sopenharmony_ci            /* now tmp = normed(Vtx) - 2 dot3(normed(Vtx), Nrm) Nrm */
685bf215546Sopenharmony_ci            ureg_MOV(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_XYZ), ureg_src(tmp));
686bf215546Sopenharmony_ci            ureg_MUL(ureg, tmp2, ureg_src(tmp2), ureg_src(tmp2));
687bf215546Sopenharmony_ci            ureg_DP3(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_X), ureg_src(tmp2), ureg_src(tmp2));
688bf215546Sopenharmony_ci            ureg_RSQ(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_X), ureg_src(tmp2));
689bf215546Sopenharmony_ci            ureg_MUL(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_X), ureg_src(tmp2), ureg_imm1f(ureg, 0.5f));
690bf215546Sopenharmony_ci            /* tmp2 = 0.5 / sqrt(tmp.x^2 + tmp.y^2 + (tmp.z+1)^2)
691bf215546Sopenharmony_ci             * TODO: z coordinates are a bit different gl vs d3d, should the formula be adapted ? */
692bf215546Sopenharmony_ci            ureg_MUL(ureg, tmp, ureg_src(tmp), _X(tmp2));
693bf215546Sopenharmony_ci            ureg_ADD(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XY), ureg_src(tmp), ureg_imm1f(ureg, 0.5f));
694bf215546Sopenharmony_ci            ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_ZW), ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f));
695bf215546Sopenharmony_ci            ureg_release_temporary(ureg, aVtx_normed);
696bf215546Sopenharmony_ci            ureg_release_temporary(ureg, tmp2);
697bf215546Sopenharmony_ci            dim_input = 4;
698bf215546Sopenharmony_ci            tmp.WriteMask = TGSI_WRITEMASK_XYZW;
699bf215546Sopenharmony_ci            break;
700bf215546Sopenharmony_ci        default:
701bf215546Sopenharmony_ci            assert(0);
702bf215546Sopenharmony_ci            break;
703bf215546Sopenharmony_ci        }
704bf215546Sopenharmony_ci
705bf215546Sopenharmony_ci        /* Apply the transformation */
706bf215546Sopenharmony_ci        /* dim_output == 0 => do not transform the components.
707bf215546Sopenharmony_ci         * XYZRHW also disables transformation */
708bf215546Sopenharmony_ci        if (!dim_output || key->position_t) {
709bf215546Sopenharmony_ci            ureg_release_temporary(ureg, transformed);
710bf215546Sopenharmony_ci            transformed = input_coord;
711bf215546Sopenharmony_ci            writemask = TGSI_WRITEMASK_XYZW;
712bf215546Sopenharmony_ci        } else {
713bf215546Sopenharmony_ci            for (c = 0; c < dim_output; c++) {
714bf215546Sopenharmony_ci                t = ureg_writemask(transformed, 1 << c);
715bf215546Sopenharmony_ci                switch (dim_input) {
716bf215546Sopenharmony_ci                /* dim_input = 1 2 3: -> we add trailing 1 to input*/
717bf215546Sopenharmony_ci                case 1: ureg_MAD(ureg, t, _X(input_coord), _XXXX(_CONST(128 + i * 4 + c)), _YYYY(_CONST(128 + i * 4 + c)));
718bf215546Sopenharmony_ci                        break;
719bf215546Sopenharmony_ci                case 2: ureg_DP2(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c));
720bf215546Sopenharmony_ci                        ureg_ADD(ureg, t, ureg_src(transformed), _ZZZZ(_CONST(128 + i * 4 + c)));
721bf215546Sopenharmony_ci                        break;
722bf215546Sopenharmony_ci                case 3: ureg_DP3(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c));
723bf215546Sopenharmony_ci                        ureg_ADD(ureg, t, ureg_src(transformed), _WWWW(_CONST(128 + i * 4 + c)));
724bf215546Sopenharmony_ci                        break;
725bf215546Sopenharmony_ci                case 4: ureg_DP4(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c)); break;
726bf215546Sopenharmony_ci                default:
727bf215546Sopenharmony_ci                    assert(0);
728bf215546Sopenharmony_ci                }
729bf215546Sopenharmony_ci            }
730bf215546Sopenharmony_ci            writemask = (1 << dim_output) - 1;
731bf215546Sopenharmony_ci            ureg_release_temporary(ureg, input_coord);
732bf215546Sopenharmony_ci        }
733bf215546Sopenharmony_ci
734bf215546Sopenharmony_ci        ureg_MOV(ureg, ureg_writemask(oTex, writemask), ureg_src(transformed));
735bf215546Sopenharmony_ci        ureg_release_temporary(ureg, transformed);
736bf215546Sopenharmony_ci        ureg_release_temporary(ureg, tmp);
737bf215546Sopenharmony_ci    }
738bf215546Sopenharmony_ci
739bf215546Sopenharmony_ci    /* === Lighting:
740bf215546Sopenharmony_ci     *
741bf215546Sopenharmony_ci     * DIRECTIONAL:  Light at infinite distance, parallel rays, no attenuation.
742bf215546Sopenharmony_ci     * POINT: Finite distance to scene, divergent rays, isotropic, attenuation.
743bf215546Sopenharmony_ci     * SPOT: Finite distance, divergent rays, angular dependence, attenuation.
744bf215546Sopenharmony_ci     *
745bf215546Sopenharmony_ci     * vec3 normal = normalize(in.Normal * NormalMatrix);
746bf215546Sopenharmony_ci     * vec3 hitDir = light.direction;
747bf215546Sopenharmony_ci     * float atten = 1.0;
748bf215546Sopenharmony_ci     *
749bf215546Sopenharmony_ci     * if (light.type != DIRECTIONAL)
750bf215546Sopenharmony_ci     * {
751bf215546Sopenharmony_ci     *     vec3 hitVec = light.position - eyeVertex;
752bf215546Sopenharmony_ci     *     float d = length(hitVec);
753bf215546Sopenharmony_ci     *     hitDir = hitVec / d;
754bf215546Sopenharmony_ci     *     atten = 1 / ((light.atten2 * d + light.atten1) * d + light.atten0);
755bf215546Sopenharmony_ci     * }
756bf215546Sopenharmony_ci     *
757bf215546Sopenharmony_ci     * if (light.type == SPOTLIGHT)
758bf215546Sopenharmony_ci     * {
759bf215546Sopenharmony_ci     *     float rho = dp3(-hitDir, light.direction);
760bf215546Sopenharmony_ci     *     if (rho < cos(light.phi / 2))
761bf215546Sopenharmony_ci     *         atten = 0;
762bf215546Sopenharmony_ci     *     if (rho < cos(light.theta / 2))
763bf215546Sopenharmony_ci     *         atten *= pow(some_func(rho), light.falloff);
764bf215546Sopenharmony_ci     * }
765bf215546Sopenharmony_ci     *
766bf215546Sopenharmony_ci     * float nDotHit = dp3_sat(normal, hitDir);
767bf215546Sopenharmony_ci     * float powFact = 0.0;
768bf215546Sopenharmony_ci     *
769bf215546Sopenharmony_ci     * if (nDotHit > 0.0)
770bf215546Sopenharmony_ci     * {
771bf215546Sopenharmony_ci     *     vec3 midVec = normalize(hitDir + eye);
772bf215546Sopenharmony_ci     *     float nDotMid = dp3_sat(normal, midVec);
773bf215546Sopenharmony_ci     *     powFact = pow(nDotMid, material.power);
774bf215546Sopenharmony_ci     * }
775bf215546Sopenharmony_ci     *
776bf215546Sopenharmony_ci     * ambient += light.ambient * atten;
777bf215546Sopenharmony_ci     * diffuse += light.diffuse * atten * nDotHit;
778bf215546Sopenharmony_ci     * specular += light.specular * atten * powFact;
779bf215546Sopenharmony_ci     */
780bf215546Sopenharmony_ci    if (key->lighting) {
781bf215546Sopenharmony_ci        struct ureg_dst tmp = ureg_DECL_temporary(ureg);
782bf215546Sopenharmony_ci        struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
783bf215546Sopenharmony_ci        struct ureg_dst tmp_y = ureg_writemask(tmp, TGSI_WRITEMASK_Y);
784bf215546Sopenharmony_ci        struct ureg_dst tmp_z = ureg_writemask(tmp, TGSI_WRITEMASK_Z);
785bf215546Sopenharmony_ci        struct ureg_dst rAtt = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_W);
786bf215546Sopenharmony_ci        struct ureg_dst rHit = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);
787bf215546Sopenharmony_ci        struct ureg_dst rMid = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);
788bf215546Sopenharmony_ci
789bf215546Sopenharmony_ci        struct ureg_dst rCtr = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_W);
790bf215546Sopenharmony_ci
791bf215546Sopenharmony_ci        struct ureg_dst AL = ureg_writemask(AR, TGSI_WRITEMASK_X);
792bf215546Sopenharmony_ci
793bf215546Sopenharmony_ci        /* Light.*.Alpha is not used. */
794bf215546Sopenharmony_ci        struct ureg_dst rD = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);
795bf215546Sopenharmony_ci        struct ureg_dst rA = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);
796bf215546Sopenharmony_ci        struct ureg_dst rS = ureg_DECL_temporary(ureg);
797bf215546Sopenharmony_ci
798bf215546Sopenharmony_ci        struct ureg_src mtlP = _XXXX(MATERIAL_CONST(4));
799bf215546Sopenharmony_ci
800bf215546Sopenharmony_ci        struct ureg_src cLKind = _XXXX(LIGHT_CONST(0));
801bf215546Sopenharmony_ci        struct ureg_src cLAtt0 = _YYYY(LIGHT_CONST(0));
802bf215546Sopenharmony_ci        struct ureg_src cLAtt1 = _ZZZZ(LIGHT_CONST(0));
803bf215546Sopenharmony_ci        struct ureg_src cLAtt2 = _WWWW(LIGHT_CONST(0));
804bf215546Sopenharmony_ci        struct ureg_src cLColD = _XYZW(LIGHT_CONST(1));
805bf215546Sopenharmony_ci        struct ureg_src cLColS = _XYZW(LIGHT_CONST(2));
806bf215546Sopenharmony_ci        struct ureg_src cLColA = _XYZW(LIGHT_CONST(3));
807bf215546Sopenharmony_ci        struct ureg_src cLPos  = _XYZW(LIGHT_CONST(4));
808bf215546Sopenharmony_ci        struct ureg_src cLRng  = _WWWW(LIGHT_CONST(4));
809bf215546Sopenharmony_ci        struct ureg_src cLDir  = _XYZW(LIGHT_CONST(5));
810bf215546Sopenharmony_ci        struct ureg_src cLFOff = _WWWW(LIGHT_CONST(5));
811bf215546Sopenharmony_ci        struct ureg_src cLTht  = _XXXX(LIGHT_CONST(6));
812bf215546Sopenharmony_ci        struct ureg_src cLPhi  = _YYYY(LIGHT_CONST(6));
813bf215546Sopenharmony_ci        struct ureg_src cLSDiv = _ZZZZ(LIGHT_CONST(6));
814bf215546Sopenharmony_ci        struct ureg_src cLLast = _WWWW(LIGHT_CONST(7));
815bf215546Sopenharmony_ci
816bf215546Sopenharmony_ci        const unsigned loop_label = l++;
817bf215546Sopenharmony_ci
818bf215546Sopenharmony_ci        /* Declare all light constants to allow indirect addressing */
819bf215546Sopenharmony_ci        for (i = 32; i < 96; i++)
820bf215546Sopenharmony_ci            ureg_DECL_constant(ureg, i);
821bf215546Sopenharmony_ci
822bf215546Sopenharmony_ci        ureg_MOV(ureg, rCtr, ureg_imm1f(ureg, 32.0f)); /* &lightconst(0) */
823bf215546Sopenharmony_ci        ureg_MOV(ureg, rD, ureg_imm1f(ureg, 0.0f));
824bf215546Sopenharmony_ci        ureg_MOV(ureg, rA, ureg_imm1f(ureg, 0.0f));
825bf215546Sopenharmony_ci        ureg_MOV(ureg, rS, ureg_imm1f(ureg, 0.0f));
826bf215546Sopenharmony_ci
827bf215546Sopenharmony_ci        /* loop management */
828bf215546Sopenharmony_ci        ureg_BGNLOOP(ureg, &label[loop_label]);
829bf215546Sopenharmony_ci        ureg_ARL(ureg, AL, _W(rCtr));
830bf215546Sopenharmony_ci
831bf215546Sopenharmony_ci        /* if (not DIRECTIONAL light): */
832bf215546Sopenharmony_ci        ureg_SNE(ureg, tmp_x, cLKind, ureg_imm1f(ureg, D3DLIGHT_DIRECTIONAL));
833bf215546Sopenharmony_ci        ureg_MOV(ureg, rHit, ureg_negate(cLDir));
834bf215546Sopenharmony_ci        ureg_MOV(ureg, rAtt, ureg_imm1f(ureg, 1.0f));
835bf215546Sopenharmony_ci        ureg_IF(ureg, _X(tmp), &label[l++]);
836bf215546Sopenharmony_ci        {
837bf215546Sopenharmony_ci            /* hitDir = light.position - eyeVtx
838bf215546Sopenharmony_ci             * d = length(hitDir)
839bf215546Sopenharmony_ci             */
840bf215546Sopenharmony_ci            ureg_ADD(ureg, rHit, cLPos, ureg_negate(vs->aVtx));
841bf215546Sopenharmony_ci            ureg_DP3(ureg, tmp_x, ureg_src(rHit), ureg_src(rHit));
842bf215546Sopenharmony_ci            ureg_RSQ(ureg, tmp_y, _X(tmp));
843bf215546Sopenharmony_ci            ureg_MUL(ureg, tmp_x, _X(tmp), _Y(tmp)); /* length */
844bf215546Sopenharmony_ci
845bf215546Sopenharmony_ci            /* att = 1.0 / (light.att0 + (light.att1 + light.att2 * d) * d) */
846bf215546Sopenharmony_ci            ureg_MAD(ureg, rAtt, _X(tmp), cLAtt2, cLAtt1);
847bf215546Sopenharmony_ci            ureg_MAD(ureg, rAtt, _X(tmp), _W(rAtt), cLAtt0);
848bf215546Sopenharmony_ci            ureg_RCP(ureg, rAtt, _W(rAtt));
849bf215546Sopenharmony_ci            /* cut-off if distance exceeds Light.Range */
850bf215546Sopenharmony_ci            ureg_SLT(ureg, tmp_x, _X(tmp), cLRng);
851bf215546Sopenharmony_ci            ureg_MUL(ureg, rAtt, _W(rAtt), _X(tmp));
852bf215546Sopenharmony_ci        }
853bf215546Sopenharmony_ci        ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));
854bf215546Sopenharmony_ci        ureg_ENDIF(ureg);
855bf215546Sopenharmony_ci
856bf215546Sopenharmony_ci        /* normalize hitDir */
857bf215546Sopenharmony_ci        ureg_normalize3(ureg, rHit, ureg_src(rHit));
858bf215546Sopenharmony_ci
859bf215546Sopenharmony_ci        /* if (SPOT light) */
860bf215546Sopenharmony_ci        ureg_SEQ(ureg, tmp_x, cLKind, ureg_imm1f(ureg, D3DLIGHT_SPOT));
861bf215546Sopenharmony_ci        ureg_IF(ureg, _X(tmp), &label[l++]);
862bf215546Sopenharmony_ci        {
863bf215546Sopenharmony_ci            /* rho = dp3(-hitDir, light.spotDir)
864bf215546Sopenharmony_ci             *
865bf215546Sopenharmony_ci             * if (rho  > light.ctht2) NOTE: 0 <= phi <= pi, 0 <= theta <= phi
866bf215546Sopenharmony_ci             *     spotAtt = 1
867bf215546Sopenharmony_ci             * else
868bf215546Sopenharmony_ci             * if (rho <= light.cphi2)
869bf215546Sopenharmony_ci             *     spotAtt = 0
870bf215546Sopenharmony_ci             * else
871bf215546Sopenharmony_ci             *     spotAtt = (rho - light.cphi2) / (light.ctht2 - light.cphi2) ^ light.falloff
872bf215546Sopenharmony_ci             */
873bf215546Sopenharmony_ci            ureg_DP3(ureg, tmp_y, ureg_negate(ureg_src(rHit)), cLDir); /* rho */
874bf215546Sopenharmony_ci            ureg_ADD(ureg, tmp_x, _Y(tmp), ureg_negate(cLPhi));
875bf215546Sopenharmony_ci            ureg_MUL(ureg, tmp_x, _X(tmp), cLSDiv);
876bf215546Sopenharmony_ci            ureg_POW(ureg, tmp_x, _X(tmp), cLFOff); /* spotAtten */
877bf215546Sopenharmony_ci            ureg_SGE(ureg, tmp_z, _Y(tmp), cLTht); /* if inside theta && phi */
878bf215546Sopenharmony_ci            ureg_SGE(ureg, tmp_y, _Y(tmp), cLPhi); /* if inside phi */
879bf215546Sopenharmony_ci            ureg_MAD(ureg, ureg_saturate(tmp_x), _X(tmp), _Y(tmp), _Z(tmp));
880bf215546Sopenharmony_ci            ureg_MUL(ureg, rAtt, _W(rAtt), _X(tmp));
881bf215546Sopenharmony_ci        }
882bf215546Sopenharmony_ci        ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));
883bf215546Sopenharmony_ci        ureg_ENDIF(ureg);
884bf215546Sopenharmony_ci
885bf215546Sopenharmony_ci        /* directional factors, let's not use LIT because of clarity */
886bf215546Sopenharmony_ci
887bf215546Sopenharmony_ci        if (has_aNrm) {
888bf215546Sopenharmony_ci            if (key->localviewer) {
889bf215546Sopenharmony_ci                ureg_normalize3(ureg, rMid, vs->aVtx);
890bf215546Sopenharmony_ci                ureg_ADD(ureg, rMid, ureg_src(rHit), ureg_negate(ureg_src(rMid)));
891bf215546Sopenharmony_ci            } else {
892bf215546Sopenharmony_ci                ureg_ADD(ureg, rMid, ureg_src(rHit), ureg_imm3f(ureg, 0.0f, 0.0f, -1.0f));
893bf215546Sopenharmony_ci            }
894bf215546Sopenharmony_ci            ureg_normalize3(ureg, rMid, ureg_src(rMid));
895bf215546Sopenharmony_ci            ureg_DP3(ureg, ureg_saturate(tmp_x), vs->aNrm, ureg_src(rHit));
896bf215546Sopenharmony_ci            ureg_DP3(ureg, ureg_saturate(tmp_y), vs->aNrm, ureg_src(rMid));
897bf215546Sopenharmony_ci            ureg_MUL(ureg, tmp_z, _X(tmp), _Y(tmp));
898bf215546Sopenharmony_ci            /* Tests show that specular is computed only if (dp3(normal,hitDir) > 0).
899bf215546Sopenharmony_ci             * For front facing, it is more restrictive than test (dp3(normal,mid) > 0).
900bf215546Sopenharmony_ci             * No tests were made for backfacing, so add the two conditions */
901bf215546Sopenharmony_ci            ureg_IF(ureg, _Z(tmp), &label[l++]);
902bf215546Sopenharmony_ci            {
903bf215546Sopenharmony_ci                ureg_DP3(ureg, ureg_saturate(tmp_y), vs->aNrm, ureg_src(rMid));
904bf215546Sopenharmony_ci                ureg_POW(ureg, tmp_y, _Y(tmp), mtlP);
905bf215546Sopenharmony_ci                ureg_MUL(ureg, tmp_y, _W(rAtt), _Y(tmp)); /* power factor * att */
906bf215546Sopenharmony_ci                ureg_MAD(ureg, rS, cLColS, _Y(tmp), ureg_src(rS)); /* accumulate specular */
907bf215546Sopenharmony_ci            }
908bf215546Sopenharmony_ci            ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));
909bf215546Sopenharmony_ci            ureg_ENDIF(ureg);
910bf215546Sopenharmony_ci
911bf215546Sopenharmony_ci            ureg_MUL(ureg, tmp_x, _W(rAtt), _X(tmp)); /* dp3(normal,hitDir) * att */
912bf215546Sopenharmony_ci            ureg_MAD(ureg, rD, cLColD, _X(tmp), ureg_src(rD)); /* accumulate diffuse */
913bf215546Sopenharmony_ci        }
914bf215546Sopenharmony_ci
915bf215546Sopenharmony_ci        ureg_MAD(ureg, rA, cLColA, _W(rAtt), ureg_src(rA)); /* accumulate ambient */
916bf215546Sopenharmony_ci
917bf215546Sopenharmony_ci        /* break if this was the last light */
918bf215546Sopenharmony_ci        ureg_IF(ureg, cLLast, &label[l++]);
919bf215546Sopenharmony_ci        ureg_BRK(ureg);
920bf215546Sopenharmony_ci        ureg_ENDIF(ureg);
921bf215546Sopenharmony_ci        ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));
922bf215546Sopenharmony_ci
923bf215546Sopenharmony_ci        ureg_ADD(ureg, rCtr, _W(rCtr), ureg_imm1f(ureg, 8.0f));
924bf215546Sopenharmony_ci        ureg_fixup_label(ureg, label[loop_label], ureg_get_instruction_number(ureg));
925bf215546Sopenharmony_ci        ureg_ENDLOOP(ureg, &label[loop_label]);
926bf215546Sopenharmony_ci
927bf215546Sopenharmony_ci        /* Apply to material:
928bf215546Sopenharmony_ci         *
929bf215546Sopenharmony_ci         * oCol[0] = (material.emissive + material.ambient * rs.ambient) +
930bf215546Sopenharmony_ci         *           material.ambient * ambient +
931bf215546Sopenharmony_ci         *           material.diffuse * diffuse;
932bf215546Sopenharmony_ci         * oCol[1] = material.specular * specular;
933bf215546Sopenharmony_ci         */
934bf215546Sopenharmony_ci        if (key->mtl_emissive == 0 && key->mtl_ambient == 0)
935bf215546Sopenharmony_ci            ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(rA), vs->mtlA, _CONST(19));
936bf215546Sopenharmony_ci        else {
937bf215546Sopenharmony_ci            ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(rA), _CONST(25));
938bf215546Sopenharmony_ci            ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), vs->mtlA, ureg_src(tmp), vs->mtlE);
939bf215546Sopenharmony_ci        }
940bf215546Sopenharmony_ci
941bf215546Sopenharmony_ci        ureg_MAD(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_XYZ), ureg_src(rD), vs->mtlD, ureg_src(tmp));
942bf215546Sopenharmony_ci        ureg_MOV(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_W), vs->mtlD);
943bf215546Sopenharmony_ci        ureg_MUL(ureg, oCol[1], ureg_src(rS), vs->mtlS);
944bf215546Sopenharmony_ci        ureg_release_temporary(ureg, rAtt);
945bf215546Sopenharmony_ci        ureg_release_temporary(ureg, rHit);
946bf215546Sopenharmony_ci        ureg_release_temporary(ureg, rMid);
947bf215546Sopenharmony_ci        ureg_release_temporary(ureg, rCtr);
948bf215546Sopenharmony_ci        ureg_release_temporary(ureg, rD);
949bf215546Sopenharmony_ci        ureg_release_temporary(ureg, rA);
950bf215546Sopenharmony_ci        ureg_release_temporary(ureg, rS);
951bf215546Sopenharmony_ci        ureg_release_temporary(ureg, rAtt);
952bf215546Sopenharmony_ci        ureg_release_temporary(ureg, tmp);
953bf215546Sopenharmony_ci    } else
954bf215546Sopenharmony_ci    /* COLOR */
955bf215546Sopenharmony_ci    if (key->darkness) {
956bf215546Sopenharmony_ci        if (key->mtl_emissive == 0 && key->mtl_ambient == 0)
957bf215546Sopenharmony_ci            ureg_MOV(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_XYZ), _CONST(19));
958bf215546Sopenharmony_ci        else
959bf215546Sopenharmony_ci            ureg_MAD(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_XYZ), vs->mtlA, _CONST(25), vs->mtlE);
960bf215546Sopenharmony_ci        ureg_MOV(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_W), vs->mtlD);
961bf215546Sopenharmony_ci        ureg_MOV(ureg, oCol[1], ureg_imm1f(ureg, 0.0f));
962bf215546Sopenharmony_ci    } else {
963bf215546Sopenharmony_ci        ureg_MOV(ureg, oCol[0], vs->aCol[0]);
964bf215546Sopenharmony_ci        ureg_MOV(ureg, oCol[1], vs->aCol[1]);
965bf215546Sopenharmony_ci    }
966bf215546Sopenharmony_ci
967bf215546Sopenharmony_ci    /* === Process fog.
968bf215546Sopenharmony_ci     *
969bf215546Sopenharmony_ci     * exp(x) = ex2(log2(e) * x)
970bf215546Sopenharmony_ci     */
971bf215546Sopenharmony_ci    if (key->fog_mode) {
972bf215546Sopenharmony_ci        struct ureg_dst tmp = ureg_DECL_temporary(ureg);
973bf215546Sopenharmony_ci        struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
974bf215546Sopenharmony_ci        struct ureg_dst tmp_z = ureg_writemask(tmp, TGSI_WRITEMASK_Z);
975bf215546Sopenharmony_ci        if (key->fog_range) {
976bf215546Sopenharmony_ci            ureg_DP3(ureg, tmp_x, vs->aVtx, vs->aVtx);
977bf215546Sopenharmony_ci            ureg_RSQ(ureg, tmp_z, _X(tmp));
978bf215546Sopenharmony_ci            ureg_MUL(ureg, tmp_z, _Z(tmp), _X(tmp));
979bf215546Sopenharmony_ci        } else {
980bf215546Sopenharmony_ci            ureg_MOV(ureg, tmp_z, ureg_abs(_ZZZZ(vs->aVtx)));
981bf215546Sopenharmony_ci        }
982bf215546Sopenharmony_ci
983bf215546Sopenharmony_ci        if (key->fog_mode == D3DFOG_EXP) {
984bf215546Sopenharmony_ci            ureg_MUL(ureg, tmp_x, _Z(tmp), _ZZZZ(_CONST(28)));
985bf215546Sopenharmony_ci            ureg_MUL(ureg, tmp_x, _X(tmp), ureg_imm1f(ureg, -1.442695f));
986bf215546Sopenharmony_ci            ureg_EX2(ureg, tmp_x, _X(tmp));
987bf215546Sopenharmony_ci        } else
988bf215546Sopenharmony_ci        if (key->fog_mode == D3DFOG_EXP2) {
989bf215546Sopenharmony_ci            ureg_MUL(ureg, tmp_x, _Z(tmp), _ZZZZ(_CONST(28)));
990bf215546Sopenharmony_ci            ureg_MUL(ureg, tmp_x, _X(tmp), _X(tmp));
991bf215546Sopenharmony_ci            ureg_MUL(ureg, tmp_x, _X(tmp), ureg_imm1f(ureg, -1.442695f));
992bf215546Sopenharmony_ci            ureg_EX2(ureg, tmp_x, _X(tmp));
993bf215546Sopenharmony_ci        } else
994bf215546Sopenharmony_ci        if (key->fog_mode == D3DFOG_LINEAR) {
995bf215546Sopenharmony_ci            ureg_ADD(ureg, tmp_x, _XXXX(_CONST(28)), ureg_negate(_Z(tmp)));
996bf215546Sopenharmony_ci            ureg_MUL(ureg, ureg_saturate(tmp_x), _X(tmp), _YYYY(_CONST(28)));
997bf215546Sopenharmony_ci        }
998bf215546Sopenharmony_ci        ureg_MOV(ureg, oFog, _X(tmp));
999bf215546Sopenharmony_ci        ureg_release_temporary(ureg, tmp);
1000bf215546Sopenharmony_ci    } else if (key->fog && !(key->passthrough & (1 << NINE_DECLUSAGE_FOG))) {
1001bf215546Sopenharmony_ci        ureg_MOV(ureg, oFog, ureg_scalar(vs->aCol[1], TGSI_SWIZZLE_W));
1002bf215546Sopenharmony_ci    }
1003bf215546Sopenharmony_ci
1004bf215546Sopenharmony_ci    if (key->passthrough & (1 << NINE_DECLUSAGE_BLENDWEIGHT)) {
1005bf215546Sopenharmony_ci        struct ureg_src input;
1006bf215546Sopenharmony_ci        struct ureg_dst output;
1007bf215546Sopenharmony_ci        input = vs->aWgt;
1008bf215546Sopenharmony_ci        output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 19);
1009bf215546Sopenharmony_ci        ureg_MOV(ureg, output, input);
1010bf215546Sopenharmony_ci    }
1011bf215546Sopenharmony_ci    if (key->passthrough & (1 << NINE_DECLUSAGE_BLENDINDICES)) {
1012bf215546Sopenharmony_ci        struct ureg_src input;
1013bf215546Sopenharmony_ci        struct ureg_dst output;
1014bf215546Sopenharmony_ci        input = vs->aInd;
1015bf215546Sopenharmony_ci        output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 20);
1016bf215546Sopenharmony_ci        ureg_MOV(ureg, output, input);
1017bf215546Sopenharmony_ci    }
1018bf215546Sopenharmony_ci    if (key->passthrough & (1 << NINE_DECLUSAGE_NORMAL)) {
1019bf215546Sopenharmony_ci        struct ureg_src input;
1020bf215546Sopenharmony_ci        struct ureg_dst output;
1021bf215546Sopenharmony_ci        input = vs->aNrm;
1022bf215546Sopenharmony_ci        output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 21);
1023bf215546Sopenharmony_ci        ureg_MOV(ureg, output, input);
1024bf215546Sopenharmony_ci    }
1025bf215546Sopenharmony_ci    if (key->passthrough & (1 << NINE_DECLUSAGE_TANGENT)) {
1026bf215546Sopenharmony_ci        struct ureg_src input;
1027bf215546Sopenharmony_ci        struct ureg_dst output;
1028bf215546Sopenharmony_ci        input = build_vs_add_input(vs, NINE_DECLUSAGE_TANGENT);
1029bf215546Sopenharmony_ci        output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 22);
1030bf215546Sopenharmony_ci        ureg_MOV(ureg, output, input);
1031bf215546Sopenharmony_ci    }
1032bf215546Sopenharmony_ci    if (key->passthrough & (1 << NINE_DECLUSAGE_BINORMAL)) {
1033bf215546Sopenharmony_ci        struct ureg_src input;
1034bf215546Sopenharmony_ci        struct ureg_dst output;
1035bf215546Sopenharmony_ci        input = build_vs_add_input(vs, NINE_DECLUSAGE_BINORMAL);
1036bf215546Sopenharmony_ci        output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 23);
1037bf215546Sopenharmony_ci        ureg_MOV(ureg, output, input);
1038bf215546Sopenharmony_ci    }
1039bf215546Sopenharmony_ci    if (key->passthrough & (1 << NINE_DECLUSAGE_FOG)) {
1040bf215546Sopenharmony_ci        struct ureg_src input;
1041bf215546Sopenharmony_ci        struct ureg_dst output;
1042bf215546Sopenharmony_ci        input = build_vs_add_input(vs, NINE_DECLUSAGE_FOG);
1043bf215546Sopenharmony_ci        input = ureg_scalar(input, TGSI_SWIZZLE_X);
1044bf215546Sopenharmony_ci        output = oFog;
1045bf215546Sopenharmony_ci        ureg_MOV(ureg, output, input);
1046bf215546Sopenharmony_ci    }
1047bf215546Sopenharmony_ci    if (key->passthrough & (1 << NINE_DECLUSAGE_DEPTH)) {
1048bf215546Sopenharmony_ci        (void) 0; /* TODO: replace z of position output ? */
1049bf215546Sopenharmony_ci    }
1050bf215546Sopenharmony_ci
1051bf215546Sopenharmony_ci    /* ucp for ff applies on world coordinates.
1052bf215546Sopenharmony_ci     * aVtx is in worldview coordinates. */
1053bf215546Sopenharmony_ci    if (key->ucp) {
1054bf215546Sopenharmony_ci        struct ureg_dst clipVect = ureg_DECL_output(ureg, TGSI_SEMANTIC_CLIPVERTEX, 0);
1055bf215546Sopenharmony_ci        struct ureg_dst tmp = ureg_DECL_temporary(ureg);
1056bf215546Sopenharmony_ci        ureg_MUL(ureg, tmp, _XXXX(vs->aVtx), _CONST(12));
1057bf215546Sopenharmony_ci        ureg_MAD(ureg, tmp, _YYYY(vs->aVtx), _CONST(13),  ureg_src(tmp));
1058bf215546Sopenharmony_ci        ureg_MAD(ureg, tmp, _ZZZZ(vs->aVtx), _CONST(14), ureg_src(tmp));
1059bf215546Sopenharmony_ci        ureg_ADD(ureg, clipVect, _CONST(15), ureg_src(tmp));
1060bf215546Sopenharmony_ci        ureg_release_temporary(ureg, tmp);
1061bf215546Sopenharmony_ci    }
1062bf215546Sopenharmony_ci
1063bf215546Sopenharmony_ci    if (key->position_t && device->driver_caps.window_space_position_support)
1064bf215546Sopenharmony_ci        ureg_property(ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE);
1065bf215546Sopenharmony_ci
1066bf215546Sopenharmony_ci    ureg_END(ureg);
1067bf215546Sopenharmony_ci    nine_ureg_tgsi_dump(ureg, FALSE);
1068bf215546Sopenharmony_ci    return nine_create_shader_with_so_and_destroy(ureg, device->context.pipe, NULL);
1069bf215546Sopenharmony_ci}
1070bf215546Sopenharmony_ci
1071bf215546Sopenharmony_ci/* PS FF constants layout:
1072bf215546Sopenharmony_ci *
1073bf215546Sopenharmony_ci * CONST[ 0.. 7]      stage[i].D3DTSS_CONSTANT
1074bf215546Sopenharmony_ci * CONST[ 8..15].x___ stage[i].D3DTSS_BUMPENVMAT00
1075bf215546Sopenharmony_ci * CONST[ 8..15]._y__ stage[i].D3DTSS_BUMPENVMAT01
1076bf215546Sopenharmony_ci * CONST[ 8..15].__z_ stage[i].D3DTSS_BUMPENVMAT10
1077bf215546Sopenharmony_ci * CONST[ 8..15].___w stage[i].D3DTSS_BUMPENVMAT11
1078bf215546Sopenharmony_ci * CONST[16..19].x_z_ stage[i].D3DTSS_BUMPENVLSCALE
 * CONST[16..19]._y_w stage[i].D3DTSS_BUMPENVLOFFSET
1080bf215546Sopenharmony_ci *
1081bf215546Sopenharmony_ci * CONST[20] D3DRS_TEXTUREFACTOR
1082bf215546Sopenharmony_ci * CONST[21] D3DRS_FOGCOLOR
1083bf215546Sopenharmony_ci * CONST[22].x___ RS.FogEnd
1084bf215546Sopenharmony_ci * CONST[22]._y__ 1.0f / (RS.FogEnd - RS.FogStart)
1085bf215546Sopenharmony_ci * CONST[22].__z_ RS.FogDensity
1086bf215546Sopenharmony_ci */
1087bf215546Sopenharmony_cistruct ps_build_ctx
1088bf215546Sopenharmony_ci{
1089bf215546Sopenharmony_ci    struct ureg_program *ureg;
1090bf215546Sopenharmony_ci
1091bf215546Sopenharmony_ci    struct ureg_src vC[2]; /* DIFFUSE, SPECULAR */
1092bf215546Sopenharmony_ci    struct ureg_src vT[8]; /* TEXCOORD[i] */
1093bf215546Sopenharmony_ci    struct ureg_dst rCur; /* D3DTA_CURRENT */
1094bf215546Sopenharmony_ci    struct ureg_dst rMod;
1095bf215546Sopenharmony_ci    struct ureg_src rCurSrc;
1096bf215546Sopenharmony_ci    struct ureg_dst rTmp; /* D3DTA_TEMP */
1097bf215546Sopenharmony_ci    struct ureg_src rTmpSrc;
1098bf215546Sopenharmony_ci    struct ureg_dst rTex;
1099bf215546Sopenharmony_ci    struct ureg_src rTexSrc;
1100bf215546Sopenharmony_ci    struct ureg_src cBEM[8];
1101bf215546Sopenharmony_ci    struct ureg_src s[8];
1102bf215546Sopenharmony_ci
1103bf215546Sopenharmony_ci    struct {
1104bf215546Sopenharmony_ci        unsigned index;
1105bf215546Sopenharmony_ci        unsigned index_pre_mod;
1106bf215546Sopenharmony_ci    } stage;
1107bf215546Sopenharmony_ci};
1108bf215546Sopenharmony_ci
1109bf215546Sopenharmony_cistatic struct ureg_src
1110bf215546Sopenharmony_cips_get_ts_arg(struct ps_build_ctx *ps, unsigned ta)
1111bf215546Sopenharmony_ci{
1112bf215546Sopenharmony_ci    struct ureg_src reg;
1113bf215546Sopenharmony_ci
1114bf215546Sopenharmony_ci    switch (ta & D3DTA_SELECTMASK) {
1115bf215546Sopenharmony_ci    case D3DTA_CONSTANT:
1116bf215546Sopenharmony_ci        reg = ureg_DECL_constant(ps->ureg, ps->stage.index);
1117bf215546Sopenharmony_ci        break;
1118bf215546Sopenharmony_ci    case D3DTA_CURRENT:
1119bf215546Sopenharmony_ci        reg = (ps->stage.index == ps->stage.index_pre_mod) ? ureg_src(ps->rMod) : ps->rCurSrc;
1120bf215546Sopenharmony_ci        break;
1121bf215546Sopenharmony_ci    case D3DTA_DIFFUSE:
1122bf215546Sopenharmony_ci        reg = ureg_DECL_fs_input(ps->ureg, TGSI_SEMANTIC_COLOR, 0, TGSI_INTERPOLATE_COLOR);
1123bf215546Sopenharmony_ci        break;
1124bf215546Sopenharmony_ci    case D3DTA_SPECULAR:
1125bf215546Sopenharmony_ci        reg = ureg_DECL_fs_input(ps->ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR);
1126bf215546Sopenharmony_ci        break;
1127bf215546Sopenharmony_ci    case D3DTA_TEMP:
1128bf215546Sopenharmony_ci        reg = ps->rTmpSrc;
1129bf215546Sopenharmony_ci        break;
1130bf215546Sopenharmony_ci    case D3DTA_TEXTURE:
1131bf215546Sopenharmony_ci        reg = ps->rTexSrc;
1132bf215546Sopenharmony_ci        break;
1133bf215546Sopenharmony_ci    case D3DTA_TFACTOR:
1134bf215546Sopenharmony_ci        reg = ureg_DECL_constant(ps->ureg, 20);
1135bf215546Sopenharmony_ci        break;
1136bf215546Sopenharmony_ci    default:
1137bf215546Sopenharmony_ci        assert(0);
1138bf215546Sopenharmony_ci        reg = ureg_src_undef();
1139bf215546Sopenharmony_ci        break;
1140bf215546Sopenharmony_ci    }
1141bf215546Sopenharmony_ci    if (ta & D3DTA_COMPLEMENT) {
1142bf215546Sopenharmony_ci        struct ureg_dst dst = ureg_DECL_temporary(ps->ureg);
1143bf215546Sopenharmony_ci        ureg_ADD(ps->ureg, dst, ureg_imm1f(ps->ureg, 1.0f), ureg_negate(reg));
1144bf215546Sopenharmony_ci        reg = ureg_src(dst);
1145bf215546Sopenharmony_ci    }
1146bf215546Sopenharmony_ci    if (ta & D3DTA_ALPHAREPLICATE)
1147bf215546Sopenharmony_ci        reg = _WWWW(reg);
1148bf215546Sopenharmony_ci    return reg;
1149bf215546Sopenharmony_ci}
1150bf215546Sopenharmony_ci
1151bf215546Sopenharmony_cistatic struct ureg_dst
1152bf215546Sopenharmony_cips_get_ts_dst(struct ps_build_ctx *ps, unsigned ta)
1153bf215546Sopenharmony_ci{
1154bf215546Sopenharmony_ci    assert(!(ta & (D3DTA_COMPLEMENT | D3DTA_ALPHAREPLICATE)));
1155bf215546Sopenharmony_ci
1156bf215546Sopenharmony_ci    switch (ta & D3DTA_SELECTMASK) {
1157bf215546Sopenharmony_ci    case D3DTA_CURRENT:
1158bf215546Sopenharmony_ci        return ps->rCur;
1159bf215546Sopenharmony_ci    case D3DTA_TEMP:
1160bf215546Sopenharmony_ci        return ps->rTmp;
1161bf215546Sopenharmony_ci    default:
1162bf215546Sopenharmony_ci        assert(0);
1163bf215546Sopenharmony_ci        return ureg_dst_undef();
1164bf215546Sopenharmony_ci    }
1165bf215546Sopenharmony_ci}
1166bf215546Sopenharmony_ci
1167bf215546Sopenharmony_cistatic uint8_t ps_d3dtop_args_mask(D3DTEXTUREOP top)
1168bf215546Sopenharmony_ci{
1169bf215546Sopenharmony_ci    switch (top) {
1170bf215546Sopenharmony_ci    case D3DTOP_DISABLE:
1171bf215546Sopenharmony_ci        return 0x0;
1172bf215546Sopenharmony_ci    case D3DTOP_SELECTARG1:
1173bf215546Sopenharmony_ci    case D3DTOP_PREMODULATE:
1174bf215546Sopenharmony_ci        return 0x2;
1175bf215546Sopenharmony_ci    case D3DTOP_SELECTARG2:
1176bf215546Sopenharmony_ci        return 0x4;
1177bf215546Sopenharmony_ci    case D3DTOP_MULTIPLYADD:
1178bf215546Sopenharmony_ci    case D3DTOP_LERP:
1179bf215546Sopenharmony_ci        return 0x7;
1180bf215546Sopenharmony_ci    default:
1181bf215546Sopenharmony_ci        return 0x6;
1182bf215546Sopenharmony_ci    }
1183bf215546Sopenharmony_ci}
1184bf215546Sopenharmony_ci
1185bf215546Sopenharmony_cistatic inline boolean
1186bf215546Sopenharmony_ciis_MOV_no_op(struct ureg_dst dst, struct ureg_src src)
1187bf215546Sopenharmony_ci{
1188bf215546Sopenharmony_ci    return !dst.WriteMask ||
1189bf215546Sopenharmony_ci        (dst.File == src.File &&
1190bf215546Sopenharmony_ci         dst.Index == src.Index &&
1191bf215546Sopenharmony_ci         !dst.Indirect &&
1192bf215546Sopenharmony_ci         !dst.Saturate &&
1193bf215546Sopenharmony_ci         !src.Indirect &&
1194bf215546Sopenharmony_ci         !src.Negate &&
1195bf215546Sopenharmony_ci         !src.Absolute &&
1196bf215546Sopenharmony_ci         (!(dst.WriteMask & TGSI_WRITEMASK_X) || (src.SwizzleX == TGSI_SWIZZLE_X)) &&
1197bf215546Sopenharmony_ci         (!(dst.WriteMask & TGSI_WRITEMASK_Y) || (src.SwizzleY == TGSI_SWIZZLE_Y)) &&
1198bf215546Sopenharmony_ci         (!(dst.WriteMask & TGSI_WRITEMASK_Z) || (src.SwizzleZ == TGSI_SWIZZLE_Z)) &&
1199bf215546Sopenharmony_ci         (!(dst.WriteMask & TGSI_WRITEMASK_W) || (src.SwizzleW == TGSI_SWIZZLE_W)));
1200bf215546Sopenharmony_ci
1201bf215546Sopenharmony_ci}
1202bf215546Sopenharmony_ci
1203bf215546Sopenharmony_cistatic void
1204bf215546Sopenharmony_cips_do_ts_op(struct ps_build_ctx *ps, unsigned top, struct ureg_dst dst, struct ureg_src *arg)
1205bf215546Sopenharmony_ci{
1206bf215546Sopenharmony_ci    struct ureg_program *ureg = ps->ureg;
1207bf215546Sopenharmony_ci    struct ureg_dst tmp = ureg_DECL_temporary(ureg);
1208bf215546Sopenharmony_ci    struct ureg_dst tmp2 = ureg_DECL_temporary(ureg);
1209bf215546Sopenharmony_ci    struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
1210bf215546Sopenharmony_ci
1211bf215546Sopenharmony_ci    tmp.WriteMask = dst.WriteMask;
1212bf215546Sopenharmony_ci
1213bf215546Sopenharmony_ci    if (top != D3DTOP_SELECTARG1 && top != D3DTOP_SELECTARG2 &&
1214bf215546Sopenharmony_ci        top != D3DTOP_MODULATE && top != D3DTOP_PREMODULATE &&
1215bf215546Sopenharmony_ci        top != D3DTOP_BLENDDIFFUSEALPHA && top != D3DTOP_BLENDTEXTUREALPHA &&
1216bf215546Sopenharmony_ci        top != D3DTOP_BLENDFACTORALPHA && top != D3DTOP_BLENDCURRENTALPHA &&
1217bf215546Sopenharmony_ci        top != D3DTOP_BUMPENVMAP && top != D3DTOP_BUMPENVMAPLUMINANCE &&
1218bf215546Sopenharmony_ci        top != D3DTOP_LERP)
1219bf215546Sopenharmony_ci        dst = ureg_saturate(dst);
1220bf215546Sopenharmony_ci
1221bf215546Sopenharmony_ci    switch (top) {
1222bf215546Sopenharmony_ci    case D3DTOP_SELECTARG1:
1223bf215546Sopenharmony_ci        if (!is_MOV_no_op(dst, arg[1]))
1224bf215546Sopenharmony_ci            ureg_MOV(ureg, dst, arg[1]);
1225bf215546Sopenharmony_ci        break;
1226bf215546Sopenharmony_ci    case D3DTOP_SELECTARG2:
1227bf215546Sopenharmony_ci        if (!is_MOV_no_op(dst, arg[2]))
1228bf215546Sopenharmony_ci            ureg_MOV(ureg, dst, arg[2]);
1229bf215546Sopenharmony_ci        break;
1230bf215546Sopenharmony_ci    case D3DTOP_MODULATE:
1231bf215546Sopenharmony_ci        ureg_MUL(ureg, dst, arg[1], arg[2]);
1232bf215546Sopenharmony_ci        break;
1233bf215546Sopenharmony_ci    case D3DTOP_MODULATE2X:
1234bf215546Sopenharmony_ci        ureg_MUL(ureg, tmp, arg[1], arg[2]);
1235bf215546Sopenharmony_ci        ureg_ADD(ureg, dst, ureg_src(tmp), ureg_src(tmp));
1236bf215546Sopenharmony_ci        break;
1237bf215546Sopenharmony_ci    case D3DTOP_MODULATE4X:
1238bf215546Sopenharmony_ci        ureg_MUL(ureg, tmp, arg[1], arg[2]);
1239bf215546Sopenharmony_ci        ureg_MUL(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, 4.0f));
1240bf215546Sopenharmony_ci        break;
1241bf215546Sopenharmony_ci    case D3DTOP_ADD:
1242bf215546Sopenharmony_ci        ureg_ADD(ureg, dst, arg[1], arg[2]);
1243bf215546Sopenharmony_ci        break;
1244bf215546Sopenharmony_ci    case D3DTOP_ADDSIGNED:
1245bf215546Sopenharmony_ci        ureg_ADD(ureg, tmp, arg[1], arg[2]);
1246bf215546Sopenharmony_ci        ureg_ADD(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, -0.5f));
1247bf215546Sopenharmony_ci        break;
1248bf215546Sopenharmony_ci    case D3DTOP_ADDSIGNED2X:
1249bf215546Sopenharmony_ci        ureg_ADD(ureg, tmp, arg[1], arg[2]);
1250bf215546Sopenharmony_ci        ureg_MAD(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f));
1251bf215546Sopenharmony_ci        break;
1252bf215546Sopenharmony_ci    case D3DTOP_SUBTRACT:
1253bf215546Sopenharmony_ci        ureg_ADD(ureg, dst, arg[1], ureg_negate(arg[2]));
1254bf215546Sopenharmony_ci        break;
1255bf215546Sopenharmony_ci    case D3DTOP_ADDSMOOTH:
1256bf215546Sopenharmony_ci        ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(arg[1]));
1257bf215546Sopenharmony_ci        ureg_MAD(ureg, dst, ureg_src(tmp), arg[2], arg[1]);
1258bf215546Sopenharmony_ci        break;
1259bf215546Sopenharmony_ci    case D3DTOP_BLENDDIFFUSEALPHA:
1260bf215546Sopenharmony_ci        ureg_LRP(ureg, dst, _WWWW(ps->vC[0]), arg[1], arg[2]);
1261bf215546Sopenharmony_ci        break;
1262bf215546Sopenharmony_ci    case D3DTOP_BLENDTEXTUREALPHA:
1263bf215546Sopenharmony_ci        /* XXX: alpha taken from previous stage, texture or result ? */
1264bf215546Sopenharmony_ci        ureg_LRP(ureg, dst, _W(ps->rTex), arg[1], arg[2]);
1265bf215546Sopenharmony_ci        break;
1266bf215546Sopenharmony_ci    case D3DTOP_BLENDFACTORALPHA:
1267bf215546Sopenharmony_ci        ureg_LRP(ureg, dst, _WWWW(_CONST(20)), arg[1], arg[2]);
1268bf215546Sopenharmony_ci        break;
1269bf215546Sopenharmony_ci    case D3DTOP_BLENDTEXTUREALPHAPM:
1270bf215546Sopenharmony_ci        ureg_ADD(ureg, tmp_x, ureg_imm1f(ureg, 1.0f), ureg_negate(_W(ps->rTex)));
1271bf215546Sopenharmony_ci        ureg_MAD(ureg, dst, arg[2], _X(tmp), arg[1]);
1272bf215546Sopenharmony_ci        break;
1273bf215546Sopenharmony_ci    case D3DTOP_BLENDCURRENTALPHA:
1274bf215546Sopenharmony_ci        ureg_LRP(ureg, dst, _WWWW(ps->rCurSrc), arg[1], arg[2]);
1275bf215546Sopenharmony_ci        break;
1276bf215546Sopenharmony_ci    case D3DTOP_PREMODULATE:
1277bf215546Sopenharmony_ci        ureg_MOV(ureg, dst, arg[1]);
1278bf215546Sopenharmony_ci        ps->stage.index_pre_mod = ps->stage.index + 1;
1279bf215546Sopenharmony_ci        break;
1280bf215546Sopenharmony_ci    case D3DTOP_MODULATEALPHA_ADDCOLOR:
1281bf215546Sopenharmony_ci        ureg_MAD(ureg, dst, _WWWW(arg[1]), arg[2], arg[1]);
1282bf215546Sopenharmony_ci        break;
1283bf215546Sopenharmony_ci    case D3DTOP_MODULATECOLOR_ADDALPHA:
1284bf215546Sopenharmony_ci        ureg_MAD(ureg, dst, arg[1], arg[2], _WWWW(arg[1]));
1285bf215546Sopenharmony_ci        break;
1286bf215546Sopenharmony_ci    case D3DTOP_MODULATEINVALPHA_ADDCOLOR:
1287bf215546Sopenharmony_ci        ureg_ADD(ureg, tmp_x, ureg_imm1f(ureg, 1.0f), ureg_negate(_WWWW(arg[1])));
1288bf215546Sopenharmony_ci        ureg_MAD(ureg, dst, _X(tmp), arg[2], arg[1]);
1289bf215546Sopenharmony_ci        break;
1290bf215546Sopenharmony_ci    case D3DTOP_MODULATEINVCOLOR_ADDALPHA:
1291bf215546Sopenharmony_ci        ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(arg[1]));
1292bf215546Sopenharmony_ci        ureg_MAD(ureg, dst, ureg_src(tmp), arg[2], _WWWW(arg[1]));
1293bf215546Sopenharmony_ci        break;
1294bf215546Sopenharmony_ci    case D3DTOP_BUMPENVMAP:
1295bf215546Sopenharmony_ci        break;
1296bf215546Sopenharmony_ci    case D3DTOP_BUMPENVMAPLUMINANCE:
1297bf215546Sopenharmony_ci        break;
1298bf215546Sopenharmony_ci    case D3DTOP_DOTPRODUCT3:
1299bf215546Sopenharmony_ci        ureg_ADD(ureg, tmp, arg[1], ureg_imm4f(ureg,-0.5,-0.5,-0.5,-0.5));
1300bf215546Sopenharmony_ci        ureg_ADD(ureg, tmp2, arg[2] , ureg_imm4f(ureg,-0.5,-0.5,-0.5,-0.5));
1301bf215546Sopenharmony_ci        ureg_DP3(ureg, tmp, ureg_src(tmp), ureg_src(tmp2));
1302bf215546Sopenharmony_ci        ureg_MUL(ureg, ureg_saturate(dst), ureg_src(tmp), ureg_imm4f(ureg,4.0,4.0,4.0,4.0));
1303bf215546Sopenharmony_ci        break;
1304bf215546Sopenharmony_ci    case D3DTOP_MULTIPLYADD:
1305bf215546Sopenharmony_ci        ureg_MAD(ureg, dst, arg[1], arg[2], arg[0]);
1306bf215546Sopenharmony_ci        break;
1307bf215546Sopenharmony_ci    case D3DTOP_LERP:
1308bf215546Sopenharmony_ci        ureg_LRP(ureg, dst, arg[0], arg[1], arg[2]);
1309bf215546Sopenharmony_ci        break;
1310bf215546Sopenharmony_ci    case D3DTOP_DISABLE:
1311bf215546Sopenharmony_ci        /* no-op ? */
1312bf215546Sopenharmony_ci        break;
1313bf215546Sopenharmony_ci    default:
1314bf215546Sopenharmony_ci        assert(!"invalid D3DTOP");
1315bf215546Sopenharmony_ci        break;
1316bf215546Sopenharmony_ci    }
1317bf215546Sopenharmony_ci    ureg_release_temporary(ureg, tmp);
1318bf215546Sopenharmony_ci    ureg_release_temporary(ureg, tmp2);
1319bf215546Sopenharmony_ci}
1320bf215546Sopenharmony_ci
1321bf215546Sopenharmony_cistatic void *
1322bf215546Sopenharmony_cinine_ff_build_ps(struct NineDevice9 *device, struct nine_ff_ps_key *key)
1323bf215546Sopenharmony_ci{
1324bf215546Sopenharmony_ci    struct ps_build_ctx ps;
1325bf215546Sopenharmony_ci    struct ureg_program *ureg = ureg_create(PIPE_SHADER_FRAGMENT);
1326bf215546Sopenharmony_ci    struct ureg_dst oCol;
1327bf215546Sopenharmony_ci    unsigned s;
1328bf215546Sopenharmony_ci    const unsigned texcoord_sn = get_texcoord_sn(device->screen);
1329bf215546Sopenharmony_ci
1330bf215546Sopenharmony_ci    memset(&ps, 0, sizeof(ps));
1331bf215546Sopenharmony_ci    ps.ureg = ureg;
1332bf215546Sopenharmony_ci    ps.stage.index_pre_mod = -1;
1333bf215546Sopenharmony_ci
1334bf215546Sopenharmony_ci    ps.vC[0] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 0, TGSI_INTERPOLATE_COLOR);
1335bf215546Sopenharmony_ci
1336bf215546Sopenharmony_ci    ps.rCur = ureg_DECL_temporary(ureg);
1337bf215546Sopenharmony_ci    ps.rTmp = ureg_DECL_temporary(ureg);
1338bf215546Sopenharmony_ci    ps.rTex = ureg_DECL_temporary(ureg);
1339bf215546Sopenharmony_ci    ps.rCurSrc = ureg_src(ps.rCur);
1340bf215546Sopenharmony_ci    ps.rTmpSrc = ureg_src(ps.rTmp);
1341bf215546Sopenharmony_ci    ps.rTexSrc = ureg_src(ps.rTex);
1342bf215546Sopenharmony_ci
1343bf215546Sopenharmony_ci    /* Initial values */
1344bf215546Sopenharmony_ci    ureg_MOV(ureg, ps.rCur, ps.vC[0]);
1345bf215546Sopenharmony_ci    ureg_MOV(ureg, ps.rTmp, ureg_imm1f(ureg, 0.0f));
1346bf215546Sopenharmony_ci    ureg_MOV(ureg, ps.rTex, ureg_imm1f(ureg, 0.0f));
1347bf215546Sopenharmony_ci
1348bf215546Sopenharmony_ci    for (s = 0; s < 8; ++s) {
1349bf215546Sopenharmony_ci        ps.s[s] = ureg_src_undef();
1350bf215546Sopenharmony_ci
1351bf215546Sopenharmony_ci        if (key->ts[s].colorop != D3DTOP_DISABLE) {
1352bf215546Sopenharmony_ci            if (key->ts[s].colorarg0 == D3DTA_SPECULAR ||
1353bf215546Sopenharmony_ci                key->ts[s].colorarg1 == D3DTA_SPECULAR ||
1354bf215546Sopenharmony_ci                key->ts[s].colorarg2 == D3DTA_SPECULAR)
1355bf215546Sopenharmony_ci                ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR);
1356bf215546Sopenharmony_ci
1357bf215546Sopenharmony_ci            if (key->ts[s].colorarg0 == D3DTA_TEXTURE ||
1358bf215546Sopenharmony_ci                key->ts[s].colorarg1 == D3DTA_TEXTURE ||
1359bf215546Sopenharmony_ci                key->ts[s].colorarg2 == D3DTA_TEXTURE ||
1360bf215546Sopenharmony_ci                key->ts[s].colorop == D3DTOP_BLENDTEXTUREALPHA ||
1361bf215546Sopenharmony_ci                key->ts[s].colorop == D3DTOP_BLENDTEXTUREALPHAPM) {
1362bf215546Sopenharmony_ci                ps.s[s] = ureg_DECL_sampler(ureg, s);
1363bf215546Sopenharmony_ci                ps.vT[s] = ureg_DECL_fs_input(ureg, texcoord_sn, s, TGSI_INTERPOLATE_PERSPECTIVE);
1364bf215546Sopenharmony_ci            }
1365bf215546Sopenharmony_ci            if (s && (key->ts[s - 1].colorop == D3DTOP_PREMODULATE ||
1366bf215546Sopenharmony_ci                      key->ts[s - 1].alphaop == D3DTOP_PREMODULATE))
1367bf215546Sopenharmony_ci                ps.s[s] = ureg_DECL_sampler(ureg, s);
1368bf215546Sopenharmony_ci        }
1369bf215546Sopenharmony_ci
1370bf215546Sopenharmony_ci        if (key->ts[s].alphaop != D3DTOP_DISABLE) {
1371bf215546Sopenharmony_ci            if (key->ts[s].alphaarg0 == D3DTA_SPECULAR ||
1372bf215546Sopenharmony_ci                key->ts[s].alphaarg1 == D3DTA_SPECULAR ||
1373bf215546Sopenharmony_ci                key->ts[s].alphaarg2 == D3DTA_SPECULAR)
1374bf215546Sopenharmony_ci                ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR);
1375bf215546Sopenharmony_ci
1376bf215546Sopenharmony_ci            if (key->ts[s].alphaarg0 == D3DTA_TEXTURE ||
1377bf215546Sopenharmony_ci                key->ts[s].alphaarg1 == D3DTA_TEXTURE ||
1378bf215546Sopenharmony_ci                key->ts[s].alphaarg2 == D3DTA_TEXTURE ||
1379bf215546Sopenharmony_ci                key->ts[s].colorop == D3DTOP_BLENDTEXTUREALPHA ||
1380bf215546Sopenharmony_ci                key->ts[s].colorop == D3DTOP_BLENDTEXTUREALPHAPM) {
1381bf215546Sopenharmony_ci                ps.s[s] = ureg_DECL_sampler(ureg, s);
1382bf215546Sopenharmony_ci                ps.vT[s] = ureg_DECL_fs_input(ureg, texcoord_sn, s, TGSI_INTERPOLATE_PERSPECTIVE);
1383bf215546Sopenharmony_ci            }
1384bf215546Sopenharmony_ci        }
1385bf215546Sopenharmony_ci    }
1386bf215546Sopenharmony_ci    if (key->specular)
1387bf215546Sopenharmony_ci        ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR);
1388bf215546Sopenharmony_ci
1389bf215546Sopenharmony_ci    oCol = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
1390bf215546Sopenharmony_ci
1391bf215546Sopenharmony_ci    /* Run stages.
1392bf215546Sopenharmony_ci     */
1393bf215546Sopenharmony_ci    for (s = 0; s < 8; ++s) {
1394bf215546Sopenharmony_ci        unsigned colorarg[3];
1395bf215546Sopenharmony_ci        unsigned alphaarg[3];
1396bf215546Sopenharmony_ci        const uint8_t used_c = ps_d3dtop_args_mask(key->ts[s].colorop);
1397bf215546Sopenharmony_ci        const uint8_t used_a = ps_d3dtop_args_mask(key->ts[s].alphaop);
1398bf215546Sopenharmony_ci        struct ureg_dst dst;
1399bf215546Sopenharmony_ci        struct ureg_src arg[3];
1400bf215546Sopenharmony_ci
1401bf215546Sopenharmony_ci        if (key->ts[s].colorop == D3DTOP_DISABLE) {
1402bf215546Sopenharmony_ci            assert (key->ts[s].alphaop == D3DTOP_DISABLE);
1403bf215546Sopenharmony_ci            continue;
1404bf215546Sopenharmony_ci        }
1405bf215546Sopenharmony_ci        ps.stage.index = s;
1406bf215546Sopenharmony_ci
1407bf215546Sopenharmony_ci        DBG("STAGE[%u]: colorop=%s alphaop=%s\n", s,
1408bf215546Sopenharmony_ci            nine_D3DTOP_to_str(key->ts[s].colorop),
1409bf215546Sopenharmony_ci            nine_D3DTOP_to_str(key->ts[s].alphaop));
1410bf215546Sopenharmony_ci
1411bf215546Sopenharmony_ci        if (!ureg_src_is_undef(ps.s[s])) {
1412bf215546Sopenharmony_ci            unsigned target;
1413bf215546Sopenharmony_ci            struct ureg_src texture_coord = ps.vT[s];
1414bf215546Sopenharmony_ci            struct ureg_dst delta;
1415bf215546Sopenharmony_ci            switch (key->ts[s].textarget) {
1416bf215546Sopenharmony_ci            case 0: target = TGSI_TEXTURE_1D; break;
1417bf215546Sopenharmony_ci            case 1: target = TGSI_TEXTURE_2D; break;
1418bf215546Sopenharmony_ci            case 2: target = TGSI_TEXTURE_3D; break;
1419bf215546Sopenharmony_ci            case 3: target = TGSI_TEXTURE_CUBE; break;
1420bf215546Sopenharmony_ci            /* this is a 2 bit bitfield, do I really need a default case ? */
1421bf215546Sopenharmony_ci            }
1422bf215546Sopenharmony_ci
1423bf215546Sopenharmony_ci            /* Modify coordinates */
1424bf215546Sopenharmony_ci            if (s >= 1 &&
1425bf215546Sopenharmony_ci                (key->ts[s-1].colorop == D3DTOP_BUMPENVMAP ||
1426bf215546Sopenharmony_ci                 key->ts[s-1].colorop == D3DTOP_BUMPENVMAPLUMINANCE)) {
1427bf215546Sopenharmony_ci                delta = ureg_DECL_temporary(ureg);
1428bf215546Sopenharmony_ci                /* Du' = D3DTSS_BUMPENVMAT00(stage s-1)*t(s-1)R + D3DTSS_BUMPENVMAT10(stage s-1)*t(s-1)G */
1429bf215546Sopenharmony_ci                ureg_MUL(ureg, ureg_writemask(delta, TGSI_WRITEMASK_X), _X(ps.rTex), _XXXX(_CONST(8 + s - 1)));
1430bf215546Sopenharmony_ci                ureg_MAD(ureg, ureg_writemask(delta, TGSI_WRITEMASK_X), _Y(ps.rTex), _ZZZZ(_CONST(8 + s - 1)), ureg_src(delta));
1431bf215546Sopenharmony_ci                /* Dv' = D3DTSS_BUMPENVMAT01(stage s-1)*t(s-1)R + D3DTSS_BUMPENVMAT11(stage s-1)*t(s-1)G */
1432bf215546Sopenharmony_ci                ureg_MUL(ureg, ureg_writemask(delta, TGSI_WRITEMASK_Y), _X(ps.rTex), _YYYY(_CONST(8 + s - 1)));
1433bf215546Sopenharmony_ci                ureg_MAD(ureg, ureg_writemask(delta, TGSI_WRITEMASK_Y), _Y(ps.rTex), _WWWW(_CONST(8 + s - 1)), ureg_src(delta));
1434bf215546Sopenharmony_ci                texture_coord = ureg_src(ureg_DECL_temporary(ureg));
1435bf215546Sopenharmony_ci                ureg_MOV(ureg, ureg_writemask(ureg_dst(texture_coord), ureg_dst(ps.vT[s]).WriteMask), ps.vT[s]);
1436bf215546Sopenharmony_ci                ureg_ADD(ureg, ureg_writemask(ureg_dst(texture_coord), TGSI_WRITEMASK_XY), texture_coord, ureg_src(delta));
1437bf215546Sopenharmony_ci                /* Prepare luminance multiplier
1438bf215546Sopenharmony_ci                 * t(s)RGBA = t(s)RGBA * clamp[(t(s-1)B * D3DTSS_BUMPENVLSCALE(stage s-1)) + D3DTSS_BUMPENVLOFFSET(stage s-1)] */
1439bf215546Sopenharmony_ci                if (key->ts[s-1].colorop == D3DTOP_BUMPENVMAPLUMINANCE) {
1440bf215546Sopenharmony_ci                    struct ureg_src bumpenvlscale = ((s-1) & 1) ? _ZZZZ(_CONST(16 + (s-1) / 2)) : _XXXX(_CONST(16 + (s-1) / 2));
1441bf215546Sopenharmony_ci                    struct ureg_src bumpenvloffset = ((s-1) & 1) ? _WWWW(_CONST(16 + (s-1) / 2)) : _YYYY(_CONST(16 + (s-1) / 2));
1442bf215546Sopenharmony_ci
1443bf215546Sopenharmony_ci                    ureg_MAD(ureg, ureg_saturate(ureg_writemask(delta, TGSI_WRITEMASK_X)), _Z(ps.rTex), bumpenvlscale, bumpenvloffset);
1444bf215546Sopenharmony_ci                }
1445bf215546Sopenharmony_ci            }
1446bf215546Sopenharmony_ci            if (key->projected & (3 << (s *2))) {
1447bf215546Sopenharmony_ci                unsigned dim = 1 + ((key->projected >> (2 * s)) & 3);
1448bf215546Sopenharmony_ci                if (dim == 4)
1449bf215546Sopenharmony_ci                    ureg_TXP(ureg, ps.rTex, target, texture_coord, ps.s[s]);
1450bf215546Sopenharmony_ci                else {
1451bf215546Sopenharmony_ci                    struct ureg_dst tmp = ureg_DECL_temporary(ureg);
1452bf215546Sopenharmony_ci                    ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(texture_coord, dim-1));
1453bf215546Sopenharmony_ci                    ureg_MUL(ureg, ps.rTmp, _X(tmp), texture_coord);
1454bf215546Sopenharmony_ci                    ureg_TEX(ureg, ps.rTex, target, ps.rTmpSrc, ps.s[s]);
1455bf215546Sopenharmony_ci                    ureg_release_temporary(ureg, tmp);
1456bf215546Sopenharmony_ci                }
1457bf215546Sopenharmony_ci            } else {
1458bf215546Sopenharmony_ci                ureg_TEX(ureg, ps.rTex, target, texture_coord, ps.s[s]);
1459bf215546Sopenharmony_ci            }
1460bf215546Sopenharmony_ci            if (s >= 1 && key->ts[s-1].colorop == D3DTOP_BUMPENVMAPLUMINANCE)
1461bf215546Sopenharmony_ci                ureg_MUL(ureg, ps.rTex, ureg_src(ps.rTex), _X(delta));
1462bf215546Sopenharmony_ci        }
1463bf215546Sopenharmony_ci
1464bf215546Sopenharmony_ci        if (key->ts[s].colorop == D3DTOP_BUMPENVMAP ||
1465bf215546Sopenharmony_ci            key->ts[s].colorop == D3DTOP_BUMPENVMAPLUMINANCE)
1466bf215546Sopenharmony_ci            continue;
1467bf215546Sopenharmony_ci
1468bf215546Sopenharmony_ci        dst = ps_get_ts_dst(&ps, key->ts[s].resultarg ? D3DTA_TEMP : D3DTA_CURRENT);
1469bf215546Sopenharmony_ci
1470bf215546Sopenharmony_ci        if (ps.stage.index_pre_mod == ps.stage.index) {
1471bf215546Sopenharmony_ci            ps.rMod = ureg_DECL_temporary(ureg);
1472bf215546Sopenharmony_ci            ureg_MUL(ureg, ps.rMod, ps.rCurSrc, ps.rTexSrc);
1473bf215546Sopenharmony_ci        }
1474bf215546Sopenharmony_ci
1475bf215546Sopenharmony_ci        colorarg[0] = (key->ts[s].colorarg0 | (((key->colorarg_b4[0] >> s) & 0x1) << 4) | ((key->colorarg_b5[0] >> s) << 5)) & 0x3f;
1476bf215546Sopenharmony_ci        colorarg[1] = (key->ts[s].colorarg1 | (((key->colorarg_b4[1] >> s) & 0x1) << 4) | ((key->colorarg_b5[1] >> s) << 5)) & 0x3f;
1477bf215546Sopenharmony_ci        colorarg[2] = (key->ts[s].colorarg2 | (((key->colorarg_b4[2] >> s) & 0x1) << 4) | ((key->colorarg_b5[2] >> s) << 5)) & 0x3f;
1478bf215546Sopenharmony_ci        alphaarg[0] = (key->ts[s].alphaarg0 | ((key->alphaarg_b4[0] >> s) << 4)) & 0x1f;
1479bf215546Sopenharmony_ci        alphaarg[1] = (key->ts[s].alphaarg1 | ((key->alphaarg_b4[1] >> s) << 4)) & 0x1f;
1480bf215546Sopenharmony_ci        alphaarg[2] = (key->ts[s].alphaarg2 | ((key->alphaarg_b4[2] >> s) << 4)) & 0x1f;
1481bf215546Sopenharmony_ci
1482bf215546Sopenharmony_ci        if (key->ts[s].colorop != key->ts[s].alphaop ||
1483bf215546Sopenharmony_ci            colorarg[0] != alphaarg[0] ||
1484bf215546Sopenharmony_ci            colorarg[1] != alphaarg[1] ||
1485bf215546Sopenharmony_ci            colorarg[2] != alphaarg[2])
1486bf215546Sopenharmony_ci            dst.WriteMask = TGSI_WRITEMASK_XYZ;
1487bf215546Sopenharmony_ci
1488bf215546Sopenharmony_ci        /* Special DOTPRODUCT behaviour (see wine tests) */
1489bf215546Sopenharmony_ci        if (key->ts[s].colorop == D3DTOP_DOTPRODUCT3)
1490bf215546Sopenharmony_ci            dst.WriteMask = TGSI_WRITEMASK_XYZW;
1491bf215546Sopenharmony_ci
1492bf215546Sopenharmony_ci        if (used_c & 0x1) arg[0] = ps_get_ts_arg(&ps, colorarg[0]);
1493bf215546Sopenharmony_ci        if (used_c & 0x2) arg[1] = ps_get_ts_arg(&ps, colorarg[1]);
1494bf215546Sopenharmony_ci        if (used_c & 0x4) arg[2] = ps_get_ts_arg(&ps, colorarg[2]);
1495bf215546Sopenharmony_ci        ps_do_ts_op(&ps, key->ts[s].colorop, dst, arg);
1496bf215546Sopenharmony_ci
1497bf215546Sopenharmony_ci        if (dst.WriteMask != TGSI_WRITEMASK_XYZW) {
1498bf215546Sopenharmony_ci            dst.WriteMask = TGSI_WRITEMASK_W;
1499bf215546Sopenharmony_ci
1500bf215546Sopenharmony_ci            if (used_a & 0x1) arg[0] = ps_get_ts_arg(&ps, alphaarg[0]);
1501bf215546Sopenharmony_ci            if (used_a & 0x2) arg[1] = ps_get_ts_arg(&ps, alphaarg[1]);
1502bf215546Sopenharmony_ci            if (used_a & 0x4) arg[2] = ps_get_ts_arg(&ps, alphaarg[2]);
1503bf215546Sopenharmony_ci            ps_do_ts_op(&ps, key->ts[s].alphaop, dst, arg);
1504bf215546Sopenharmony_ci        }
1505bf215546Sopenharmony_ci    }
1506bf215546Sopenharmony_ci
1507bf215546Sopenharmony_ci    if (key->specular)
1508bf215546Sopenharmony_ci        ureg_ADD(ureg, ureg_writemask(ps.rCur, TGSI_WRITEMASK_XYZ), ps.rCurSrc, ps.vC[1]);
1509bf215546Sopenharmony_ci
1510bf215546Sopenharmony_ci    /* Fog.
1511bf215546Sopenharmony_ci     */
1512bf215546Sopenharmony_ci    if (key->fog_mode) {
1513bf215546Sopenharmony_ci        struct ureg_dst rFog = ureg_writemask(ps.rTmp, TGSI_WRITEMASK_X);
1514bf215546Sopenharmony_ci        struct ureg_src vPos;
1515bf215546Sopenharmony_ci        if (device->screen->get_param(device->screen,
1516bf215546Sopenharmony_ci                                      PIPE_CAP_FS_POSITION_IS_SYSVAL)) {
1517bf215546Sopenharmony_ci            vPos = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0);
1518bf215546Sopenharmony_ci        } else {
1519bf215546Sopenharmony_ci            vPos = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION, 0,
1520bf215546Sopenharmony_ci                                      TGSI_INTERPOLATE_LINEAR);
1521bf215546Sopenharmony_ci        }
1522bf215546Sopenharmony_ci
1523bf215546Sopenharmony_ci        /* Source is either W or Z.
1524bf215546Sopenharmony_ci         * When we use vs ff,
1525bf215546Sopenharmony_ci         * Z is when an orthogonal projection matrix is detected,
1526bf215546Sopenharmony_ci         * W (WFOG) else.
1527bf215546Sopenharmony_ci         * Z is used for programmable vs.
1528bf215546Sopenharmony_ci         * Note: Tests indicate that the projection matrix coefficients do
1529bf215546Sopenharmony_ci         * actually affect pixel fog (and not vertex fog) when vs ff is used,
1530bf215546Sopenharmony_ci         * which justifies taking the position's w instead of taking the z coordinate
1531bf215546Sopenharmony_ci         * before the projection in the vs shader.
1532bf215546Sopenharmony_ci         */
1533bf215546Sopenharmony_ci        if (!key->fog_source)
1534bf215546Sopenharmony_ci            ureg_MOV(ureg, rFog, _ZZZZ(vPos));
1535bf215546Sopenharmony_ci        else
1536bf215546Sopenharmony_ci            /* Position's w is 1/w */
1537bf215546Sopenharmony_ci            ureg_RCP(ureg, rFog, _WWWW(vPos));
1538bf215546Sopenharmony_ci
1539bf215546Sopenharmony_ci        if (key->fog_mode == D3DFOG_EXP) {
1540bf215546Sopenharmony_ci            ureg_MUL(ureg, rFog, _X(rFog), _ZZZZ(_CONST(22)));
1541bf215546Sopenharmony_ci            ureg_MUL(ureg, rFog, _X(rFog), ureg_imm1f(ureg, -1.442695f));
1542bf215546Sopenharmony_ci            ureg_EX2(ureg, rFog, _X(rFog));
1543bf215546Sopenharmony_ci        } else
1544bf215546Sopenharmony_ci        if (key->fog_mode == D3DFOG_EXP2) {
1545bf215546Sopenharmony_ci            ureg_MUL(ureg, rFog, _X(rFog), _ZZZZ(_CONST(22)));
1546bf215546Sopenharmony_ci            ureg_MUL(ureg, rFog, _X(rFog), _X(rFog));
1547bf215546Sopenharmony_ci            ureg_MUL(ureg, rFog, _X(rFog), ureg_imm1f(ureg, -1.442695f));
1548bf215546Sopenharmony_ci            ureg_EX2(ureg, rFog, _X(rFog));
1549bf215546Sopenharmony_ci        } else
1550bf215546Sopenharmony_ci        if (key->fog_mode == D3DFOG_LINEAR) {
1551bf215546Sopenharmony_ci            ureg_ADD(ureg, rFog, _XXXX(_CONST(22)), ureg_negate(_X(rFog)));
1552bf215546Sopenharmony_ci            ureg_MUL(ureg, ureg_saturate(rFog), _X(rFog), _YYYY(_CONST(22)));
1553bf215546Sopenharmony_ci        }
1554bf215546Sopenharmony_ci        ureg_LRP(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_XYZ), _X(rFog), ps.rCurSrc, _CONST(21));
1555bf215546Sopenharmony_ci        ureg_MOV(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_W), ps.rCurSrc);
1556bf215546Sopenharmony_ci    } else
1557bf215546Sopenharmony_ci    if (key->fog) {
1558bf215546Sopenharmony_ci        struct ureg_src vFog = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_GENERIC, 16, TGSI_INTERPOLATE_PERSPECTIVE);
1559bf215546Sopenharmony_ci        ureg_LRP(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_XYZ), _XXXX(vFog), ps.rCurSrc, _CONST(21));
1560bf215546Sopenharmony_ci        ureg_MOV(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_W), ps.rCurSrc);
1561bf215546Sopenharmony_ci    } else {
1562bf215546Sopenharmony_ci        ureg_MOV(ureg, oCol, ps.rCurSrc);
1563bf215546Sopenharmony_ci    }
1564bf215546Sopenharmony_ci
1565bf215546Sopenharmony_ci    ureg_END(ureg);
1566bf215546Sopenharmony_ci    nine_ureg_tgsi_dump(ureg, FALSE);
1567bf215546Sopenharmony_ci    return nine_create_shader_with_so_and_destroy(ureg, device->context.pipe, NULL);
1568bf215546Sopenharmony_ci}
1569bf215546Sopenharmony_ci
1570bf215546Sopenharmony_cistatic struct NineVertexShader9 *
1571bf215546Sopenharmony_cinine_ff_get_vs(struct NineDevice9 *device)
1572bf215546Sopenharmony_ci{
1573bf215546Sopenharmony_ci    const struct nine_context *context = &device->context;
1574bf215546Sopenharmony_ci    struct NineVertexShader9 *vs;
1575bf215546Sopenharmony_ci    struct vs_build_ctx bld;
1576bf215546Sopenharmony_ci    struct nine_ff_vs_key key;
1577bf215546Sopenharmony_ci    unsigned s, i;
1578bf215546Sopenharmony_ci    boolean has_indexes = false;
1579bf215546Sopenharmony_ci    boolean has_weights = false;
1580bf215546Sopenharmony_ci    int8_t input_texture_coord[8];
1581bf215546Sopenharmony_ci
1582bf215546Sopenharmony_ci    assert(sizeof(key) <= sizeof(key.value32));
1583bf215546Sopenharmony_ci
1584bf215546Sopenharmony_ci    memset(&key, 0, sizeof(key));
1585bf215546Sopenharmony_ci    memset(&bld, 0, sizeof(bld));
1586bf215546Sopenharmony_ci    memset(&input_texture_coord, 0, sizeof(input_texture_coord));
1587bf215546Sopenharmony_ci
1588bf215546Sopenharmony_ci    bld.key = &key;
1589bf215546Sopenharmony_ci
1590bf215546Sopenharmony_ci    /* FIXME: this shouldn't be NULL, but it is on init */
1591bf215546Sopenharmony_ci    if (context->vdecl) {
1592bf215546Sopenharmony_ci        key.color0in_one = 1;
1593bf215546Sopenharmony_ci        key.color1in_zero = 1;
1594bf215546Sopenharmony_ci        for (i = 0; i < context->vdecl->nelems; i++) {
1595bf215546Sopenharmony_ci            uint16_t usage = context->vdecl->usage_map[i];
1596bf215546Sopenharmony_ci            if (usage == NINE_DECLUSAGE_POSITIONT)
1597bf215546Sopenharmony_ci                key.position_t = 1;
1598bf215546Sopenharmony_ci            else if (usage == NINE_DECLUSAGE_i(COLOR, 0))
1599bf215546Sopenharmony_ci                key.color0in_one = 0;
1600bf215546Sopenharmony_ci            else if (usage == NINE_DECLUSAGE_i(COLOR, 1))
1601bf215546Sopenharmony_ci                key.color1in_zero = 0;
1602bf215546Sopenharmony_ci            else if (usage == NINE_DECLUSAGE_i(BLENDINDICES, 0)) {
1603bf215546Sopenharmony_ci                has_indexes = true;
1604bf215546Sopenharmony_ci                key.passthrough |= 1 << usage;
1605bf215546Sopenharmony_ci            } else if (usage == NINE_DECLUSAGE_i(BLENDWEIGHT, 0)) {
1606bf215546Sopenharmony_ci                has_weights = true;
1607bf215546Sopenharmony_ci                key.passthrough |= 1 << usage;
1608bf215546Sopenharmony_ci            } else if (usage == NINE_DECLUSAGE_i(NORMAL, 0)) {
1609bf215546Sopenharmony_ci                key.has_normal = 1;
1610bf215546Sopenharmony_ci                key.passthrough |= 1 << usage;
1611bf215546Sopenharmony_ci            } else if (usage == NINE_DECLUSAGE_PSIZE)
1612bf215546Sopenharmony_ci                key.vertexpointsize = 1;
1613bf215546Sopenharmony_ci            else if (usage % NINE_DECLUSAGE_COUNT == NINE_DECLUSAGE_TEXCOORD) {
1614bf215546Sopenharmony_ci                s = usage / NINE_DECLUSAGE_COUNT;
1615bf215546Sopenharmony_ci                if (s < 8)
1616bf215546Sopenharmony_ci                    input_texture_coord[s] = nine_decltype_get_dim(context->vdecl->decls[i].Type);
1617bf215546Sopenharmony_ci                else
1618bf215546Sopenharmony_ci                    DBG("FF given texture coordinate >= 8. Ignoring\n");
1619bf215546Sopenharmony_ci            } else if (usage < NINE_DECLUSAGE_NONE)
1620bf215546Sopenharmony_ci                key.passthrough |= 1 << usage;
1621bf215546Sopenharmony_ci        }
1622bf215546Sopenharmony_ci    }
1623bf215546Sopenharmony_ci    /* ff vs + ps 3.0: some elements are passed to the ps (wine test).
1624bf215546Sopenharmony_ci     * We do restrict to indices 0 */
1625bf215546Sopenharmony_ci    key.passthrough &= ~((1 << NINE_DECLUSAGE_POSITION) | (1 << NINE_DECLUSAGE_PSIZE) |
1626bf215546Sopenharmony_ci                         (1 << NINE_DECLUSAGE_TEXCOORD) | (1 << NINE_DECLUSAGE_POSITIONT) |
1627bf215546Sopenharmony_ci                         (1 << NINE_DECLUSAGE_TESSFACTOR) | (1 << NINE_DECLUSAGE_SAMPLE));
1628bf215546Sopenharmony_ci    if (!key.position_t)
1629bf215546Sopenharmony_ci        key.passthrough = 0;
1630bf215546Sopenharmony_ci    key.pointscale = !!context->rs[D3DRS_POINTSCALEENABLE];
1631bf215546Sopenharmony_ci
1632bf215546Sopenharmony_ci    key.lighting = !!context->rs[D3DRS_LIGHTING] &&  context->ff.num_lights_active;
1633bf215546Sopenharmony_ci    key.darkness = !!context->rs[D3DRS_LIGHTING] && !context->ff.num_lights_active;
1634bf215546Sopenharmony_ci    if (key.position_t) {
1635bf215546Sopenharmony_ci        key.darkness = 0; /* |= key.lighting; */ /* XXX ? */
1636bf215546Sopenharmony_ci        key.lighting = 0;
1637bf215546Sopenharmony_ci    }
1638bf215546Sopenharmony_ci    if ((key.lighting | key.darkness) && context->rs[D3DRS_COLORVERTEX]) {
1639bf215546Sopenharmony_ci        uint32_t mask = (key.color0in_one ? 0 : 1) | (key.color1in_zero ? 0 : 2);
1640bf215546Sopenharmony_ci        key.mtl_diffuse = context->rs[D3DRS_DIFFUSEMATERIALSOURCE] & mask;
1641bf215546Sopenharmony_ci        key.mtl_ambient = context->rs[D3DRS_AMBIENTMATERIALSOURCE] & mask;
1642bf215546Sopenharmony_ci        key.mtl_specular = context->rs[D3DRS_SPECULARMATERIALSOURCE] & mask;
1643bf215546Sopenharmony_ci        key.mtl_emissive = context->rs[D3DRS_EMISSIVEMATERIALSOURCE] & mask;
1644bf215546Sopenharmony_ci    }
1645bf215546Sopenharmony_ci    key.fog = !!context->rs[D3DRS_FOGENABLE];
1646bf215546Sopenharmony_ci    key.fog_mode = (!key.position_t && context->rs[D3DRS_FOGENABLE]) ? context->rs[D3DRS_FOGVERTEXMODE] : 0;
1647bf215546Sopenharmony_ci    if (key.fog_mode)
1648bf215546Sopenharmony_ci        key.fog_range = context->rs[D3DRS_RANGEFOGENABLE];
1649bf215546Sopenharmony_ci
1650bf215546Sopenharmony_ci    key.localviewer = !!context->rs[D3DRS_LOCALVIEWER];
1651bf215546Sopenharmony_ci    key.normalizenormals = !!context->rs[D3DRS_NORMALIZENORMALS];
1652bf215546Sopenharmony_ci    key.ucp = !!context->rs[D3DRS_CLIPPLANEENABLE];
1653bf215546Sopenharmony_ci
1654bf215546Sopenharmony_ci    if (context->rs[D3DRS_VERTEXBLEND] != D3DVBF_DISABLE) {
1655bf215546Sopenharmony_ci        key.vertexblend_indexed = !!context->rs[D3DRS_INDEXEDVERTEXBLENDENABLE] && has_indexes;
1656bf215546Sopenharmony_ci
1657bf215546Sopenharmony_ci        switch (context->rs[D3DRS_VERTEXBLEND]) {
1658bf215546Sopenharmony_ci        case D3DVBF_0WEIGHTS: key.vertexblend = key.vertexblend_indexed; break;
1659bf215546Sopenharmony_ci        case D3DVBF_1WEIGHTS: key.vertexblend = 2; break;
1660bf215546Sopenharmony_ci        case D3DVBF_2WEIGHTS: key.vertexblend = 3; break;
1661bf215546Sopenharmony_ci        case D3DVBF_3WEIGHTS: key.vertexblend = 4; break;
1662bf215546Sopenharmony_ci        case D3DVBF_TWEENING: key.vertextween = 1; break;
1663bf215546Sopenharmony_ci        default:
1664bf215546Sopenharmony_ci            assert(!"invalid D3DVBF");
1665bf215546Sopenharmony_ci            break;
1666bf215546Sopenharmony_ci        }
1667bf215546Sopenharmony_ci        if (!has_weights && context->rs[D3DRS_VERTEXBLEND] != D3DVBF_0WEIGHTS)
1668bf215546Sopenharmony_ci            key.vertexblend = 0; /* TODO: if key.vertexblend_indexed, perhaps it should use 1.0 as weight, or revert to D3DVBF_0WEIGHTS */
1669bf215546Sopenharmony_ci    }
1670bf215546Sopenharmony_ci
1671bf215546Sopenharmony_ci    for (s = 0; s < 8; ++s) {
1672bf215546Sopenharmony_ci        unsigned gen = (context->ff.tex_stage[s][D3DTSS_TEXCOORDINDEX] >> 16) + 1;
1673bf215546Sopenharmony_ci        unsigned idx = context->ff.tex_stage[s][D3DTSS_TEXCOORDINDEX] & 7;
1674bf215546Sopenharmony_ci        unsigned dim;
1675bf215546Sopenharmony_ci
1676bf215546Sopenharmony_ci        if (key.position_t && gen > NINED3DTSS_TCI_PASSTHRU)
1677bf215546Sopenharmony_ci            gen = NINED3DTSS_TCI_PASSTHRU;
1678bf215546Sopenharmony_ci
1679bf215546Sopenharmony_ci        if (!input_texture_coord[idx] && gen == NINED3DTSS_TCI_PASSTHRU)
1680bf215546Sopenharmony_ci            gen = NINED3DTSS_TCI_DISABLE;
1681bf215546Sopenharmony_ci
1682bf215546Sopenharmony_ci        key.tc_gen |= gen << (s * 3);
1683bf215546Sopenharmony_ci        key.tc_idx |= idx << (s * 3);
1684bf215546Sopenharmony_ci        key.tc_dim_input |= ((input_texture_coord[idx]-1) & 0x3) << (s * 2);
1685bf215546Sopenharmony_ci
1686bf215546Sopenharmony_ci        dim = context->ff.tex_stage[s][D3DTSS_TEXTURETRANSFORMFLAGS] & 0x7;
1687bf215546Sopenharmony_ci        if (dim > 4)
1688bf215546Sopenharmony_ci            dim = input_texture_coord[idx];
1689bf215546Sopenharmony_ci        if (dim == 1) /* NV behaviour */
1690bf215546Sopenharmony_ci            dim = 0;
1691bf215546Sopenharmony_ci        key.tc_dim_output |= dim << (s * 3);
1692bf215546Sopenharmony_ci    }
1693bf215546Sopenharmony_ci
1694bf215546Sopenharmony_ci    DBG("VS ff key hash: %x\n", nine_ff_vs_key_hash(&key));
1695bf215546Sopenharmony_ci    vs = util_hash_table_get(device->ff.ht_vs, &key);
1696bf215546Sopenharmony_ci    if (vs)
1697bf215546Sopenharmony_ci        return vs;
1698bf215546Sopenharmony_ci    NineVertexShader9_new(device, &vs, NULL, nine_ff_build_vs(device, &bld));
1699bf215546Sopenharmony_ci
1700bf215546Sopenharmony_ci    nine_ff_prune_vs(device);
1701bf215546Sopenharmony_ci    if (vs) {
1702bf215546Sopenharmony_ci        unsigned n;
1703bf215546Sopenharmony_ci
1704bf215546Sopenharmony_ci        memcpy(&vs->ff_key, &key, sizeof(vs->ff_key));
1705bf215546Sopenharmony_ci
1706bf215546Sopenharmony_ci        _mesa_hash_table_insert(device->ff.ht_vs, &vs->ff_key, vs);
1707bf215546Sopenharmony_ci        device->ff.num_vs++;
1708bf215546Sopenharmony_ci
1709bf215546Sopenharmony_ci        vs->num_inputs = bld.num_inputs;
1710bf215546Sopenharmony_ci        for (n = 0; n < bld.num_inputs; ++n)
1711bf215546Sopenharmony_ci            vs->input_map[n].ndecl = bld.input[n];
1712bf215546Sopenharmony_ci
1713bf215546Sopenharmony_ci        vs->position_t = key.position_t;
1714bf215546Sopenharmony_ci        vs->point_size = key.vertexpointsize | key.pointscale;
1715bf215546Sopenharmony_ci    }
1716bf215546Sopenharmony_ci    return vs;
1717bf215546Sopenharmony_ci}
1718bf215546Sopenharmony_ci
/* Fetch the D3DTS_<n> transform through nine_state_access_transform().
 * The expansion refers to a variable named `context`, so a pointer with an
 * `ff` member must be in scope under that name wherever this is used. */
#define GET_D3DTS(n) nine_state_access_transform(&context->ff, D3DTS_##n, FALSE)
/* Non-zero when the flag for transform D3DTS_<n> is set in
 * (s)->ff.changed.transform, which is indexed as 32 flags per array element.
 * The mask starts from an unsigned 1 so that selecting flag 31 of an element
 * does not left-shift a signed int into its sign bit (undefined behaviour). */
#define IS_D3DTS_DIRTY(s,n) ((s)->ff.changed.transform[(D3DTS_##n) / 32] & (1u << ((D3DTS_##n) % 32)))
1721bf215546Sopenharmony_ci
1722bf215546Sopenharmony_cistatic struct NinePixelShader9 *
1723bf215546Sopenharmony_cinine_ff_get_ps(struct NineDevice9 *device)
1724bf215546Sopenharmony_ci{
1725bf215546Sopenharmony_ci    struct nine_context *context = &device->context;
1726bf215546Sopenharmony_ci    D3DMATRIX *projection_matrix = GET_D3DTS(PROJECTION);
1727bf215546Sopenharmony_ci    struct NinePixelShader9 *ps;
1728bf215546Sopenharmony_ci    struct nine_ff_ps_key key;
1729bf215546Sopenharmony_ci    unsigned s;
1730bf215546Sopenharmony_ci    uint8_t sampler_mask = 0;
1731bf215546Sopenharmony_ci
1732bf215546Sopenharmony_ci    assert(sizeof(key) <= sizeof(key.value32));
1733bf215546Sopenharmony_ci
1734bf215546Sopenharmony_ci    memset(&key, 0, sizeof(key));
1735bf215546Sopenharmony_ci    for (s = 0; s < 8; ++s) {
1736bf215546Sopenharmony_ci        key.ts[s].colorop = context->ff.tex_stage[s][D3DTSS_COLOROP];
1737bf215546Sopenharmony_ci        key.ts[s].alphaop = context->ff.tex_stage[s][D3DTSS_ALPHAOP];
1738bf215546Sopenharmony_ci        const uint8_t used_c = ps_d3dtop_args_mask(key.ts[s].colorop);
1739bf215546Sopenharmony_ci        const uint8_t used_a = ps_d3dtop_args_mask(key.ts[s].alphaop);
1740bf215546Sopenharmony_ci        /* MSDN says D3DTOP_DISABLE disables this and all subsequent stages.
1741bf215546Sopenharmony_ci         * ALPHAOP cannot be enabled if COLOROP is disabled.
1742bf215546Sopenharmony_ci         * Verified on Windows. */
1743bf215546Sopenharmony_ci        if (key.ts[s].colorop == D3DTOP_DISABLE) {
1744bf215546Sopenharmony_ci            key.ts[s].alphaop = D3DTOP_DISABLE; /* DISABLE == 1, avoid degenerate keys */
1745bf215546Sopenharmony_ci            break;
1746bf215546Sopenharmony_ci        }
1747bf215546Sopenharmony_ci
1748bf215546Sopenharmony_ci        if (!context->texture[s].enabled &&
1749bf215546Sopenharmony_ci            ((context->ff.tex_stage[s][D3DTSS_COLORARG0] == D3DTA_TEXTURE &&
1750bf215546Sopenharmony_ci              used_c & 0x1) ||
1751bf215546Sopenharmony_ci             (context->ff.tex_stage[s][D3DTSS_COLORARG1] == D3DTA_TEXTURE &&
1752bf215546Sopenharmony_ci              used_c & 0x2) ||
1753bf215546Sopenharmony_ci             (context->ff.tex_stage[s][D3DTSS_COLORARG2] == D3DTA_TEXTURE &&
1754bf215546Sopenharmony_ci              used_c & 0x4))) {
1755bf215546Sopenharmony_ci            /* Tested on Windows: Invalid texture read disables the stage
1756bf215546Sopenharmony_ci             * and the subsequent ones, but only for colorop. For alpha,
1757bf215546Sopenharmony_ci             * it's as if the texture had alpha of 1.0, which is what
1758bf215546Sopenharmony_ci             * has our dummy texture in that case. Invalid color also
1759bf215546Sopenharmony_ci             * disabled the following alpha stages. */
1760bf215546Sopenharmony_ci            key.ts[s].colorop = key.ts[s].alphaop = D3DTOP_DISABLE;
1761bf215546Sopenharmony_ci            break;
1762bf215546Sopenharmony_ci        }
1763bf215546Sopenharmony_ci
1764bf215546Sopenharmony_ci        if (context->ff.tex_stage[s][D3DTSS_COLORARG0] == D3DTA_TEXTURE ||
1765bf215546Sopenharmony_ci            context->ff.tex_stage[s][D3DTSS_COLORARG1] == D3DTA_TEXTURE ||
1766bf215546Sopenharmony_ci            context->ff.tex_stage[s][D3DTSS_COLORARG2] == D3DTA_TEXTURE ||
1767bf215546Sopenharmony_ci            context->ff.tex_stage[s][D3DTSS_ALPHAARG0] == D3DTA_TEXTURE ||
1768bf215546Sopenharmony_ci            context->ff.tex_stage[s][D3DTSS_ALPHAARG1] == D3DTA_TEXTURE ||
1769bf215546Sopenharmony_ci            context->ff.tex_stage[s][D3DTSS_ALPHAARG2] == D3DTA_TEXTURE)
1770bf215546Sopenharmony_ci            sampler_mask |= (1 << s);
1771bf215546Sopenharmony_ci
1772bf215546Sopenharmony_ci        if (key.ts[s].colorop != D3DTOP_DISABLE) {
1773bf215546Sopenharmony_ci            if (used_c & 0x1) key.ts[s].colorarg0 = context->ff.tex_stage[s][D3DTSS_COLORARG0] & 0x7;
1774bf215546Sopenharmony_ci            if (used_c & 0x2) key.ts[s].colorarg1 = context->ff.tex_stage[s][D3DTSS_COLORARG1] & 0x7;
1775bf215546Sopenharmony_ci            if (used_c & 0x4) key.ts[s].colorarg2 = context->ff.tex_stage[s][D3DTSS_COLORARG2] & 0x7;
1776bf215546Sopenharmony_ci            if (used_c & 0x1) key.colorarg_b4[0] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG0] >> 4) & 0x1) << s;
1777bf215546Sopenharmony_ci            if (used_c & 0x1) key.colorarg_b5[0] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG0] >> 5) & 0x1) << s;
1778bf215546Sopenharmony_ci            if (used_c & 0x2) key.colorarg_b4[1] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG1] >> 4) & 0x1) << s;
1779bf215546Sopenharmony_ci            if (used_c & 0x2) key.colorarg_b5[1] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG1] >> 5) & 0x1) << s;
1780bf215546Sopenharmony_ci            if (used_c & 0x4) key.colorarg_b4[2] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG2] >> 4) & 0x1) << s;
1781bf215546Sopenharmony_ci            if (used_c & 0x4) key.colorarg_b5[2] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG2] >> 5) & 0x1) << s;
1782bf215546Sopenharmony_ci        }
1783bf215546Sopenharmony_ci        if (key.ts[s].alphaop != D3DTOP_DISABLE) {
1784bf215546Sopenharmony_ci            if (used_a & 0x1) key.ts[s].alphaarg0 = context->ff.tex_stage[s][D3DTSS_ALPHAARG0] & 0x7;
1785bf215546Sopenharmony_ci            if (used_a & 0x2) key.ts[s].alphaarg1 = context->ff.tex_stage[s][D3DTSS_ALPHAARG1] & 0x7;
1786bf215546Sopenharmony_ci            if (used_a & 0x4) key.ts[s].alphaarg2 = context->ff.tex_stage[s][D3DTSS_ALPHAARG2] & 0x7;
1787bf215546Sopenharmony_ci            if (used_a & 0x1) key.alphaarg_b4[0] |= ((context->ff.tex_stage[s][D3DTSS_ALPHAARG0] >> 4) & 0x1) << s;
1788bf215546Sopenharmony_ci            if (used_a & 0x2) key.alphaarg_b4[1] |= ((context->ff.tex_stage[s][D3DTSS_ALPHAARG1] >> 4) & 0x1) << s;
1789bf215546Sopenharmony_ci            if (used_a & 0x4) key.alphaarg_b4[2] |= ((context->ff.tex_stage[s][D3DTSS_ALPHAARG2] >> 4) & 0x1) << s;
1790bf215546Sopenharmony_ci        }
1791bf215546Sopenharmony_ci        key.ts[s].resultarg = context->ff.tex_stage[s][D3DTSS_RESULTARG] == D3DTA_TEMP;
1792bf215546Sopenharmony_ci
1793bf215546Sopenharmony_ci        if (context->texture[s].enabled) {
1794bf215546Sopenharmony_ci            switch (context->texture[s].type) {
1795bf215546Sopenharmony_ci            case D3DRTYPE_TEXTURE:       key.ts[s].textarget = 1; break;
1796bf215546Sopenharmony_ci            case D3DRTYPE_VOLUMETEXTURE: key.ts[s].textarget = 2; break;
1797bf215546Sopenharmony_ci            case D3DRTYPE_CUBETEXTURE:   key.ts[s].textarget = 3; break;
1798bf215546Sopenharmony_ci            default:
1799bf215546Sopenharmony_ci                assert(!"unexpected texture type");
1800bf215546Sopenharmony_ci                break;
1801bf215546Sopenharmony_ci            }
1802bf215546Sopenharmony_ci        } else {
1803bf215546Sopenharmony_ci            key.ts[s].textarget = 1;
1804bf215546Sopenharmony_ci        }
1805bf215546Sopenharmony_ci    }
1806bf215546Sopenharmony_ci
1807bf215546Sopenharmony_ci    /* Note: If colorop is D3DTOP_DISABLE for the first stage
1808bf215546Sopenharmony_ci     * (which implies alphaop is too), nothing particular happens,
1809bf215546Sopenharmony_ci     * that is, current is equal to diffuse (which is the case anyway,
1810bf215546Sopenharmony_ci     * because it is how it is initialized).
1811bf215546Sopenharmony_ci     * Special case seems if alphaop is D3DTOP_DISABLE and not colorop,
1812bf215546Sopenharmony_ci     * because then if the resultarg is TEMP, then diffuse alpha is written
1813bf215546Sopenharmony_ci     * to it. */
1814bf215546Sopenharmony_ci    if (key.ts[0].colorop != D3DTOP_DISABLE &&
1815bf215546Sopenharmony_ci        key.ts[0].alphaop == D3DTOP_DISABLE &&
1816bf215546Sopenharmony_ci        key.ts[0].resultarg != 0) {
1817bf215546Sopenharmony_ci        key.ts[0].alphaop = D3DTOP_SELECTARG1;
1818bf215546Sopenharmony_ci        key.ts[0].alphaarg1 = D3DTA_DIFFUSE;
1819bf215546Sopenharmony_ci    }
1820bf215546Sopenharmony_ci    /* When no alpha stage writes to current, diffuse alpha is taken.
1821bf215546Sopenharmony_ci     * Since we initialize current to diffuse, we have the behaviour. */
1822bf215546Sopenharmony_ci
1823bf215546Sopenharmony_ci    /* Last stage always writes to Current */
1824bf215546Sopenharmony_ci    if (s >= 1)
1825bf215546Sopenharmony_ci        key.ts[s-1].resultarg = 0;
1826bf215546Sopenharmony_ci
1827bf215546Sopenharmony_ci    key.projected = nine_ff_get_projected_key_ff(context);
1828bf215546Sopenharmony_ci    key.specular = !!context->rs[D3DRS_SPECULARENABLE];
1829bf215546Sopenharmony_ci
1830bf215546Sopenharmony_ci    for (; s < 8; ++s)
1831bf215546Sopenharmony_ci        key.ts[s].colorop = key.ts[s].alphaop = D3DTOP_DISABLE;
1832bf215546Sopenharmony_ci    if (context->rs[D3DRS_FOGENABLE])
1833bf215546Sopenharmony_ci        key.fog_mode = context->rs[D3DRS_FOGTABLEMODE];
1834bf215546Sopenharmony_ci    key.fog = !!context->rs[D3DRS_FOGENABLE];
1835bf215546Sopenharmony_ci    /* Pixel fog (with WFOG advertised): source is either Z or W.
1836bf215546Sopenharmony_ci     * W is the source if vs ff is used, and the
1837bf215546Sopenharmony_ci     * projection matrix is not orthogonal.
1838bf215546Sopenharmony_ci     * Tests on Win 10 seem to indicate _34
1839bf215546Sopenharmony_ci     * and _33 are checked against 0, 1. */
1840bf215546Sopenharmony_ci    if (key.fog_mode && key.fog)
1841bf215546Sopenharmony_ci        key.fog_source = !context->programmable_vs &&
1842bf215546Sopenharmony_ci            !(projection_matrix->_34 == 0.0f &&
1843bf215546Sopenharmony_ci              projection_matrix->_44 == 1.0f);
1844bf215546Sopenharmony_ci
1845bf215546Sopenharmony_ci    DBG("PS ff key hash: %x\n", nine_ff_ps_key_hash(&key));
1846bf215546Sopenharmony_ci    ps = util_hash_table_get(device->ff.ht_ps, &key);
1847bf215546Sopenharmony_ci    if (ps)
1848bf215546Sopenharmony_ci        return ps;
1849bf215546Sopenharmony_ci    NinePixelShader9_new(device, &ps, NULL, nine_ff_build_ps(device, &key));
1850bf215546Sopenharmony_ci
1851bf215546Sopenharmony_ci    nine_ff_prune_ps(device);
1852bf215546Sopenharmony_ci    if (ps) {
1853bf215546Sopenharmony_ci        memcpy(&ps->ff_key, &key, sizeof(ps->ff_key));
1854bf215546Sopenharmony_ci
1855bf215546Sopenharmony_ci        _mesa_hash_table_insert(device->ff.ht_ps, &ps->ff_key, ps);
1856bf215546Sopenharmony_ci        device->ff.num_ps++;
1857bf215546Sopenharmony_ci
1858bf215546Sopenharmony_ci        ps->rt_mask = 0x1;
1859bf215546Sopenharmony_ci        ps->sampler_mask = sampler_mask;
1860bf215546Sopenharmony_ci    }
1861bf215546Sopenharmony_ci    return ps;
1862bf215546Sopenharmony_ci}
1863bf215546Sopenharmony_ci
1864bf215546Sopenharmony_cistatic void
1865bf215546Sopenharmony_cinine_ff_load_vs_transforms(struct NineDevice9 *device)
1866bf215546Sopenharmony_ci{
1867bf215546Sopenharmony_ci    struct nine_context *context = &device->context;
1868bf215546Sopenharmony_ci    D3DMATRIX T;
1869bf215546Sopenharmony_ci    D3DMATRIX *M = (D3DMATRIX *)device->ff.vs_const;
1870bf215546Sopenharmony_ci    unsigned i;
1871bf215546Sopenharmony_ci
1872bf215546Sopenharmony_ci    /* TODO: make this nicer, and only upload the ones we need */
1873bf215546Sopenharmony_ci    /* TODO: use ff.vs_const as storage of W, V, P matrices */
1874bf215546Sopenharmony_ci
1875bf215546Sopenharmony_ci    if (IS_D3DTS_DIRTY(context, WORLD) ||
1876bf215546Sopenharmony_ci        IS_D3DTS_DIRTY(context, VIEW) ||
1877bf215546Sopenharmony_ci        IS_D3DTS_DIRTY(context, PROJECTION)) {
1878bf215546Sopenharmony_ci        /* WVP, WV matrices */
1879bf215546Sopenharmony_ci        nine_d3d_matrix_matrix_mul(&M[1], GET_D3DTS(WORLD), GET_D3DTS(VIEW));
1880bf215546Sopenharmony_ci        nine_d3d_matrix_matrix_mul(&M[0], &M[1], GET_D3DTS(PROJECTION));
1881bf215546Sopenharmony_ci
1882bf215546Sopenharmony_ci        /* normal matrix == transpose(inverse(WV)) */
1883bf215546Sopenharmony_ci        nine_d3d_matrix_inverse(&T, &M[1]);
1884bf215546Sopenharmony_ci        nine_d3d_matrix_transpose(&M[4], &T);
1885bf215546Sopenharmony_ci
1886bf215546Sopenharmony_ci        /* P matrix */
1887bf215546Sopenharmony_ci        M[2] = *GET_D3DTS(PROJECTION);
1888bf215546Sopenharmony_ci
1889bf215546Sopenharmony_ci        /* V and W matrix */
1890bf215546Sopenharmony_ci        nine_d3d_matrix_inverse(&M[3], GET_D3DTS(VIEW));
1891bf215546Sopenharmony_ci        M[40] = M[1];
1892bf215546Sopenharmony_ci    }
1893bf215546Sopenharmony_ci
1894bf215546Sopenharmony_ci    if (context->rs[D3DRS_VERTEXBLEND] != D3DVBF_DISABLE) {
1895bf215546Sopenharmony_ci        /* load other world matrices */
1896bf215546Sopenharmony_ci        for (i = 1; i <= 8; ++i) {
1897bf215546Sopenharmony_ci            nine_d3d_matrix_matrix_mul(&M[40 + i], GET_D3DTS(WORLDMATRIX(i)), GET_D3DTS(VIEW));
1898bf215546Sopenharmony_ci        }
1899bf215546Sopenharmony_ci    }
1900bf215546Sopenharmony_ci
1901bf215546Sopenharmony_ci    device->ff.vs_const[30 * 4] = asfloat(context->rs[D3DRS_TWEENFACTOR]);
1902bf215546Sopenharmony_ci}
1903bf215546Sopenharmony_ci
1904bf215546Sopenharmony_cistatic void
1905bf215546Sopenharmony_cinine_ff_load_lights(struct NineDevice9 *device)
1906bf215546Sopenharmony_ci{
1907bf215546Sopenharmony_ci    struct nine_context *context = &device->context;
1908bf215546Sopenharmony_ci    struct fvec4 *dst = (struct fvec4 *)device->ff.vs_const;
1909bf215546Sopenharmony_ci    unsigned l;
1910bf215546Sopenharmony_ci
1911bf215546Sopenharmony_ci    if (context->changed.group & NINE_STATE_FF_MATERIAL) {
1912bf215546Sopenharmony_ci        const D3DMATERIAL9 *mtl = &context->ff.material;
1913bf215546Sopenharmony_ci
1914bf215546Sopenharmony_ci        memcpy(&dst[20], &mtl->Diffuse, 4 * sizeof(float));
1915bf215546Sopenharmony_ci        memcpy(&dst[21], &mtl->Ambient, 4 * sizeof(float));
1916bf215546Sopenharmony_ci        memcpy(&dst[22], &mtl->Specular, 4 * sizeof(float));
1917bf215546Sopenharmony_ci        dst[23].x = mtl->Power;
1918bf215546Sopenharmony_ci        memcpy(&dst[24], &mtl->Emissive, 4 * sizeof(float));
1919bf215546Sopenharmony_ci        d3dcolor_to_rgba(&dst[25].x, context->rs[D3DRS_AMBIENT]);
1920bf215546Sopenharmony_ci        dst[19].x = dst[25].x * mtl->Ambient.r + mtl->Emissive.r;
1921bf215546Sopenharmony_ci        dst[19].y = dst[25].y * mtl->Ambient.g + mtl->Emissive.g;
1922bf215546Sopenharmony_ci        dst[19].z = dst[25].z * mtl->Ambient.b + mtl->Emissive.b;
1923bf215546Sopenharmony_ci    }
1924bf215546Sopenharmony_ci
1925bf215546Sopenharmony_ci    if (!(context->changed.group & NINE_STATE_FF_LIGHTING))
1926bf215546Sopenharmony_ci        return;
1927bf215546Sopenharmony_ci
1928bf215546Sopenharmony_ci    for (l = 0; l < context->ff.num_lights_active; ++l) {
1929bf215546Sopenharmony_ci        const D3DLIGHT9 *light = &context->ff.light[context->ff.active_light[l]];
1930bf215546Sopenharmony_ci
1931bf215546Sopenharmony_ci        dst[32 + l * 8].x = light->Type;
1932bf215546Sopenharmony_ci        dst[32 + l * 8].y = light->Attenuation0;
1933bf215546Sopenharmony_ci        dst[32 + l * 8].z = light->Attenuation1;
1934bf215546Sopenharmony_ci        dst[32 + l * 8].w = light->Attenuation2;
1935bf215546Sopenharmony_ci        memcpy(&dst[33 + l * 8].x, &light->Diffuse, sizeof(light->Diffuse));
1936bf215546Sopenharmony_ci        memcpy(&dst[34 + l * 8].x, &light->Specular, sizeof(light->Specular));
1937bf215546Sopenharmony_ci        memcpy(&dst[35 + l * 8].x, &light->Ambient, sizeof(light->Ambient));
1938bf215546Sopenharmony_ci        nine_d3d_vector4_matrix_mul((D3DVECTOR *)&dst[36 + l * 8].x, &light->Position, GET_D3DTS(VIEW));
1939bf215546Sopenharmony_ci        nine_d3d_vector3_matrix_mul((D3DVECTOR *)&dst[37 + l * 8].x, &light->Direction, GET_D3DTS(VIEW));
1940bf215546Sopenharmony_ci        dst[36 + l * 8].w = light->Type == D3DLIGHT_DIRECTIONAL ? 1e9f : light->Range;
1941bf215546Sopenharmony_ci        dst[37 + l * 8].w = light->Falloff;
1942bf215546Sopenharmony_ci        dst[38 + l * 8].x = cosf(light->Theta * 0.5f);
1943bf215546Sopenharmony_ci        dst[38 + l * 8].y = cosf(light->Phi * 0.5f);
1944bf215546Sopenharmony_ci        dst[38 + l * 8].z = 1.0f / (dst[38 + l * 8].x - dst[38 + l * 8].y);
1945bf215546Sopenharmony_ci        dst[39 + l * 8].w = (float)((l + 1) == context->ff.num_lights_active);
1946bf215546Sopenharmony_ci    }
1947bf215546Sopenharmony_ci}
1948bf215546Sopenharmony_ci
1949bf215546Sopenharmony_cistatic void
1950bf215546Sopenharmony_cinine_ff_load_point_and_fog_params(struct NineDevice9 *device)
1951bf215546Sopenharmony_ci{
1952bf215546Sopenharmony_ci    struct nine_context *context = &device->context;
1953bf215546Sopenharmony_ci    struct fvec4 *dst = (struct fvec4 *)device->ff.vs_const;
1954bf215546Sopenharmony_ci
1955bf215546Sopenharmony_ci    if (!(context->changed.group & NINE_STATE_FF_VS_OTHER))
1956bf215546Sopenharmony_ci        return;
1957bf215546Sopenharmony_ci    dst[26].x = asfloat(context->rs[D3DRS_POINTSIZE_MIN]);
1958bf215546Sopenharmony_ci    dst[26].y = asfloat(context->rs[D3DRS_POINTSIZE_MAX]);
1959bf215546Sopenharmony_ci    dst[26].z = asfloat(context->rs[D3DRS_POINTSIZE]);
1960bf215546Sopenharmony_ci    dst[26].w = asfloat(context->rs[D3DRS_POINTSCALE_A]);
1961bf215546Sopenharmony_ci    dst[27].x = asfloat(context->rs[D3DRS_POINTSCALE_B]);
1962bf215546Sopenharmony_ci    dst[27].y = asfloat(context->rs[D3DRS_POINTSCALE_C]);
1963bf215546Sopenharmony_ci    dst[28].x = asfloat(context->rs[D3DRS_FOGEND]);
1964bf215546Sopenharmony_ci    dst[28].y = 1.0f / (asfloat(context->rs[D3DRS_FOGEND]) - asfloat(context->rs[D3DRS_FOGSTART]));
1965bf215546Sopenharmony_ci    if (isinf(dst[28].y))
1966bf215546Sopenharmony_ci        dst[28].y = 0.0f;
1967bf215546Sopenharmony_ci    dst[28].z = asfloat(context->rs[D3DRS_FOGDENSITY]);
1968bf215546Sopenharmony_ci}
1969bf215546Sopenharmony_ci
1970bf215546Sopenharmony_cistatic void
1971bf215546Sopenharmony_cinine_ff_load_tex_matrices(struct NineDevice9 *device)
1972bf215546Sopenharmony_ci{
1973bf215546Sopenharmony_ci    struct nine_context *context = &device->context;
1974bf215546Sopenharmony_ci    D3DMATRIX *M = (D3DMATRIX *)device->ff.vs_const;
1975bf215546Sopenharmony_ci    unsigned s;
1976bf215546Sopenharmony_ci
1977bf215546Sopenharmony_ci    if (!(context->ff.changed.transform[0] & 0xff0000))
1978bf215546Sopenharmony_ci        return;
1979bf215546Sopenharmony_ci    for (s = 0; s < 8; ++s) {
1980bf215546Sopenharmony_ci        if (IS_D3DTS_DIRTY(context, TEXTURE0 + s))
1981bf215546Sopenharmony_ci            nine_d3d_matrix_transpose(&M[32 + s], nine_state_access_transform(&context->ff, D3DTS_TEXTURE0 + s, FALSE));
1982bf215546Sopenharmony_ci    }
1983bf215546Sopenharmony_ci}
1984bf215546Sopenharmony_ci
1985bf215546Sopenharmony_cistatic void
1986bf215546Sopenharmony_cinine_ff_load_ps_params(struct NineDevice9 *device)
1987bf215546Sopenharmony_ci{
1988bf215546Sopenharmony_ci    struct nine_context *context = &device->context;
1989bf215546Sopenharmony_ci    struct fvec4 *dst = (struct fvec4 *)device->ff.ps_const;
1990bf215546Sopenharmony_ci    unsigned s;
1991bf215546Sopenharmony_ci
1992bf215546Sopenharmony_ci    if (!(context->changed.group & NINE_STATE_FF_PS_CONSTS))
1993bf215546Sopenharmony_ci        return;
1994bf215546Sopenharmony_ci
1995bf215546Sopenharmony_ci    for (s = 0; s < 8; ++s)
1996bf215546Sopenharmony_ci        d3dcolor_to_rgba(&dst[s].x, context->ff.tex_stage[s][D3DTSS_CONSTANT]);
1997bf215546Sopenharmony_ci
1998bf215546Sopenharmony_ci    for (s = 0; s < 8; ++s) {
1999bf215546Sopenharmony_ci        dst[8 + s].x = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVMAT00]);
2000bf215546Sopenharmony_ci        dst[8 + s].y = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVMAT01]);
2001bf215546Sopenharmony_ci        dst[8 + s].z = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVMAT10]);
2002bf215546Sopenharmony_ci        dst[8 + s].w = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVMAT11]);
2003bf215546Sopenharmony_ci        if (s & 1) {
2004bf215546Sopenharmony_ci            dst[16 + s / 2].z = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVLSCALE]);
2005bf215546Sopenharmony_ci            dst[16 + s / 2].w = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVLOFFSET]);
2006bf215546Sopenharmony_ci        } else {
2007bf215546Sopenharmony_ci            dst[16 + s / 2].x = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVLSCALE]);
2008bf215546Sopenharmony_ci            dst[16 + s / 2].y = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVLOFFSET]);
2009bf215546Sopenharmony_ci        }
2010bf215546Sopenharmony_ci    }
2011bf215546Sopenharmony_ci
2012bf215546Sopenharmony_ci    d3dcolor_to_rgba(&dst[20].x, context->rs[D3DRS_TEXTUREFACTOR]);
2013bf215546Sopenharmony_ci    d3dcolor_to_rgba(&dst[21].x, context->rs[D3DRS_FOGCOLOR]);
2014bf215546Sopenharmony_ci    dst[22].x = asfloat(context->rs[D3DRS_FOGEND]);
2015bf215546Sopenharmony_ci    dst[22].y = 1.0f / (asfloat(context->rs[D3DRS_FOGEND]) - asfloat(context->rs[D3DRS_FOGSTART]));
2016bf215546Sopenharmony_ci    dst[22].z = asfloat(context->rs[D3DRS_FOGDENSITY]);
2017bf215546Sopenharmony_ci}
2018bf215546Sopenharmony_ci
2019bf215546Sopenharmony_cistatic void
2020bf215546Sopenharmony_cinine_ff_load_viewport_info(struct NineDevice9 *device)
2021bf215546Sopenharmony_ci{
2022bf215546Sopenharmony_ci    D3DVIEWPORT9 *viewport = &device->context.viewport;
2023bf215546Sopenharmony_ci    struct fvec4 *dst = (struct fvec4 *)device->ff.vs_const;
2024bf215546Sopenharmony_ci    float diffZ = viewport->MaxZ - viewport->MinZ;
2025bf215546Sopenharmony_ci
2026bf215546Sopenharmony_ci    /* Note: the other functions avoids to fill the const again if nothing changed.
2027bf215546Sopenharmony_ci     * But we don't have much to fill, and adding code to allow that may be complex
2028bf215546Sopenharmony_ci     * so just fill it always */
2029bf215546Sopenharmony_ci    dst[100].x = 2.0f / (float)(viewport->Width);
2030bf215546Sopenharmony_ci    dst[100].y = 2.0f / (float)(viewport->Height);
2031bf215546Sopenharmony_ci    dst[100].z = (diffZ == 0.0f) ? 0.0f : (1.0f / diffZ);
2032bf215546Sopenharmony_ci    dst[100].w = (float)(viewport->Width);
2033bf215546Sopenharmony_ci    dst[101].x = (float)(viewport->X);
2034bf215546Sopenharmony_ci    dst[101].y = (float)(viewport->Y);
2035bf215546Sopenharmony_ci    dst[101].z = (float)(viewport->MinZ);
2036bf215546Sopenharmony_ci}
2037bf215546Sopenharmony_ci
2038bf215546Sopenharmony_civoid
2039bf215546Sopenharmony_cinine_ff_update(struct NineDevice9 *device)
2040bf215546Sopenharmony_ci{
2041bf215546Sopenharmony_ci    struct nine_context *context = &device->context;
2042bf215546Sopenharmony_ci    struct pipe_constant_buffer cb;
2043bf215546Sopenharmony_ci
2044bf215546Sopenharmony_ci    DBG("vs=%p ps=%p\n", context->vs, context->ps);
2045bf215546Sopenharmony_ci
2046bf215546Sopenharmony_ci    /* NOTE: the only reference belongs to the hash table */
2047bf215546Sopenharmony_ci    if (!context->programmable_vs) {
2048bf215546Sopenharmony_ci        device->ff.vs = nine_ff_get_vs(device);
2049bf215546Sopenharmony_ci        context->changed.group |= NINE_STATE_VS;
2050bf215546Sopenharmony_ci    }
2051bf215546Sopenharmony_ci    if (!context->ps) {
2052bf215546Sopenharmony_ci        device->ff.ps = nine_ff_get_ps(device);
2053bf215546Sopenharmony_ci        context->changed.group |= NINE_STATE_PS;
2054bf215546Sopenharmony_ci    }
2055bf215546Sopenharmony_ci
2056bf215546Sopenharmony_ci    if (!context->programmable_vs) {
2057bf215546Sopenharmony_ci        nine_ff_load_vs_transforms(device);
2058bf215546Sopenharmony_ci        nine_ff_load_tex_matrices(device);
2059bf215546Sopenharmony_ci        nine_ff_load_lights(device);
2060bf215546Sopenharmony_ci        nine_ff_load_point_and_fog_params(device);
2061bf215546Sopenharmony_ci        nine_ff_load_viewport_info(device);
2062bf215546Sopenharmony_ci
2063bf215546Sopenharmony_ci        memset(context->ff.changed.transform, 0, sizeof(context->ff.changed.transform));
2064bf215546Sopenharmony_ci
2065bf215546Sopenharmony_ci        cb.buffer_offset = 0;
2066bf215546Sopenharmony_ci        cb.buffer = NULL;
2067bf215546Sopenharmony_ci        cb.user_buffer = device->ff.vs_const;
2068bf215546Sopenharmony_ci        cb.buffer_size = NINE_FF_NUM_VS_CONST * 4 * sizeof(float);
2069bf215546Sopenharmony_ci
2070bf215546Sopenharmony_ci        context->pipe_data.cb_vs_ff = cb;
2071bf215546Sopenharmony_ci        context->commit |= NINE_STATE_COMMIT_CONST_VS;
2072bf215546Sopenharmony_ci
2073bf215546Sopenharmony_ci        context->changed.group &= ~NINE_STATE_FF_VS;
2074bf215546Sopenharmony_ci    }
2075bf215546Sopenharmony_ci
2076bf215546Sopenharmony_ci    if (!context->ps) {
2077bf215546Sopenharmony_ci        nine_ff_load_ps_params(device);
2078bf215546Sopenharmony_ci
2079bf215546Sopenharmony_ci        cb.buffer_offset = 0;
2080bf215546Sopenharmony_ci        cb.buffer = NULL;
2081bf215546Sopenharmony_ci        cb.user_buffer = device->ff.ps_const;
2082bf215546Sopenharmony_ci        cb.buffer_size = NINE_FF_NUM_PS_CONST * 4 * sizeof(float);
2083bf215546Sopenharmony_ci
2084bf215546Sopenharmony_ci        context->pipe_data.cb_ps_ff = cb;
2085bf215546Sopenharmony_ci        context->commit |= NINE_STATE_COMMIT_CONST_PS;
2086bf215546Sopenharmony_ci
2087bf215546Sopenharmony_ci        context->changed.group &= ~NINE_STATE_FF_PS;
2088bf215546Sopenharmony_ci    }
2089bf215546Sopenharmony_ci}
2090bf215546Sopenharmony_ci
2091bf215546Sopenharmony_ci
2092bf215546Sopenharmony_ciboolean
2093bf215546Sopenharmony_cinine_ff_init(struct NineDevice9 *device)
2094bf215546Sopenharmony_ci{
2095bf215546Sopenharmony_ci    device->ff.ht_vs = _mesa_hash_table_create(NULL, nine_ff_vs_key_hash,
2096bf215546Sopenharmony_ci                                               nine_ff_vs_key_comp);
2097bf215546Sopenharmony_ci    device->ff.ht_ps = _mesa_hash_table_create(NULL, nine_ff_ps_key_hash,
2098bf215546Sopenharmony_ci                                               nine_ff_ps_key_comp);
2099bf215546Sopenharmony_ci
2100bf215546Sopenharmony_ci    device->ff.ht_fvf = _mesa_hash_table_create(NULL, nine_ff_fvf_key_hash,
2101bf215546Sopenharmony_ci                                                nine_ff_fvf_key_comp);
2102bf215546Sopenharmony_ci
2103bf215546Sopenharmony_ci    device->ff.vs_const = CALLOC(NINE_FF_NUM_VS_CONST, 4 * sizeof(float));
2104bf215546Sopenharmony_ci    device->ff.ps_const = CALLOC(NINE_FF_NUM_PS_CONST, 4 * sizeof(float));
2105bf215546Sopenharmony_ci
2106bf215546Sopenharmony_ci    return device->ff.ht_vs && device->ff.ht_ps &&
2107bf215546Sopenharmony_ci        device->ff.ht_fvf &&
2108bf215546Sopenharmony_ci        device->ff.vs_const && device->ff.ps_const;
2109bf215546Sopenharmony_ci}
2110bf215546Sopenharmony_ci
2111bf215546Sopenharmony_cistatic enum pipe_error nine_ff_ht_delete_cb(void *key, void *value, void *data)
2112bf215546Sopenharmony_ci{
2113bf215546Sopenharmony_ci    NineUnknown_Unbind(NineUnknown(value));
2114bf215546Sopenharmony_ci    return PIPE_OK;
2115bf215546Sopenharmony_ci}
2116bf215546Sopenharmony_ci
2117bf215546Sopenharmony_civoid
2118bf215546Sopenharmony_cinine_ff_fini(struct NineDevice9 *device)
2119bf215546Sopenharmony_ci{
2120bf215546Sopenharmony_ci    if (device->ff.ht_vs) {
2121bf215546Sopenharmony_ci        util_hash_table_foreach(device->ff.ht_vs, nine_ff_ht_delete_cb, NULL);
2122bf215546Sopenharmony_ci        _mesa_hash_table_destroy(device->ff.ht_vs, NULL);
2123bf215546Sopenharmony_ci    }
2124bf215546Sopenharmony_ci    if (device->ff.ht_ps) {
2125bf215546Sopenharmony_ci        util_hash_table_foreach(device->ff.ht_ps, nine_ff_ht_delete_cb, NULL);
2126bf215546Sopenharmony_ci        _mesa_hash_table_destroy(device->ff.ht_ps, NULL);
2127bf215546Sopenharmony_ci    }
2128bf215546Sopenharmony_ci    if (device->ff.ht_fvf) {
2129bf215546Sopenharmony_ci        util_hash_table_foreach(device->ff.ht_fvf, nine_ff_ht_delete_cb, NULL);
2130bf215546Sopenharmony_ci        _mesa_hash_table_destroy(device->ff.ht_fvf, NULL);
2131bf215546Sopenharmony_ci    }
2132bf215546Sopenharmony_ci    device->ff.vs = NULL; /* destroyed by unbinding from hash table */
2133bf215546Sopenharmony_ci    device->ff.ps = NULL;
2134bf215546Sopenharmony_ci
2135bf215546Sopenharmony_ci    FREE(device->ff.vs_const);
2136bf215546Sopenharmony_ci    FREE(device->ff.ps_const);
2137bf215546Sopenharmony_ci}
2138bf215546Sopenharmony_ci
2139bf215546Sopenharmony_cistatic void
2140bf215546Sopenharmony_cinine_ff_prune_vs(struct NineDevice9 *device)
2141bf215546Sopenharmony_ci{
2142bf215546Sopenharmony_ci    struct nine_context *context = &device->context;
2143bf215546Sopenharmony_ci
2144bf215546Sopenharmony_ci    if (device->ff.num_vs > 1024) {
2145bf215546Sopenharmony_ci        /* could destroy the bound one here, so unbind */
2146bf215546Sopenharmony_ci        context->pipe->bind_vs_state(context->pipe, NULL);
2147bf215546Sopenharmony_ci        util_hash_table_foreach(device->ff.ht_vs, nine_ff_ht_delete_cb, NULL);
2148bf215546Sopenharmony_ci        _mesa_hash_table_clear(device->ff.ht_vs, NULL);
2149bf215546Sopenharmony_ci        device->ff.num_vs = 0;
2150bf215546Sopenharmony_ci        context->changed.group |= NINE_STATE_VS;
2151bf215546Sopenharmony_ci    }
2152bf215546Sopenharmony_ci}
2153bf215546Sopenharmony_cistatic void
2154bf215546Sopenharmony_cinine_ff_prune_ps(struct NineDevice9 *device)
2155bf215546Sopenharmony_ci{
2156bf215546Sopenharmony_ci    struct nine_context *context = &device->context;
2157bf215546Sopenharmony_ci
2158bf215546Sopenharmony_ci    if (device->ff.num_ps > 1024) {
2159bf215546Sopenharmony_ci        /* could destroy the bound one here, so unbind */
2160bf215546Sopenharmony_ci        context->pipe->bind_fs_state(context->pipe, NULL);
2161bf215546Sopenharmony_ci        util_hash_table_foreach(device->ff.ht_ps, nine_ff_ht_delete_cb, NULL);
2162bf215546Sopenharmony_ci        _mesa_hash_table_clear(device->ff.ht_ps, NULL);
2163bf215546Sopenharmony_ci        device->ff.num_ps = 0;
2164bf215546Sopenharmony_ci        context->changed.group |= NINE_STATE_PS;
2165bf215546Sopenharmony_ci    }
2166bf215546Sopenharmony_ci}
2167bf215546Sopenharmony_ci
2168bf215546Sopenharmony_ci/* ========================================================================== */
2169bf215546Sopenharmony_ci
2170bf215546Sopenharmony_ci/* Matrix multiplication:
2171bf215546Sopenharmony_ci *
2172bf215546Sopenharmony_ci * in memory: 0 1 2 3 (row major)
2173bf215546Sopenharmony_ci *            4 5 6 7
2174bf215546Sopenharmony_ci *            8 9 a b
2175bf215546Sopenharmony_ci *            c d e f
2176bf215546Sopenharmony_ci *
2177bf215546Sopenharmony_ci *    cA cB cC cD
2178bf215546Sopenharmony_ci * r0             = (r0 * cA) (r0 * cB) . .
2179bf215546Sopenharmony_ci * r1             = (r1 * cA) (r1 * cB)
2180bf215546Sopenharmony_ci * r2             = (r2 * cA) .
2181bf215546Sopenharmony_ci * r3             = (r3 * cA) .
2182bf215546Sopenharmony_ci *
2183bf215546Sopenharmony_ci *               r: (11) (12) (13) (14)
2184bf215546Sopenharmony_ci *                  (21) (22) (23) (24)
2185bf215546Sopenharmony_ci *                  (31) (32) (33) (34)
2186bf215546Sopenharmony_ci *                  (41) (42) (43) (44)
2187bf215546Sopenharmony_ci * l: (11 12 13 14)
2188bf215546Sopenharmony_ci *    (21 22 23 24)
2189bf215546Sopenharmony_ci *    (31 32 33 34)
2190bf215546Sopenharmony_ci *    (41 42 43 44)
2191bf215546Sopenharmony_ci *
2192bf215546Sopenharmony_ci * v: (x  y  z  1 )
2193bf215546Sopenharmony_ci *
2194bf215546Sopenharmony_ci * t.xyzw = MUL(v.xxxx, r[0]);
2195bf215546Sopenharmony_ci * t.xyzw = MAD(v.yyyy, r[1], t.xyzw);
2196bf215546Sopenharmony_ci * t.xyzw = MAD(v.zzzz, r[2], t.xyzw);
2197bf215546Sopenharmony_ci * v.xyzw = MAD(v.wwww, r[3], t.xyzw);
2198bf215546Sopenharmony_ci *
2199bf215546Sopenharmony_ci * v.x = DP4(v, c[0]);
2200bf215546Sopenharmony_ci * v.y = DP4(v, c[1]);
2201bf215546Sopenharmony_ci * v.z = DP4(v, c[2]);
2202bf215546Sopenharmony_ci * v.w = DP4(v, c[3]) = 1
2203bf215546Sopenharmony_ci */
2204bf215546Sopenharmony_ci
2205bf215546Sopenharmony_ci/*
2206bf215546Sopenharmony_cistatic void
2207bf215546Sopenharmony_cinine_D3DMATRIX_print(const D3DMATRIX *M)
2208bf215546Sopenharmony_ci{
2209bf215546Sopenharmony_ci    DBG("\n(%f %f %f %f)\n"
2210bf215546Sopenharmony_ci        "(%f %f %f %f)\n"
2211bf215546Sopenharmony_ci        "(%f %f %f %f)\n"
2212bf215546Sopenharmony_ci        "(%f %f %f %f)\n",
2213bf215546Sopenharmony_ci        M->m[0][0], M->m[0][1], M->m[0][2], M->m[0][3],
2214bf215546Sopenharmony_ci        M->m[1][0], M->m[1][1], M->m[1][2], M->m[1][3],
2215bf215546Sopenharmony_ci        M->m[2][0], M->m[2][1], M->m[2][2], M->m[2][3],
2216bf215546Sopenharmony_ci        M->m[3][0], M->m[3][1], M->m[3][2], M->m[3][3]);
2217bf215546Sopenharmony_ci}
2218bf215546Sopenharmony_ci*/
2219bf215546Sopenharmony_ci
2220bf215546Sopenharmony_cistatic inline float
2221bf215546Sopenharmony_cinine_DP4_row_col(const D3DMATRIX *A, int r, const D3DMATRIX *B, int c)
2222bf215546Sopenharmony_ci{
2223bf215546Sopenharmony_ci    return A->m[r][0] * B->m[0][c] +
2224bf215546Sopenharmony_ci           A->m[r][1] * B->m[1][c] +
2225bf215546Sopenharmony_ci           A->m[r][2] * B->m[2][c] +
2226bf215546Sopenharmony_ci           A->m[r][3] * B->m[3][c];
2227bf215546Sopenharmony_ci}
2228bf215546Sopenharmony_ci
2229bf215546Sopenharmony_cistatic inline float
2230bf215546Sopenharmony_cinine_DP4_vec_col(const D3DVECTOR *v, const D3DMATRIX *M, int c)
2231bf215546Sopenharmony_ci{
2232bf215546Sopenharmony_ci    return v->x * M->m[0][c] +
2233bf215546Sopenharmony_ci           v->y * M->m[1][c] +
2234bf215546Sopenharmony_ci           v->z * M->m[2][c] +
2235bf215546Sopenharmony_ci           1.0f * M->m[3][c];
2236bf215546Sopenharmony_ci}
2237bf215546Sopenharmony_ci
2238bf215546Sopenharmony_cistatic inline float
2239bf215546Sopenharmony_cinine_DP3_vec_col(const D3DVECTOR *v, const D3DMATRIX *M, int c)
2240bf215546Sopenharmony_ci{
2241bf215546Sopenharmony_ci    return v->x * M->m[0][c] +
2242bf215546Sopenharmony_ci           v->y * M->m[1][c] +
2243bf215546Sopenharmony_ci           v->z * M->m[2][c];
2244bf215546Sopenharmony_ci}
2245bf215546Sopenharmony_ci
2246bf215546Sopenharmony_civoid
2247bf215546Sopenharmony_cinine_d3d_matrix_matrix_mul(D3DMATRIX *D, const D3DMATRIX *L, const D3DMATRIX *R)
2248bf215546Sopenharmony_ci{
2249bf215546Sopenharmony_ci    D->_11 = nine_DP4_row_col(L, 0, R, 0);
2250bf215546Sopenharmony_ci    D->_12 = nine_DP4_row_col(L, 0, R, 1);
2251bf215546Sopenharmony_ci    D->_13 = nine_DP4_row_col(L, 0, R, 2);
2252bf215546Sopenharmony_ci    D->_14 = nine_DP4_row_col(L, 0, R, 3);
2253bf215546Sopenharmony_ci
2254bf215546Sopenharmony_ci    D->_21 = nine_DP4_row_col(L, 1, R, 0);
2255bf215546Sopenharmony_ci    D->_22 = nine_DP4_row_col(L, 1, R, 1);
2256bf215546Sopenharmony_ci    D->_23 = nine_DP4_row_col(L, 1, R, 2);
2257bf215546Sopenharmony_ci    D->_24 = nine_DP4_row_col(L, 1, R, 3);
2258bf215546Sopenharmony_ci
2259bf215546Sopenharmony_ci    D->_31 = nine_DP4_row_col(L, 2, R, 0);
2260bf215546Sopenharmony_ci    D->_32 = nine_DP4_row_col(L, 2, R, 1);
2261bf215546Sopenharmony_ci    D->_33 = nine_DP4_row_col(L, 2, R, 2);
2262bf215546Sopenharmony_ci    D->_34 = nine_DP4_row_col(L, 2, R, 3);
2263bf215546Sopenharmony_ci
2264bf215546Sopenharmony_ci    D->_41 = nine_DP4_row_col(L, 3, R, 0);
2265bf215546Sopenharmony_ci    D->_42 = nine_DP4_row_col(L, 3, R, 1);
2266bf215546Sopenharmony_ci    D->_43 = nine_DP4_row_col(L, 3, R, 2);
2267bf215546Sopenharmony_ci    D->_44 = nine_DP4_row_col(L, 3, R, 3);
2268bf215546Sopenharmony_ci}
2269bf215546Sopenharmony_ci
2270bf215546Sopenharmony_civoid
2271bf215546Sopenharmony_cinine_d3d_vector4_matrix_mul(D3DVECTOR *d, const D3DVECTOR *v, const D3DMATRIX *M)
2272bf215546Sopenharmony_ci{
2273bf215546Sopenharmony_ci    d->x = nine_DP4_vec_col(v, M, 0);
2274bf215546Sopenharmony_ci    d->y = nine_DP4_vec_col(v, M, 1);
2275bf215546Sopenharmony_ci    d->z = nine_DP4_vec_col(v, M, 2);
2276bf215546Sopenharmony_ci}
2277bf215546Sopenharmony_ci
2278bf215546Sopenharmony_civoid
2279bf215546Sopenharmony_cinine_d3d_vector3_matrix_mul(D3DVECTOR *d, const D3DVECTOR *v, const D3DMATRIX *M)
2280bf215546Sopenharmony_ci{
2281bf215546Sopenharmony_ci    d->x = nine_DP3_vec_col(v, M, 0);
2282bf215546Sopenharmony_ci    d->y = nine_DP3_vec_col(v, M, 1);
2283bf215546Sopenharmony_ci    d->z = nine_DP3_vec_col(v, M, 2);
2284bf215546Sopenharmony_ci}
2285bf215546Sopenharmony_ci
2286bf215546Sopenharmony_civoid
2287bf215546Sopenharmony_cinine_d3d_matrix_transpose(D3DMATRIX *D, const D3DMATRIX *M)
2288bf215546Sopenharmony_ci{
2289bf215546Sopenharmony_ci    unsigned i, j;
2290bf215546Sopenharmony_ci    for (i = 0; i < 4; ++i)
2291bf215546Sopenharmony_ci    for (j = 0; j < 4; ++j)
2292bf215546Sopenharmony_ci        D->m[i][j] = M->m[j][i];
2293bf215546Sopenharmony_ci}
2294bf215546Sopenharmony_ci
2295bf215546Sopenharmony_ci#define _M_ADD_PROD_1i_2j_3k_4l(i,j,k,l) do {            \
2296bf215546Sopenharmony_ci    float t = M->_1##i * M->_2##j * M->_3##k * M->_4##l; \
2297bf215546Sopenharmony_ci    if (t > 0.0f) pos += t; else neg += t; } while(0)
2298bf215546Sopenharmony_ci
2299bf215546Sopenharmony_ci#define _M_SUB_PROD_1i_2j_3k_4l(i,j,k,l) do {            \
2300bf215546Sopenharmony_ci    float t = M->_1##i * M->_2##j * M->_3##k * M->_4##l; \
2301bf215546Sopenharmony_ci    if (t > 0.0f) neg -= t; else pos -= t; } while(0)
2302bf215546Sopenharmony_cifloat
2303bf215546Sopenharmony_cinine_d3d_matrix_det(const D3DMATRIX *M)
2304bf215546Sopenharmony_ci{
2305bf215546Sopenharmony_ci    float pos = 0.0f;
2306bf215546Sopenharmony_ci    float neg = 0.0f;
2307bf215546Sopenharmony_ci
2308bf215546Sopenharmony_ci    _M_ADD_PROD_1i_2j_3k_4l(1, 2, 3, 4);
2309bf215546Sopenharmony_ci    _M_ADD_PROD_1i_2j_3k_4l(1, 3, 4, 2);
2310bf215546Sopenharmony_ci    _M_ADD_PROD_1i_2j_3k_4l(1, 4, 2, 3);
2311bf215546Sopenharmony_ci
2312bf215546Sopenharmony_ci    _M_ADD_PROD_1i_2j_3k_4l(2, 1, 4, 3);
2313bf215546Sopenharmony_ci    _M_ADD_PROD_1i_2j_3k_4l(2, 3, 1, 4);
2314bf215546Sopenharmony_ci    _M_ADD_PROD_1i_2j_3k_4l(2, 4, 3, 1);
2315bf215546Sopenharmony_ci
2316bf215546Sopenharmony_ci    _M_ADD_PROD_1i_2j_3k_4l(3, 1, 2, 4);
2317bf215546Sopenharmony_ci    _M_ADD_PROD_1i_2j_3k_4l(3, 2, 4, 1);
2318bf215546Sopenharmony_ci    _M_ADD_PROD_1i_2j_3k_4l(3, 4, 1, 2);
2319bf215546Sopenharmony_ci
2320bf215546Sopenharmony_ci    _M_ADD_PROD_1i_2j_3k_4l(4, 1, 3, 2);
2321bf215546Sopenharmony_ci    _M_ADD_PROD_1i_2j_3k_4l(4, 2, 1, 3);
2322bf215546Sopenharmony_ci    _M_ADD_PROD_1i_2j_3k_4l(4, 3, 2, 1);
2323bf215546Sopenharmony_ci
2324bf215546Sopenharmony_ci    _M_SUB_PROD_1i_2j_3k_4l(1, 2, 4, 3);
2325bf215546Sopenharmony_ci    _M_SUB_PROD_1i_2j_3k_4l(1, 3, 2, 4);
2326bf215546Sopenharmony_ci    _M_SUB_PROD_1i_2j_3k_4l(1, 4, 3, 2);
2327bf215546Sopenharmony_ci
2328bf215546Sopenharmony_ci    _M_SUB_PROD_1i_2j_3k_4l(2, 1, 3, 4);
2329bf215546Sopenharmony_ci    _M_SUB_PROD_1i_2j_3k_4l(2, 3, 4, 1);
2330bf215546Sopenharmony_ci    _M_SUB_PROD_1i_2j_3k_4l(2, 4, 1, 3);
2331bf215546Sopenharmony_ci
2332bf215546Sopenharmony_ci    _M_SUB_PROD_1i_2j_3k_4l(3, 1, 4, 2);
2333bf215546Sopenharmony_ci    _M_SUB_PROD_1i_2j_3k_4l(3, 2, 1, 4);
2334bf215546Sopenharmony_ci    _M_SUB_PROD_1i_2j_3k_4l(3, 4, 2, 1);
2335bf215546Sopenharmony_ci
2336bf215546Sopenharmony_ci    _M_SUB_PROD_1i_2j_3k_4l(4, 1, 2, 3);
2337bf215546Sopenharmony_ci    _M_SUB_PROD_1i_2j_3k_4l(4, 2, 3, 1);
2338bf215546Sopenharmony_ci    _M_SUB_PROD_1i_2j_3k_4l(4, 3, 1, 2);
2339bf215546Sopenharmony_ci
2340bf215546Sopenharmony_ci    return pos + neg;
2341bf215546Sopenharmony_ci}
2342bf215546Sopenharmony_ci
2343bf215546Sopenharmony_ci/* XXX: Probably better to just use src/mesa/math/m_matrix.c because
2344bf215546Sopenharmony_ci * I have no idea where this code came from.
2345bf215546Sopenharmony_ci */
2346bf215546Sopenharmony_civoid
2347bf215546Sopenharmony_cinine_d3d_matrix_inverse(D3DMATRIX *D, const D3DMATRIX *M)
2348bf215546Sopenharmony_ci{
2349bf215546Sopenharmony_ci    int i, k;
2350bf215546Sopenharmony_ci    float det;
2351bf215546Sopenharmony_ci
2352bf215546Sopenharmony_ci    D->m[0][0] =
2353bf215546Sopenharmony_ci        M->m[1][1] * M->m[2][2] * M->m[3][3] -
2354bf215546Sopenharmony_ci        M->m[1][1] * M->m[3][2] * M->m[2][3] -
2355bf215546Sopenharmony_ci        M->m[1][2] * M->m[2][1] * M->m[3][3] +
2356bf215546Sopenharmony_ci        M->m[1][2] * M->m[3][1] * M->m[2][3] +
2357bf215546Sopenharmony_ci        M->m[1][3] * M->m[2][1] * M->m[3][2] -
2358bf215546Sopenharmony_ci        M->m[1][3] * M->m[3][1] * M->m[2][2];
2359bf215546Sopenharmony_ci
2360bf215546Sopenharmony_ci    D->m[0][1] =
2361bf215546Sopenharmony_ci       -M->m[0][1] * M->m[2][2] * M->m[3][3] +
2362bf215546Sopenharmony_ci        M->m[0][1] * M->m[3][2] * M->m[2][3] +
2363bf215546Sopenharmony_ci        M->m[0][2] * M->m[2][1] * M->m[3][3] -
2364bf215546Sopenharmony_ci        M->m[0][2] * M->m[3][1] * M->m[2][3] -
2365bf215546Sopenharmony_ci        M->m[0][3] * M->m[2][1] * M->m[3][2] +
2366bf215546Sopenharmony_ci        M->m[0][3] * M->m[3][1] * M->m[2][2];
2367bf215546Sopenharmony_ci
2368bf215546Sopenharmony_ci    D->m[0][2] =
2369bf215546Sopenharmony_ci        M->m[0][1] * M->m[1][2] * M->m[3][3] -
2370bf215546Sopenharmony_ci        M->m[0][1] * M->m[3][2] * M->m[1][3] -
2371bf215546Sopenharmony_ci        M->m[0][2] * M->m[1][1] * M->m[3][3] +
2372bf215546Sopenharmony_ci        M->m[0][2] * M->m[3][1] * M->m[1][3] +
2373bf215546Sopenharmony_ci        M->m[0][3] * M->m[1][1] * M->m[3][2] -
2374bf215546Sopenharmony_ci        M->m[0][3] * M->m[3][1] * M->m[1][2];
2375bf215546Sopenharmony_ci
2376bf215546Sopenharmony_ci    D->m[0][3] =
2377bf215546Sopenharmony_ci       -M->m[0][1] * M->m[1][2] * M->m[2][3] +
2378bf215546Sopenharmony_ci        M->m[0][1] * M->m[2][2] * M->m[1][3] +
2379bf215546Sopenharmony_ci        M->m[0][2] * M->m[1][1] * M->m[2][3] -
2380bf215546Sopenharmony_ci        M->m[0][2] * M->m[2][1] * M->m[1][3] -
2381bf215546Sopenharmony_ci        M->m[0][3] * M->m[1][1] * M->m[2][2] +
2382bf215546Sopenharmony_ci        M->m[0][3] * M->m[2][1] * M->m[1][2];
2383bf215546Sopenharmony_ci
2384bf215546Sopenharmony_ci    D->m[1][0] =
2385bf215546Sopenharmony_ci       -M->m[1][0] * M->m[2][2] * M->m[3][3] +
2386bf215546Sopenharmony_ci        M->m[1][0] * M->m[3][2] * M->m[2][3] +
2387bf215546Sopenharmony_ci        M->m[1][2] * M->m[2][0] * M->m[3][3] -
2388bf215546Sopenharmony_ci        M->m[1][2] * M->m[3][0] * M->m[2][3] -
2389bf215546Sopenharmony_ci        M->m[1][3] * M->m[2][0] * M->m[3][2] +
2390bf215546Sopenharmony_ci        M->m[1][3] * M->m[3][0] * M->m[2][2];
2391bf215546Sopenharmony_ci
2392bf215546Sopenharmony_ci    D->m[1][1] =
2393bf215546Sopenharmony_ci        M->m[0][0] * M->m[2][2] * M->m[3][3] -
2394bf215546Sopenharmony_ci        M->m[0][0] * M->m[3][2] * M->m[2][3] -
2395bf215546Sopenharmony_ci        M->m[0][2] * M->m[2][0] * M->m[3][3] +
2396bf215546Sopenharmony_ci        M->m[0][2] * M->m[3][0] * M->m[2][3] +
2397bf215546Sopenharmony_ci        M->m[0][3] * M->m[2][0] * M->m[3][2] -
2398bf215546Sopenharmony_ci        M->m[0][3] * M->m[3][0] * M->m[2][2];
2399bf215546Sopenharmony_ci
2400bf215546Sopenharmony_ci    D->m[1][2] =
2401bf215546Sopenharmony_ci       -M->m[0][0] * M->m[1][2] * M->m[3][3] +
2402bf215546Sopenharmony_ci        M->m[0][0] * M->m[3][2] * M->m[1][3] +
2403bf215546Sopenharmony_ci        M->m[0][2] * M->m[1][0] * M->m[3][3] -
2404bf215546Sopenharmony_ci        M->m[0][2] * M->m[3][0] * M->m[1][3] -
2405bf215546Sopenharmony_ci        M->m[0][3] * M->m[1][0] * M->m[3][2] +
2406bf215546Sopenharmony_ci        M->m[0][3] * M->m[3][0] * M->m[1][2];
2407bf215546Sopenharmony_ci
2408bf215546Sopenharmony_ci    D->m[1][3] =
2409bf215546Sopenharmony_ci        M->m[0][0] * M->m[1][2] * M->m[2][3] -
2410bf215546Sopenharmony_ci        M->m[0][0] * M->m[2][2] * M->m[1][3] -
2411bf215546Sopenharmony_ci        M->m[0][2] * M->m[1][0] * M->m[2][3] +
2412bf215546Sopenharmony_ci        M->m[0][2] * M->m[2][0] * M->m[1][3] +
2413bf215546Sopenharmony_ci        M->m[0][3] * M->m[1][0] * M->m[2][2] -
2414bf215546Sopenharmony_ci        M->m[0][3] * M->m[2][0] * M->m[1][2];
2415bf215546Sopenharmony_ci
2416bf215546Sopenharmony_ci    D->m[2][0] =
2417bf215546Sopenharmony_ci        M->m[1][0] * M->m[2][1] * M->m[3][3] -
2418bf215546Sopenharmony_ci        M->m[1][0] * M->m[3][1] * M->m[2][3] -
2419bf215546Sopenharmony_ci        M->m[1][1] * M->m[2][0] * M->m[3][3] +
2420bf215546Sopenharmony_ci        M->m[1][1] * M->m[3][0] * M->m[2][3] +
2421bf215546Sopenharmony_ci        M->m[1][3] * M->m[2][0] * M->m[3][1] -
2422bf215546Sopenharmony_ci        M->m[1][3] * M->m[3][0] * M->m[2][1];
2423bf215546Sopenharmony_ci
2424bf215546Sopenharmony_ci    D->m[2][1] =
2425bf215546Sopenharmony_ci       -M->m[0][0] * M->m[2][1] * M->m[3][3] +
2426bf215546Sopenharmony_ci        M->m[0][0] * M->m[3][1] * M->m[2][3] +
2427bf215546Sopenharmony_ci        M->m[0][1] * M->m[2][0] * M->m[3][3] -
2428bf215546Sopenharmony_ci        M->m[0][1] * M->m[3][0] * M->m[2][3] -
2429bf215546Sopenharmony_ci        M->m[0][3] * M->m[2][0] * M->m[3][1] +
2430bf215546Sopenharmony_ci        M->m[0][3] * M->m[3][0] * M->m[2][1];
2431bf215546Sopenharmony_ci
2432bf215546Sopenharmony_ci    D->m[2][2] =
2433bf215546Sopenharmony_ci        M->m[0][0] * M->m[1][1] * M->m[3][3] -
2434bf215546Sopenharmony_ci        M->m[0][0] * M->m[3][1] * M->m[1][3] -
2435bf215546Sopenharmony_ci        M->m[0][1] * M->m[1][0] * M->m[3][3] +
2436bf215546Sopenharmony_ci        M->m[0][1] * M->m[3][0] * M->m[1][3] +
2437bf215546Sopenharmony_ci        M->m[0][3] * M->m[1][0] * M->m[3][1] -
2438bf215546Sopenharmony_ci        M->m[0][3] * M->m[3][0] * M->m[1][1];
2439bf215546Sopenharmony_ci
2440bf215546Sopenharmony_ci    D->m[2][3] =
2441bf215546Sopenharmony_ci       -M->m[0][0] * M->m[1][1] * M->m[2][3] +
2442bf215546Sopenharmony_ci        M->m[0][0] * M->m[2][1] * M->m[1][3] +
2443bf215546Sopenharmony_ci        M->m[0][1] * M->m[1][0] * M->m[2][3] -
2444bf215546Sopenharmony_ci        M->m[0][1] * M->m[2][0] * M->m[1][3] -
2445bf215546Sopenharmony_ci        M->m[0][3] * M->m[1][0] * M->m[2][1] +
2446bf215546Sopenharmony_ci        M->m[0][3] * M->m[2][0] * M->m[1][1];
2447bf215546Sopenharmony_ci
2448bf215546Sopenharmony_ci    D->m[3][0] =
2449bf215546Sopenharmony_ci       -M->m[1][0] * M->m[2][1] * M->m[3][2] +
2450bf215546Sopenharmony_ci        M->m[1][0] * M->m[3][1] * M->m[2][2] +
2451bf215546Sopenharmony_ci        M->m[1][1] * M->m[2][0] * M->m[3][2] -
2452bf215546Sopenharmony_ci        M->m[1][1] * M->m[3][0] * M->m[2][2] -
2453bf215546Sopenharmony_ci        M->m[1][2] * M->m[2][0] * M->m[3][1] +
2454bf215546Sopenharmony_ci        M->m[1][2] * M->m[3][0] * M->m[2][1];
2455bf215546Sopenharmony_ci
2456bf215546Sopenharmony_ci    D->m[3][1] =
2457bf215546Sopenharmony_ci        M->m[0][0] * M->m[2][1] * M->m[3][2] -
2458bf215546Sopenharmony_ci        M->m[0][0] * M->m[3][1] * M->m[2][2] -
2459bf215546Sopenharmony_ci        M->m[0][1] * M->m[2][0] * M->m[3][2] +
2460bf215546Sopenharmony_ci        M->m[0][1] * M->m[3][0] * M->m[2][2] +
2461bf215546Sopenharmony_ci        M->m[0][2] * M->m[2][0] * M->m[3][1] -
2462bf215546Sopenharmony_ci        M->m[0][2] * M->m[3][0] * M->m[2][1];
2463bf215546Sopenharmony_ci
2464bf215546Sopenharmony_ci    D->m[3][2] =
2465bf215546Sopenharmony_ci       -M->m[0][0] * M->m[1][1] * M->m[3][2] +
2466bf215546Sopenharmony_ci        M->m[0][0] * M->m[3][1] * M->m[1][2] +
2467bf215546Sopenharmony_ci        M->m[0][1] * M->m[1][0] * M->m[3][2] -
2468bf215546Sopenharmony_ci        M->m[0][1] * M->m[3][0] * M->m[1][2] -
2469bf215546Sopenharmony_ci        M->m[0][2] * M->m[1][0] * M->m[3][1] +
2470bf215546Sopenharmony_ci        M->m[0][2] * M->m[3][0] * M->m[1][1];
2471bf215546Sopenharmony_ci
2472bf215546Sopenharmony_ci    D->m[3][3] =
2473bf215546Sopenharmony_ci        M->m[0][0] * M->m[1][1] * M->m[2][2] -
2474bf215546Sopenharmony_ci        M->m[0][0] * M->m[2][1] * M->m[1][2] -
2475bf215546Sopenharmony_ci        M->m[0][1] * M->m[1][0] * M->m[2][2] +
2476bf215546Sopenharmony_ci        M->m[0][1] * M->m[2][0] * M->m[1][2] +
2477bf215546Sopenharmony_ci        M->m[0][2] * M->m[1][0] * M->m[2][1] -
2478bf215546Sopenharmony_ci        M->m[0][2] * M->m[2][0] * M->m[1][1];
2479bf215546Sopenharmony_ci
2480bf215546Sopenharmony_ci    det =
2481bf215546Sopenharmony_ci        M->m[0][0] * D->m[0][0] +
2482bf215546Sopenharmony_ci        M->m[1][0] * D->m[0][1] +
2483bf215546Sopenharmony_ci        M->m[2][0] * D->m[0][2] +
2484bf215546Sopenharmony_ci        M->m[3][0] * D->m[0][3];
2485bf215546Sopenharmony_ci
2486bf215546Sopenharmony_ci    if (fabsf(det) < 1e-30) {/* non inversible */
2487bf215546Sopenharmony_ci        *D = *M; /* wine tests */
2488bf215546Sopenharmony_ci        return;
2489bf215546Sopenharmony_ci    }
2490bf215546Sopenharmony_ci
2491bf215546Sopenharmony_ci    det = 1.0 / det;
2492bf215546Sopenharmony_ci
2493bf215546Sopenharmony_ci    for (i = 0; i < 4; i++)
2494bf215546Sopenharmony_ci    for (k = 0; k < 4; k++)
2495bf215546Sopenharmony_ci        D->m[i][k] *= det;
2496bf215546Sopenharmony_ci
2497bf215546Sopenharmony_ci#if defined(DEBUG) || !defined(NDEBUG)
2498bf215546Sopenharmony_ci    {
2499bf215546Sopenharmony_ci        D3DMATRIX I;
2500bf215546Sopenharmony_ci
2501bf215546Sopenharmony_ci        nine_d3d_matrix_matrix_mul(&I, D, M);
2502bf215546Sopenharmony_ci
2503bf215546Sopenharmony_ci        for (i = 0; i < 4; ++i)
2504bf215546Sopenharmony_ci        for (k = 0; k < 4; ++k)
2505bf215546Sopenharmony_ci            if (fabsf(I.m[i][k] - (float)(i == k)) > 1e-3)
2506bf215546Sopenharmony_ci                DBG("Matrix inversion check FAILED !\n");
2507bf215546Sopenharmony_ci    }
2508bf215546Sopenharmony_ci#endif
2509bf215546Sopenharmony_ci}
2510