1bf215546Sopenharmony_ci 2bf215546Sopenharmony_ci/* FF is big and ugly so feel free to write lines as long as you like. 3bf215546Sopenharmony_ci * Aieeeeeeeee ! 4bf215546Sopenharmony_ci * 5bf215546Sopenharmony_ci * Let me make that clearer: 6bf215546Sopenharmony_ci * Aieeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee ! !! !!! 7bf215546Sopenharmony_ci */ 8bf215546Sopenharmony_ci 9bf215546Sopenharmony_ci#include "device9.h" 10bf215546Sopenharmony_ci#include "basetexture9.h" 11bf215546Sopenharmony_ci#include "vertexdeclaration9.h" 12bf215546Sopenharmony_ci#include "vertexshader9.h" 13bf215546Sopenharmony_ci#include "pixelshader9.h" 14bf215546Sopenharmony_ci#include "nine_ff.h" 15bf215546Sopenharmony_ci#include "nine_defines.h" 16bf215546Sopenharmony_ci#include "nine_helpers.h" 17bf215546Sopenharmony_ci#include "nine_pipe.h" 18bf215546Sopenharmony_ci#include "nine_dump.h" 19bf215546Sopenharmony_ci 20bf215546Sopenharmony_ci#include "pipe/p_context.h" 21bf215546Sopenharmony_ci#include "tgsi/tgsi_ureg.h" 22bf215546Sopenharmony_ci#include "tgsi/tgsi_dump.h" 23bf215546Sopenharmony_ci#include "util/u_box.h" 24bf215546Sopenharmony_ci#include "util/u_hash_table.h" 25bf215546Sopenharmony_ci#include "util/u_upload_mgr.h" 26bf215546Sopenharmony_ci 27bf215546Sopenharmony_ci#define DBG_CHANNEL DBG_FF 28bf215546Sopenharmony_ci 29bf215546Sopenharmony_ci#define NINE_FF_NUM_VS_CONST 196 30bf215546Sopenharmony_ci#define NINE_FF_NUM_PS_CONST 24 31bf215546Sopenharmony_ci 32bf215546Sopenharmony_cistruct fvec4 33bf215546Sopenharmony_ci{ 34bf215546Sopenharmony_ci float x, y, z, w; 35bf215546Sopenharmony_ci}; 36bf215546Sopenharmony_ci 37bf215546Sopenharmony_cistruct nine_ff_vs_key 38bf215546Sopenharmony_ci{ 39bf215546Sopenharmony_ci union { 40bf215546Sopenharmony_ci struct { 41bf215546Sopenharmony_ci uint32_t position_t : 1; 42bf215546Sopenharmony_ci uint32_t lighting : 1; 43bf215546Sopenharmony_ci uint32_t darkness : 1; /* lighting enabled but no active lights */ 44bf215546Sopenharmony_ci uint32_t localviewer : 1; 45bf215546Sopenharmony_ci uint32_t vertexpointsize : 1; 46bf215546Sopenharmony_ci uint32_t pointscale : 1; 47bf215546Sopenharmony_ci uint32_t vertexblend : 3; 48bf215546Sopenharmony_ci uint32_t vertexblend_indexed : 1; 49bf215546Sopenharmony_ci uint32_t vertextween : 1; 50bf215546Sopenharmony_ci uint32_t mtl_diffuse : 2; /* 0 = material, 1 = color1, 2 = color2 */ 51bf215546Sopenharmony_ci uint32_t mtl_ambient : 2; 52bf215546Sopenharmony_ci uint32_t mtl_specular : 2; 53bf215546Sopenharmony_ci uint32_t mtl_emissive : 2; 54bf215546Sopenharmony_ci uint32_t fog_mode : 2; 55bf215546Sopenharmony_ci uint32_t fog_range : 1; 56bf215546Sopenharmony_ci uint32_t color0in_one : 1; 57bf215546Sopenharmony_ci uint32_t color1in_zero : 1; 58bf215546Sopenharmony_ci uint32_t has_normal : 1; 59bf215546Sopenharmony_ci uint32_t fog : 1; 60bf215546Sopenharmony_ci uint32_t normalizenormals : 1; 61bf215546Sopenharmony_ci uint32_t ucp : 1; 62bf215546Sopenharmony_ci uint32_t pad1 : 4; 63bf215546Sopenharmony_ci uint32_t tc_dim_input: 16; /* 8 * 2 bits */ 64bf215546Sopenharmony_ci uint32_t pad2 : 16; 65bf215546Sopenharmony_ci uint32_t tc_dim_output: 24; /* 8 * 3 bits */ 66bf215546Sopenharmony_ci uint32_t pad3 : 8; 67bf215546Sopenharmony_ci uint32_t tc_gen : 24; /* 8 * 3 bits */ 68bf215546Sopenharmony_ci uint32_t pad4 : 8; 69bf215546Sopenharmony_ci uint32_t tc_idx : 24; 70bf215546Sopenharmony_ci uint32_t pad5 : 8; 71bf215546Sopenharmony_ci uint32_t passthrough; 72bf215546Sopenharmony_ci }; 73bf215546Sopenharmony_ci uint64_t value64[3]; /* don't forget to resize VertexShader9.ff_key */ 74bf215546Sopenharmony_ci uint32_t value32[6]; 75bf215546Sopenharmony_ci }; 76bf215546Sopenharmony_ci}; 77bf215546Sopenharmony_ci 78bf215546Sopenharmony_ci/* Texture stage state: 79bf215546Sopenharmony_ci * 80bf215546Sopenharmony_ci * COLOROP D3DTOP 5 bit 81bf215546Sopenharmony_ci * ALPHAOP D3DTOP 5 bit 82bf215546Sopenharmony_ci * COLORARG0 D3DTA 3 bit 83bf215546Sopenharmony_ci * COLORARG1 D3DTA 3 bit 84bf215546Sopenharmony_ci * COLORARG2 D3DTA 3 bit 85bf215546Sopenharmony_ci * ALPHAARG0 D3DTA 3 bit 86bf215546Sopenharmony_ci * ALPHAARG1 D3DTA 3 bit 87bf215546Sopenharmony_ci * ALPHAARG2 D3DTA 3 bit 88bf215546Sopenharmony_ci * RESULTARG D3DTA 1 bit (CURRENT:0 or TEMP:1) 89bf215546Sopenharmony_ci * TEXCOORDINDEX 0 - 7 3 bit 90bf215546Sopenharmony_ci * =========================== 91bf215546Sopenharmony_ci * 32 bit per stage 92bf215546Sopenharmony_ci */ 93bf215546Sopenharmony_cistruct nine_ff_ps_key 94bf215546Sopenharmony_ci{ 95bf215546Sopenharmony_ci union { 96bf215546Sopenharmony_ci struct { 97bf215546Sopenharmony_ci struct { 98bf215546Sopenharmony_ci uint32_t colorop : 5; 99bf215546Sopenharmony_ci uint32_t alphaop : 5; 100bf215546Sopenharmony_ci uint32_t colorarg0 : 3; 101bf215546Sopenharmony_ci uint32_t colorarg1 : 3; 102bf215546Sopenharmony_ci uint32_t colorarg2 : 3; 103bf215546Sopenharmony_ci uint32_t alphaarg0 : 3; 104bf215546Sopenharmony_ci uint32_t alphaarg1 : 3; 105bf215546Sopenharmony_ci uint32_t alphaarg2 : 3; 106bf215546Sopenharmony_ci uint32_t resultarg : 1; /* CURRENT:0 or TEMP:1 */ 107bf215546Sopenharmony_ci uint32_t textarget : 2; /* 1D/2D/3D/CUBE */ 108bf215546Sopenharmony_ci uint32_t pad : 1; 109bf215546Sopenharmony_ci /* that's 32 bit exactly */ 110bf215546Sopenharmony_ci } ts[8]; 111bf215546Sopenharmony_ci uint32_t projected : 16; 112bf215546Sopenharmony_ci uint32_t fog : 1; /* for vFog coming from VS */ 113bf215546Sopenharmony_ci uint32_t fog_mode : 2; 114bf215546Sopenharmony_ci uint32_t fog_source : 1; /* 0: Z, 1: W */ 115bf215546Sopenharmony_ci uint32_t specular : 1; 116bf215546Sopenharmony_ci uint32_t pad1 : 11; /* 9 32-bit words with this */ 117bf215546Sopenharmony_ci uint8_t colorarg_b4[3]; 118bf215546Sopenharmony_ci uint8_t colorarg_b5[3]; 119bf215546Sopenharmony_ci uint8_t alphaarg_b4[3]; /* 11 32-bit words plus a byte */ 120bf215546Sopenharmony_ci uint8_t pad2[3]; 121bf215546Sopenharmony_ci }; 122bf215546Sopenharmony_ci uint64_t value64[6]; /* don't forget to resize PixelShader9.ff_key */ 123bf215546Sopenharmony_ci uint32_t value32[12]; 124bf215546Sopenharmony_ci }; 125bf215546Sopenharmony_ci}; 126bf215546Sopenharmony_ci 127bf215546Sopenharmony_cistatic uint32_t nine_ff_vs_key_hash(const void *key) 128bf215546Sopenharmony_ci{ 129bf215546Sopenharmony_ci const struct nine_ff_vs_key *vs = key; 130bf215546Sopenharmony_ci unsigned i; 131bf215546Sopenharmony_ci uint32_t hash = vs->value32[0]; 132bf215546Sopenharmony_ci for (i = 1; i < ARRAY_SIZE(vs->value32); ++i) 133bf215546Sopenharmony_ci hash ^= vs->value32[i]; 134bf215546Sopenharmony_ci return hash; 135bf215546Sopenharmony_ci} 136bf215546Sopenharmony_cistatic bool nine_ff_vs_key_comp(const void *key1, const void *key2) 137bf215546Sopenharmony_ci{ 138bf215546Sopenharmony_ci struct nine_ff_vs_key *a = (struct nine_ff_vs_key *)key1; 139bf215546Sopenharmony_ci struct nine_ff_vs_key *b = (struct nine_ff_vs_key *)key2; 140bf215546Sopenharmony_ci 141bf215546Sopenharmony_ci return memcmp(a->value64, b->value64, sizeof(a->value64)) == 0; 142bf215546Sopenharmony_ci} 143bf215546Sopenharmony_cistatic uint32_t nine_ff_ps_key_hash(const void *key) 144bf215546Sopenharmony_ci{ 145bf215546Sopenharmony_ci const struct nine_ff_ps_key *ps = key; 146bf215546Sopenharmony_ci unsigned i; 147bf215546Sopenharmony_ci uint32_t hash = ps->value32[0]; 148bf215546Sopenharmony_ci for (i = 1; i < ARRAY_SIZE(ps->value32); ++i) 149bf215546Sopenharmony_ci hash ^= ps->value32[i]; 150bf215546Sopenharmony_ci return hash; 151bf215546Sopenharmony_ci} 152bf215546Sopenharmony_cistatic bool nine_ff_ps_key_comp(const void *key1, const void *key2) 153bf215546Sopenharmony_ci{ 154bf215546Sopenharmony_ci struct nine_ff_ps_key *a = (struct nine_ff_ps_key *)key1; 155bf215546Sopenharmony_ci struct nine_ff_ps_key *b = (struct nine_ff_ps_key *)key2; 156bf215546Sopenharmony_ci 157bf215546Sopenharmony_ci return memcmp(a->value64, b->value64, sizeof(a->value64)) == 0; 158bf215546Sopenharmony_ci} 159bf215546Sopenharmony_cistatic uint32_t nine_ff_fvf_key_hash(const void *key) 160bf215546Sopenharmony_ci{ 161bf215546Sopenharmony_ci return *(DWORD *)key; 162bf215546Sopenharmony_ci} 163bf215546Sopenharmony_cistatic bool nine_ff_fvf_key_comp(const void *key1, const void *key2) 164bf215546Sopenharmony_ci{ 165bf215546Sopenharmony_ci return *(DWORD *)key1 == *(DWORD *)key2; 166bf215546Sopenharmony_ci} 167bf215546Sopenharmony_ci 168bf215546Sopenharmony_cistatic void nine_ff_prune_vs(struct NineDevice9 *); 169bf215546Sopenharmony_cistatic void nine_ff_prune_ps(struct NineDevice9 *); 170bf215546Sopenharmony_ci 171bf215546Sopenharmony_cistatic void nine_ureg_tgsi_dump(struct ureg_program *ureg, boolean override) 172bf215546Sopenharmony_ci{ 173bf215546Sopenharmony_ci if (debug_get_bool_option("NINE_FF_DUMP", FALSE) || override) { 174bf215546Sopenharmony_ci const struct tgsi_token *toks = ureg_get_tokens(ureg, NULL); 175bf215546Sopenharmony_ci tgsi_dump(toks, 0); 176bf215546Sopenharmony_ci ureg_free_tokens(toks); 177bf215546Sopenharmony_ci } 178bf215546Sopenharmony_ci} 179bf215546Sopenharmony_ci 180bf215546Sopenharmony_ci#define _X(r) ureg_scalar(ureg_src(r), TGSI_SWIZZLE_X) 181bf215546Sopenharmony_ci#define _Y(r) ureg_scalar(ureg_src(r), TGSI_SWIZZLE_Y) 182bf215546Sopenharmony_ci#define _Z(r) ureg_scalar(ureg_src(r), TGSI_SWIZZLE_Z) 183bf215546Sopenharmony_ci#define _W(r) ureg_scalar(ureg_src(r), TGSI_SWIZZLE_W) 184bf215546Sopenharmony_ci 185bf215546Sopenharmony_ci#define _XXXX(r) ureg_scalar(r, TGSI_SWIZZLE_X) 186bf215546Sopenharmony_ci#define _YYYY(r) ureg_scalar(r, TGSI_SWIZZLE_Y) 187bf215546Sopenharmony_ci#define _ZZZZ(r) ureg_scalar(r, TGSI_SWIZZLE_Z) 188bf215546Sopenharmony_ci#define _WWWW(r) ureg_scalar(r, TGSI_SWIZZLE_W) 189bf215546Sopenharmony_ci 190bf215546Sopenharmony_ci#define _XYZW(r) (r) 191bf215546Sopenharmony_ci 192bf215546Sopenharmony_ci/* AL should contain base address of lights table. */ 193bf215546Sopenharmony_ci#define LIGHT_CONST(i) \ 194bf215546Sopenharmony_ci ureg_src_indirect(ureg_DECL_constant(ureg, i), _X(AL)) 195bf215546Sopenharmony_ci 196bf215546Sopenharmony_ci#define MATERIAL_CONST(i) \ 197bf215546Sopenharmony_ci ureg_DECL_constant(ureg, 19 + (i)) 198bf215546Sopenharmony_ci 199bf215546Sopenharmony_ci#define _CONST(n) ureg_DECL_constant(ureg, n) 200bf215546Sopenharmony_ci 201bf215546Sopenharmony_ci/* VS FF constants layout: 202bf215546Sopenharmony_ci * 203bf215546Sopenharmony_ci * CONST[ 0.. 3] D3DTS_WORLD * D3DTS_VIEW * D3DTS_PROJECTION 204bf215546Sopenharmony_ci * CONST[ 4.. 7] D3DTS_WORLD * D3DTS_VIEW 205bf215546Sopenharmony_ci * CONST[ 8..11] D3DTS_PROJECTION 206bf215546Sopenharmony_ci * CONST[12..15] D3DTS_VIEW^(-1) 207bf215546Sopenharmony_ci * CONST[16..18] Normal matrix 208bf215546Sopenharmony_ci * 209bf215546Sopenharmony_ci * CONST[19].xyz MATERIAL.Emissive + Material.Ambient * RS.Ambient 210bf215546Sopenharmony_ci * CONST[20] MATERIAL.Diffuse 211bf215546Sopenharmony_ci * CONST[21] MATERIAL.Ambient 212bf215546Sopenharmony_ci * CONST[22] MATERIAL.Specular 213bf215546Sopenharmony_ci * CONST[23].x___ MATERIAL.Power 214bf215546Sopenharmony_ci * CONST[24] MATERIAL.Emissive 215bf215546Sopenharmony_ci * CONST[25] RS.Ambient 216bf215546Sopenharmony_ci * 217bf215546Sopenharmony_ci * CONST[26].x___ RS.PointSizeMin 218bf215546Sopenharmony_ci * CONST[26]._y__ RS.PointSizeMax 219bf215546Sopenharmony_ci * CONST[26].__z_ RS.PointSize 220bf215546Sopenharmony_ci * CONST[26].___w RS.PointScaleA 221bf215546Sopenharmony_ci * CONST[27].x___ RS.PointScaleB 222bf215546Sopenharmony_ci * CONST[27]._y__ RS.PointScaleC 223bf215546Sopenharmony_ci * 224bf215546Sopenharmony_ci * CONST[28].x___ RS.FogEnd 225bf215546Sopenharmony_ci * CONST[28]._y__ 1.0f / (RS.FogEnd - RS.FogStart) 226bf215546Sopenharmony_ci * CONST[28].__z_ RS.FogDensity 227bf215546Sopenharmony_ci 228bf215546Sopenharmony_ci * CONST[30].x___ TWEENFACTOR 229bf215546Sopenharmony_ci * 230bf215546Sopenharmony_ci * CONST[32].x___ LIGHT[0].Type 231bf215546Sopenharmony_ci * CONST[32]._yzw LIGHT[0].Attenuation0,1,2 232bf215546Sopenharmony_ci * CONST[33] LIGHT[0].Diffuse 233bf215546Sopenharmony_ci * CONST[34] LIGHT[0].Specular 234bf215546Sopenharmony_ci * CONST[35] LIGHT[0].Ambient 235bf215546Sopenharmony_ci * CONST[36].xyz_ LIGHT[0].Position 236bf215546Sopenharmony_ci * CONST[36].___w LIGHT[0].Range 237bf215546Sopenharmony_ci * CONST[37].xyz_ LIGHT[0].Direction 238bf215546Sopenharmony_ci * CONST[37].___w LIGHT[0].Falloff 239bf215546Sopenharmony_ci * CONST[38].x___ cos(LIGHT[0].Theta / 2) 240bf215546Sopenharmony_ci * CONST[38]._y__ cos(LIGHT[0].Phi / 2) 241bf215546Sopenharmony_ci * CONST[38].__z_ 1.0f / (cos(LIGHT[0].Theta / 2) - cos(Light[0].Phi / 2)) 242bf215546Sopenharmony_ci * CONST[39].xyz_ LIGHT[0].HalfVector (for directional lights) 243bf215546Sopenharmony_ci * CONST[39].___w 1 if this is the last active light, 0 if not 244bf215546Sopenharmony_ci * CONST[40] LIGHT[1] 245bf215546Sopenharmony_ci * CONST[48] LIGHT[2] 246bf215546Sopenharmony_ci * CONST[56] LIGHT[3] 247bf215546Sopenharmony_ci * CONST[64] LIGHT[4] 248bf215546Sopenharmony_ci * CONST[72] LIGHT[5] 249bf215546Sopenharmony_ci * CONST[80] LIGHT[6] 250bf215546Sopenharmony_ci * CONST[88] LIGHT[7] 251bf215546Sopenharmony_ci * NOTE: no lighting code is generated if there are no active lights 252bf215546Sopenharmony_ci * 253bf215546Sopenharmony_ci * CONST[100].x___ Viewport 2/width 254bf215546Sopenharmony_ci * CONST[100]._y__ Viewport 2/height 255bf215546Sopenharmony_ci * CONST[100].__z_ Viewport 1/(zmax - zmin) 256bf215546Sopenharmony_ci * CONST[100].___w Viewport width 257bf215546Sopenharmony_ci * CONST[101].x___ Viewport x0 258bf215546Sopenharmony_ci * CONST[101]._y__ Viewport y0 259bf215546Sopenharmony_ci * CONST[101].__z_ Viewport z0 260bf215546Sopenharmony_ci * 261bf215546Sopenharmony_ci * CONST[128..131] D3DTS_TEXTURE0 262bf215546Sopenharmony_ci * CONST[132..135] D3DTS_TEXTURE1 263bf215546Sopenharmony_ci * CONST[136..139] D3DTS_TEXTURE2 264bf215546Sopenharmony_ci * CONST[140..143] D3DTS_TEXTURE3 265bf215546Sopenharmony_ci * CONST[144..147] D3DTS_TEXTURE4 266bf215546Sopenharmony_ci * CONST[148..151] D3DTS_TEXTURE5 267bf215546Sopenharmony_ci * CONST[152..155] D3DTS_TEXTURE6 268bf215546Sopenharmony_ci * CONST[156..159] D3DTS_TEXTURE7 269bf215546Sopenharmony_ci * 270bf215546Sopenharmony_ci * CONST[160] D3DTS_WORLDMATRIX[0] * D3DTS_VIEW 271bf215546Sopenharmony_ci * CONST[164] D3DTS_WORLDMATRIX[1] * D3DTS_VIEW 272bf215546Sopenharmony_ci * ... 273bf215546Sopenharmony_ci * CONST[192] D3DTS_WORLDMATRIX[8] * D3DTS_VIEW 274bf215546Sopenharmony_ci */ 275bf215546Sopenharmony_cistruct vs_build_ctx 276bf215546Sopenharmony_ci{ 277bf215546Sopenharmony_ci struct ureg_program *ureg; 278bf215546Sopenharmony_ci const struct nine_ff_vs_key *key; 279bf215546Sopenharmony_ci 280bf215546Sopenharmony_ci uint16_t input[PIPE_MAX_ATTRIBS]; 281bf215546Sopenharmony_ci unsigned num_inputs; 282bf215546Sopenharmony_ci 283bf215546Sopenharmony_ci struct ureg_src aVtx; 284bf215546Sopenharmony_ci struct ureg_src aNrm; 285bf215546Sopenharmony_ci struct ureg_src aCol[2]; 286bf215546Sopenharmony_ci struct ureg_src aTex[8]; 287bf215546Sopenharmony_ci struct ureg_src aPsz; 288bf215546Sopenharmony_ci struct ureg_src aInd; 289bf215546Sopenharmony_ci struct ureg_src aWgt; 290bf215546Sopenharmony_ci 291bf215546Sopenharmony_ci struct ureg_src aVtx1; /* tweening */ 292bf215546Sopenharmony_ci struct ureg_src aNrm1; 293bf215546Sopenharmony_ci 294bf215546Sopenharmony_ci struct ureg_src mtlA; 295bf215546Sopenharmony_ci struct ureg_src mtlD; 296bf215546Sopenharmony_ci struct ureg_src mtlS; 297bf215546Sopenharmony_ci struct ureg_src mtlE; 298bf215546Sopenharmony_ci}; 299bf215546Sopenharmony_ci 300bf215546Sopenharmony_cistatic inline unsigned 301bf215546Sopenharmony_ciget_texcoord_sn(struct pipe_screen *screen) 302bf215546Sopenharmony_ci{ 303bf215546Sopenharmony_ci if (screen->get_param(screen, PIPE_CAP_TGSI_TEXCOORD)) 304bf215546Sopenharmony_ci return TGSI_SEMANTIC_TEXCOORD; 305bf215546Sopenharmony_ci return TGSI_SEMANTIC_GENERIC; 306bf215546Sopenharmony_ci} 307bf215546Sopenharmony_ci 308bf215546Sopenharmony_cistatic inline struct ureg_src 309bf215546Sopenharmony_cibuild_vs_add_input(struct vs_build_ctx *vs, uint16_t ndecl) 310bf215546Sopenharmony_ci{ 311bf215546Sopenharmony_ci const unsigned i = vs->num_inputs++; 312bf215546Sopenharmony_ci assert(i < PIPE_MAX_ATTRIBS); 313bf215546Sopenharmony_ci vs->input[i] = ndecl; 314bf215546Sopenharmony_ci return ureg_DECL_vs_input(vs->ureg, i); 315bf215546Sopenharmony_ci} 316bf215546Sopenharmony_ci 317bf215546Sopenharmony_ci/* NOTE: dst may alias src */ 318bf215546Sopenharmony_cistatic inline void 319bf215546Sopenharmony_ciureg_normalize3(struct ureg_program *ureg, 320bf215546Sopenharmony_ci struct ureg_dst dst, struct ureg_src src) 321bf215546Sopenharmony_ci{ 322bf215546Sopenharmony_ci struct ureg_dst tmp = ureg_DECL_temporary(ureg); 323bf215546Sopenharmony_ci struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X); 324bf215546Sopenharmony_ci 325bf215546Sopenharmony_ci ureg_DP3(ureg, tmp_x, src, src); 326bf215546Sopenharmony_ci ureg_RSQ(ureg, tmp_x, _X(tmp)); 327bf215546Sopenharmony_ci ureg_MUL(ureg, dst, src, _X(tmp)); 328bf215546Sopenharmony_ci ureg_release_temporary(ureg, tmp); 329bf215546Sopenharmony_ci} 330bf215546Sopenharmony_ci 331bf215546Sopenharmony_cistatic void * 332bf215546Sopenharmony_cinine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) 333bf215546Sopenharmony_ci{ 334bf215546Sopenharmony_ci const struct nine_ff_vs_key *key = vs->key; 335bf215546Sopenharmony_ci struct ureg_program *ureg = ureg_create(PIPE_SHADER_VERTEX); 336bf215546Sopenharmony_ci struct ureg_dst oPos, oCol[2], oPsz, oFog; 337bf215546Sopenharmony_ci struct ureg_dst AR; 338bf215546Sopenharmony_ci unsigned i, c; 339bf215546Sopenharmony_ci unsigned label[32], l = 0; 340bf215546Sopenharmony_ci boolean need_aNrm = key->lighting || key->passthrough & (1 << NINE_DECLUSAGE_NORMAL); 341bf215546Sopenharmony_ci boolean has_aNrm; 342bf215546Sopenharmony_ci boolean need_aVtx = key->lighting || key->fog_mode || key->pointscale || key->ucp; 343bf215546Sopenharmony_ci const unsigned texcoord_sn = get_texcoord_sn(device->screen); 344bf215546Sopenharmony_ci 345bf215546Sopenharmony_ci vs->ureg = ureg; 346bf215546Sopenharmony_ci 347bf215546Sopenharmony_ci /* Check which inputs we should transform. */ 348bf215546Sopenharmony_ci for (i = 0; i < 8 * 3; i += 3) { 349bf215546Sopenharmony_ci switch ((key->tc_gen >> i) & 0x7) { 350bf215546Sopenharmony_ci case NINED3DTSS_TCI_CAMERASPACENORMAL: 351bf215546Sopenharmony_ci need_aNrm = TRUE; 352bf215546Sopenharmony_ci break; 353bf215546Sopenharmony_ci case NINED3DTSS_TCI_CAMERASPACEPOSITION: 354bf215546Sopenharmony_ci need_aVtx = TRUE; 355bf215546Sopenharmony_ci break; 356bf215546Sopenharmony_ci case NINED3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR: 357bf215546Sopenharmony_ci need_aVtx = need_aNrm = TRUE; 358bf215546Sopenharmony_ci break; 359bf215546Sopenharmony_ci case NINED3DTSS_TCI_SPHEREMAP: 360bf215546Sopenharmony_ci need_aVtx = need_aNrm = TRUE; 361bf215546Sopenharmony_ci break; 362bf215546Sopenharmony_ci default: 363bf215546Sopenharmony_ci break; 364bf215546Sopenharmony_ci } 365bf215546Sopenharmony_ci } 366bf215546Sopenharmony_ci 367bf215546Sopenharmony_ci has_aNrm = need_aNrm && key->has_normal; 368bf215546Sopenharmony_ci 369bf215546Sopenharmony_ci /* Declare and record used inputs (needed for linkage with vertex format): 370bf215546Sopenharmony_ci * (texture coordinates handled later) 371bf215546Sopenharmony_ci */ 372bf215546Sopenharmony_ci vs->aVtx = build_vs_add_input(vs, 373bf215546Sopenharmony_ci key->position_t ? NINE_DECLUSAGE_POSITIONT : NINE_DECLUSAGE_POSITION); 374bf215546Sopenharmony_ci 375bf215546Sopenharmony_ci vs->aNrm = ureg_imm1f(ureg, 0.0f); 376bf215546Sopenharmony_ci if (has_aNrm) 377bf215546Sopenharmony_ci vs->aNrm = build_vs_add_input(vs, NINE_DECLUSAGE_NORMAL); 378bf215546Sopenharmony_ci 379bf215546Sopenharmony_ci vs->aCol[0] = ureg_imm1f(ureg, 1.0f); 380bf215546Sopenharmony_ci vs->aCol[1] = ureg_imm1f(ureg, 0.0f); 381bf215546Sopenharmony_ci 382bf215546Sopenharmony_ci if (key->lighting || key->darkness) { 383bf215546Sopenharmony_ci const unsigned mask = key->mtl_diffuse | key->mtl_specular | 384bf215546Sopenharmony_ci key->mtl_ambient | key->mtl_emissive; 385bf215546Sopenharmony_ci if ((mask & 0x1) && !key->color0in_one) 386bf215546Sopenharmony_ci vs->aCol[0] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 0)); 387bf215546Sopenharmony_ci if ((mask & 0x2) && !key->color1in_zero) 388bf215546Sopenharmony_ci vs->aCol[1] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 1)); 389bf215546Sopenharmony_ci 390bf215546Sopenharmony_ci vs->mtlD = MATERIAL_CONST(1); 391bf215546Sopenharmony_ci vs->mtlA = MATERIAL_CONST(2); 392bf215546Sopenharmony_ci vs->mtlS = MATERIAL_CONST(3); 393bf215546Sopenharmony_ci vs->mtlE = MATERIAL_CONST(5); 394bf215546Sopenharmony_ci if (key->mtl_diffuse == 1) vs->mtlD = vs->aCol[0]; else 395bf215546Sopenharmony_ci if (key->mtl_diffuse == 2) vs->mtlD = vs->aCol[1]; 396bf215546Sopenharmony_ci if (key->mtl_ambient == 1) vs->mtlA = vs->aCol[0]; else 397bf215546Sopenharmony_ci if (key->mtl_ambient == 2) vs->mtlA = vs->aCol[1]; 398bf215546Sopenharmony_ci if (key->mtl_specular == 1) vs->mtlS = vs->aCol[0]; else 399bf215546Sopenharmony_ci if (key->mtl_specular == 2) vs->mtlS = vs->aCol[1]; 400bf215546Sopenharmony_ci if (key->mtl_emissive == 1) vs->mtlE = vs->aCol[0]; else 401bf215546Sopenharmony_ci if (key->mtl_emissive == 2) vs->mtlE = vs->aCol[1]; 402bf215546Sopenharmony_ci } else { 403bf215546Sopenharmony_ci if (!key->color0in_one) vs->aCol[0] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 0)); 404bf215546Sopenharmony_ci if (!key->color1in_zero) vs->aCol[1] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 1)); 405bf215546Sopenharmony_ci } 406bf215546Sopenharmony_ci 407bf215546Sopenharmony_ci if (key->vertexpointsize) 408bf215546Sopenharmony_ci vs->aPsz = build_vs_add_input(vs, NINE_DECLUSAGE_PSIZE); 409bf215546Sopenharmony_ci 410bf215546Sopenharmony_ci if (key->vertexblend_indexed || key->passthrough & (1 << NINE_DECLUSAGE_BLENDINDICES)) 411bf215546Sopenharmony_ci vs->aInd = build_vs_add_input(vs, NINE_DECLUSAGE_BLENDINDICES); 412bf215546Sopenharmony_ci if (key->vertexblend || key->passthrough & (1 << NINE_DECLUSAGE_BLENDWEIGHT)) 413bf215546Sopenharmony_ci vs->aWgt = build_vs_add_input(vs, NINE_DECLUSAGE_BLENDWEIGHT); 414bf215546Sopenharmony_ci if (key->vertextween) { 415bf215546Sopenharmony_ci vs->aVtx1 = build_vs_add_input(vs, NINE_DECLUSAGE_i(POSITION,1)); 416bf215546Sopenharmony_ci vs->aNrm1 = build_vs_add_input(vs, NINE_DECLUSAGE_i(NORMAL,1)); 417bf215546Sopenharmony_ci } 418bf215546Sopenharmony_ci 419bf215546Sopenharmony_ci /* Declare outputs: 420bf215546Sopenharmony_ci */ 421bf215546Sopenharmony_ci oPos = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0); /* HPOS */ 422bf215546Sopenharmony_ci oCol[0] = ureg_saturate(ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0)); 423bf215546Sopenharmony_ci oCol[1] = ureg_saturate(ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 1)); 424bf215546Sopenharmony_ci if (key->fog || key->passthrough & (1 << NINE_DECLUSAGE_FOG)) { 425bf215546Sopenharmony_ci oFog = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 16); 426bf215546Sopenharmony_ci oFog = ureg_writemask(oFog, TGSI_WRITEMASK_X); 427bf215546Sopenharmony_ci } 428bf215546Sopenharmony_ci 429bf215546Sopenharmony_ci if (key->vertexpointsize || key->pointscale) { 430bf215546Sopenharmony_ci oPsz = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_PSIZE, 0, 431bf215546Sopenharmony_ci TGSI_WRITEMASK_X, 0, 1); 432bf215546Sopenharmony_ci oPsz = ureg_writemask(oPsz, TGSI_WRITEMASK_X); 433bf215546Sopenharmony_ci } 434bf215546Sopenharmony_ci 435bf215546Sopenharmony_ci if (key->lighting || key->vertexblend) 436bf215546Sopenharmony_ci AR = ureg_DECL_address(ureg); 437bf215546Sopenharmony_ci 438bf215546Sopenharmony_ci /* === Vertex transformation / vertex blending: 439bf215546Sopenharmony_ci */ 440bf215546Sopenharmony_ci 441bf215546Sopenharmony_ci if (key->position_t) { 442bf215546Sopenharmony_ci if (device->driver_caps.window_space_position_support) { 443bf215546Sopenharmony_ci ureg_MOV(ureg, oPos, vs->aVtx); 444bf215546Sopenharmony_ci } else { 445bf215546Sopenharmony_ci struct ureg_dst tmp = ureg_DECL_temporary(ureg); 446bf215546Sopenharmony_ci /* vs->aVtx contains the coordinates buffer wise. 447bf215546Sopenharmony_ci * later in the pipeline, clipping, viewport and division 448bf215546Sopenharmony_ci * by w (rhw = 1/w) are going to be applied, so do the reverse 449bf215546Sopenharmony_ci * of these transformations (except clipping) to have the good 450bf215546Sopenharmony_ci * position at the end.*/ 451bf215546Sopenharmony_ci ureg_MOV(ureg, tmp, vs->aVtx); 452bf215546Sopenharmony_ci /* X from [X_min, X_min + width] to [-1, 1], same for Y. Z to [0, 1] */ 453bf215546Sopenharmony_ci ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), ureg_negate(_CONST(101))); 454bf215546Sopenharmony_ci ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), _CONST(100)); 455bf215546Sopenharmony_ci ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XY), ureg_src(tmp), ureg_imm1f(ureg, -1.0f)); 456bf215546Sopenharmony_ci /* Y needs to be reversed */ 457bf215546Sopenharmony_ci ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_negate(ureg_src(tmp))); 458bf215546Sopenharmony_ci /* Replace w by 1 if it equals to 0 */ 459bf215546Sopenharmony_ci ureg_CMP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), ureg_negate(ureg_abs(ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_W))), 460bf215546Sopenharmony_ci ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_W), ureg_imm1f(ureg, 1.0f)); 461bf215546Sopenharmony_ci /* inverse rhw */ 462bf215546Sopenharmony_ci ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), _W(tmp)); 463bf215546Sopenharmony_ci /* multiply X, Y, Z by w */ 464bf215546Sopenharmony_ci ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), _W(tmp)); 465bf215546Sopenharmony_ci ureg_MOV(ureg, oPos, ureg_src(tmp)); 466bf215546Sopenharmony_ci ureg_release_temporary(ureg, tmp); 467bf215546Sopenharmony_ci } 468bf215546Sopenharmony_ci } else if (key->vertexblend) { 469bf215546Sopenharmony_ci struct ureg_dst tmp = ureg_DECL_temporary(ureg); 470bf215546Sopenharmony_ci struct ureg_dst tmp2 = ureg_DECL_temporary(ureg); 471bf215546Sopenharmony_ci struct ureg_dst aVtx_dst = ureg_DECL_temporary(ureg); 472bf215546Sopenharmony_ci struct ureg_dst aNrm_dst = ureg_DECL_temporary(ureg); 473bf215546Sopenharmony_ci struct ureg_dst sum_blendweights = ureg_DECL_temporary(ureg); 474bf215546Sopenharmony_ci struct ureg_src cWM[4]; 475bf215546Sopenharmony_ci 476bf215546Sopenharmony_ci for (i = 160; i <= 195; ++i) 477bf215546Sopenharmony_ci ureg_DECL_constant(ureg, i); 478bf215546Sopenharmony_ci 479bf215546Sopenharmony_ci /* translate world matrix index to constant file index */ 480bf215546Sopenharmony_ci if (key->vertexblend_indexed) { 481bf215546Sopenharmony_ci ureg_MAD(ureg, tmp, vs->aInd, ureg_imm1f(ureg, 4.0f), ureg_imm1f(ureg, 160.0f)); 482bf215546Sopenharmony_ci ureg_ARL(ureg, AR, ureg_src(tmp)); 483bf215546Sopenharmony_ci } 484bf215546Sopenharmony_ci 485bf215546Sopenharmony_ci ureg_MOV(ureg, aVtx_dst, ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 0.0f)); 486bf215546Sopenharmony_ci ureg_MOV(ureg, aNrm_dst, ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 0.0f)); 487bf215546Sopenharmony_ci ureg_MOV(ureg, sum_blendweights, ureg_imm4f(ureg, 1.0f, 1.0f, 1.0f, 1.0f)); 488bf215546Sopenharmony_ci 489bf215546Sopenharmony_ci for (i = 0; i < key->vertexblend; ++i) { 490bf215546Sopenharmony_ci for (c = 0; c < 4; ++c) { 491bf215546Sopenharmony_ci cWM[c] = ureg_src_dimension(ureg_src_register(TGSI_FILE_CONSTANT, (160 + i * 4) * !key->vertexblend_indexed + c), 0); 492bf215546Sopenharmony_ci if (key->vertexblend_indexed) 493bf215546Sopenharmony_ci cWM[c] = ureg_src_indirect(cWM[c], ureg_scalar(ureg_src(AR), i)); 494bf215546Sopenharmony_ci } 495bf215546Sopenharmony_ci 496bf215546Sopenharmony_ci /* multiply by WORLD(index) */ 497bf215546Sopenharmony_ci ureg_MUL(ureg, tmp, _XXXX(vs->aVtx), cWM[0]); 498bf215546Sopenharmony_ci ureg_MAD(ureg, tmp, _YYYY(vs->aVtx), cWM[1], ureg_src(tmp)); 499bf215546Sopenharmony_ci ureg_MAD(ureg, tmp, _ZZZZ(vs->aVtx), cWM[2], ureg_src(tmp)); 500bf215546Sopenharmony_ci ureg_MAD(ureg, tmp, _WWWW(vs->aVtx), cWM[3], ureg_src(tmp)); 501bf215546Sopenharmony_ci 502bf215546Sopenharmony_ci if (has_aNrm) { 503bf215546Sopenharmony_ci /* Note: the spec says the transpose of the inverse of the 504bf215546Sopenharmony_ci * WorldView matrices should be used, but all tests show 505bf215546Sopenharmony_ci * otherwise. 506bf215546Sopenharmony_ci * Only case unknown: D3DVBF_0WEIGHTS */ 507bf215546Sopenharmony_ci ureg_MUL(ureg, tmp2, _XXXX(vs->aNrm), cWM[0]); 508bf215546Sopenharmony_ci ureg_MAD(ureg, tmp2, _YYYY(vs->aNrm), cWM[1], ureg_src(tmp2)); 509bf215546Sopenharmony_ci ureg_MAD(ureg, tmp2, _ZZZZ(vs->aNrm), cWM[2], ureg_src(tmp2)); 510bf215546Sopenharmony_ci } 511bf215546Sopenharmony_ci 512bf215546Sopenharmony_ci if (i < (key->vertexblend - 1)) { 513bf215546Sopenharmony_ci /* accumulate weighted position value */ 514bf215546Sopenharmony_ci ureg_MAD(ureg, aVtx_dst, ureg_src(tmp), ureg_scalar(vs->aWgt, i), ureg_src(aVtx_dst)); 515bf215546Sopenharmony_ci if (has_aNrm) 516bf215546Sopenharmony_ci ureg_MAD(ureg, aNrm_dst, ureg_src(tmp2), ureg_scalar(vs->aWgt, i), ureg_src(aNrm_dst)); 517bf215546Sopenharmony_ci /* subtract weighted position value for last value */ 518bf215546Sopenharmony_ci ureg_ADD(ureg, sum_blendweights, ureg_src(sum_blendweights), ureg_negate(ureg_scalar(vs->aWgt, i))); 519bf215546Sopenharmony_ci } 520bf215546Sopenharmony_ci } 521bf215546Sopenharmony_ci 522bf215546Sopenharmony_ci /* the last weighted position is always 1 - sum_of_previous_weights */ 523bf215546Sopenharmony_ci ureg_MAD(ureg, aVtx_dst, ureg_src(tmp), ureg_scalar(ureg_src(sum_blendweights), key->vertexblend - 1), ureg_src(aVtx_dst)); 524bf215546Sopenharmony_ci if (has_aNrm) 525bf215546Sopenharmony_ci ureg_MAD(ureg, aNrm_dst, ureg_src(tmp2), ureg_scalar(ureg_src(sum_blendweights), key->vertexblend - 1), ureg_src(aNrm_dst)); 526bf215546Sopenharmony_ci 527bf215546Sopenharmony_ci /* multiply by VIEW_PROJ */ 528bf215546Sopenharmony_ci ureg_MUL(ureg, tmp, _X(aVtx_dst), _CONST(8)); 529bf215546Sopenharmony_ci ureg_MAD(ureg, tmp, _Y(aVtx_dst), _CONST(9), ureg_src(tmp)); 530bf215546Sopenharmony_ci ureg_MAD(ureg, tmp, _Z(aVtx_dst), _CONST(10), ureg_src(tmp)); 531bf215546Sopenharmony_ci ureg_MAD(ureg, oPos, _W(aVtx_dst), _CONST(11), ureg_src(tmp)); 532bf215546Sopenharmony_ci 533bf215546Sopenharmony_ci if (need_aVtx) 534bf215546Sopenharmony_ci vs->aVtx = ureg_src(aVtx_dst); 535bf215546Sopenharmony_ci 536bf215546Sopenharmony_ci ureg_release_temporary(ureg, tmp); 537bf215546Sopenharmony_ci ureg_release_temporary(ureg, tmp2); 538bf215546Sopenharmony_ci ureg_release_temporary(ureg, sum_blendweights); 539bf215546Sopenharmony_ci if (!need_aVtx) 540bf215546Sopenharmony_ci ureg_release_temporary(ureg, aVtx_dst); 541bf215546Sopenharmony_ci 542bf215546Sopenharmony_ci if (has_aNrm) { 543bf215546Sopenharmony_ci if (key->normalizenormals) 544bf215546Sopenharmony_ci ureg_normalize3(ureg, aNrm_dst, ureg_src(aNrm_dst)); 545bf215546Sopenharmony_ci vs->aNrm = ureg_src(aNrm_dst); 546bf215546Sopenharmony_ci } else 547bf215546Sopenharmony_ci ureg_release_temporary(ureg, aNrm_dst); 548bf215546Sopenharmony_ci } else { 549bf215546Sopenharmony_ci struct ureg_dst tmp = ureg_DECL_temporary(ureg); 550bf215546Sopenharmony_ci 551bf215546Sopenharmony_ci if (key->vertextween) { 552bf215546Sopenharmony_ci struct ureg_dst aVtx_dst = ureg_DECL_temporary(ureg); 553bf215546Sopenharmony_ci ureg_LRP(ureg, aVtx_dst, _XXXX(_CONST(30)), vs->aVtx1, vs->aVtx); 554bf215546Sopenharmony_ci vs->aVtx = ureg_src(aVtx_dst); 555bf215546Sopenharmony_ci if (has_aNrm) { 556bf215546Sopenharmony_ci struct ureg_dst aNrm_dst = ureg_DECL_temporary(ureg); 557bf215546Sopenharmony_ci ureg_LRP(ureg, aNrm_dst, _XXXX(_CONST(30)), vs->aNrm1, vs->aNrm); 558bf215546Sopenharmony_ci vs->aNrm = ureg_src(aNrm_dst); 559bf215546Sopenharmony_ci } 560bf215546Sopenharmony_ci } 561bf215546Sopenharmony_ci 562bf215546Sopenharmony_ci /* position = vertex * WORLD_VIEW_PROJ */ 563bf215546Sopenharmony_ci ureg_MUL(ureg, tmp, _XXXX(vs->aVtx), _CONST(0)); 564bf215546Sopenharmony_ci ureg_MAD(ureg, tmp, _YYYY(vs->aVtx), _CONST(1), ureg_src(tmp)); 565bf215546Sopenharmony_ci ureg_MAD(ureg, tmp, _ZZZZ(vs->aVtx), _CONST(2), ureg_src(tmp)); 566bf215546Sopenharmony_ci ureg_MAD(ureg, oPos, _WWWW(vs->aVtx), _CONST(3), ureg_src(tmp)); 567bf215546Sopenharmony_ci ureg_release_temporary(ureg, tmp); 568bf215546Sopenharmony_ci 569bf215546Sopenharmony_ci if (need_aVtx) { 570bf215546Sopenharmony_ci struct ureg_dst aVtx_dst = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ); 571bf215546Sopenharmony_ci ureg_MUL(ureg, aVtx_dst, _XXXX(vs->aVtx), _CONST(4)); 572bf215546Sopenharmony_ci ureg_MAD(ureg, aVtx_dst, _YYYY(vs->aVtx), _CONST(5), ureg_src(aVtx_dst)); 573bf215546Sopenharmony_ci ureg_MAD(ureg, aVtx_dst, _ZZZZ(vs->aVtx), _CONST(6), ureg_src(aVtx_dst)); 574bf215546Sopenharmony_ci ureg_MAD(ureg, aVtx_dst, _WWWW(vs->aVtx), _CONST(7), ureg_src(aVtx_dst)); 575bf215546Sopenharmony_ci vs->aVtx = ureg_src(aVtx_dst); 576bf215546Sopenharmony_ci } 577bf215546Sopenharmony_ci if (has_aNrm) { 578bf215546Sopenharmony_ci struct ureg_dst aNrm_dst = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ); 579bf215546Sopenharmony_ci ureg_MUL(ureg, aNrm_dst, _XXXX(vs->aNrm), _CONST(16)); 580bf215546Sopenharmony_ci ureg_MAD(ureg, aNrm_dst, _YYYY(vs->aNrm), _CONST(17), ureg_src(aNrm_dst)); 581bf215546Sopenharmony_ci ureg_MAD(ureg, aNrm_dst, _ZZZZ(vs->aNrm), _CONST(18), ureg_src(aNrm_dst)); 582bf215546Sopenharmony_ci if (key->normalizenormals) 583bf215546Sopenharmony_ci ureg_normalize3(ureg, aNrm_dst, ureg_src(aNrm_dst)); 584bf215546Sopenharmony_ci vs->aNrm = ureg_src(aNrm_dst); 585bf215546Sopenharmony_ci } 586bf215546Sopenharmony_ci } 587bf215546Sopenharmony_ci 588bf215546Sopenharmony_ci /* === Process point size: 589bf215546Sopenharmony_ci */ 590bf215546Sopenharmony_ci if (key->vertexpointsize || key->pointscale) { 591bf215546Sopenharmony_ci struct ureg_dst tmp = ureg_DECL_temporary(ureg); 592bf215546Sopenharmony_ci struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X); 593bf215546Sopenharmony_ci struct ureg_dst tmp_y = ureg_writemask(tmp, TGSI_WRITEMASK_Y); 594bf215546Sopenharmony_ci struct ureg_dst tmp_z = ureg_writemask(tmp, TGSI_WRITEMASK_Z); 595bf215546Sopenharmony_ci if (key->vertexpointsize) { 596bf215546Sopenharmony_ci struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26); 597bf215546Sopenharmony_ci ureg_MAX(ureg, tmp_z, _XXXX(vs->aPsz), _XXXX(cPsz1)); 598bf215546Sopenharmony_ci ureg_MIN(ureg, tmp_z, _Z(tmp), _YYYY(cPsz1)); 599bf215546Sopenharmony_ci } else { 600bf215546Sopenharmony_ci struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26); 601bf215546Sopenharmony_ci ureg_MOV(ureg, tmp_z, _ZZZZ(cPsz1)); 602bf215546Sopenharmony_ci } 603bf215546Sopenharmony_ci 604bf215546Sopenharmony_ci if (key->pointscale) { 605bf215546Sopenharmony_ci struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26); 606bf215546Sopenharmony_ci struct ureg_src cPsz2 = ureg_DECL_constant(ureg, 27); 607bf215546Sopenharmony_ci 608bf215546Sopenharmony_ci ureg_DP3(ureg, tmp_x, vs->aVtx, vs->aVtx); 609bf215546Sopenharmony_ci ureg_RSQ(ureg, tmp_y, _X(tmp)); 610bf215546Sopenharmony_ci ureg_MUL(ureg, tmp_y, _Y(tmp), _X(tmp)); 611bf215546Sopenharmony_ci ureg_CMP(ureg, tmp_y, ureg_negate(_Y(tmp)), _Y(tmp), ureg_imm1f(ureg, 0.0f)); 612bf215546Sopenharmony_ci ureg_MAD(ureg, tmp_x, _Y(tmp), _YYYY(cPsz2), _XXXX(cPsz2)); 613bf215546Sopenharmony_ci ureg_MAD(ureg, tmp_x, _Y(tmp), _X(tmp), _WWWW(cPsz1)); 614bf215546Sopenharmony_ci ureg_RSQ(ureg, tmp_x, _X(tmp)); 615bf215546Sopenharmony_ci ureg_MUL(ureg, tmp_x, _X(tmp), _Z(tmp)); 616bf215546Sopenharmony_ci ureg_MUL(ureg, tmp_x, _X(tmp), _WWWW(_CONST(100))); 617bf215546Sopenharmony_ci ureg_MAX(ureg, tmp_x, _X(tmp), _XXXX(cPsz1)); 618bf215546Sopenharmony_ci ureg_MIN(ureg, tmp_z, _X(tmp), _YYYY(cPsz1)); 619bf215546Sopenharmony_ci } 620bf215546Sopenharmony_ci 621bf215546Sopenharmony_ci ureg_MOV(ureg, oPsz, _Z(tmp)); 622bf215546Sopenharmony_ci ureg_release_temporary(ureg, tmp); 623bf215546Sopenharmony_ci } 624bf215546Sopenharmony_ci 625bf215546Sopenharmony_ci for (i = 0; i < 8; ++i) { 626bf215546Sopenharmony_ci struct ureg_dst tmp, tmp_x, tmp2; 627bf215546Sopenharmony_ci struct ureg_dst oTex, input_coord, transformed, t, aVtx_normed; 628bf215546Sopenharmony_ci unsigned c, writemask; 629bf215546Sopenharmony_ci const unsigned tci = (key->tc_gen >> (i * 3)) & 0x7; 630bf215546Sopenharmony_ci const unsigned idx = (key->tc_idx >> (i * 3)) & 0x7; 631bf215546Sopenharmony_ci unsigned dim_input = 1 + ((key->tc_dim_input >> (i * 2)) & 0x3); 632bf215546Sopenharmony_ci const unsigned dim_output = (key->tc_dim_output >> (i * 3)) & 0x7; 633bf215546Sopenharmony_ci 634bf215546Sopenharmony_ci /* No texture output of index s */ 635bf215546Sopenharmony_ci if (tci == NINED3DTSS_TCI_DISABLE) 636bf215546Sopenharmony_ci continue; 637bf215546Sopenharmony_ci oTex = ureg_DECL_output(ureg, texcoord_sn, i); 638bf215546Sopenharmony_ci tmp = ureg_DECL_temporary(ureg); 639bf215546Sopenharmony_ci tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X); 640bf215546Sopenharmony_ci input_coord = ureg_DECL_temporary(ureg); 641bf215546Sopenharmony_ci transformed = ureg_DECL_temporary(ureg); 642bf215546Sopenharmony_ci 643bf215546Sopenharmony_ci /* Get the coordinate */ 644bf215546Sopenharmony_ci switch (tci) { 645bf215546Sopenharmony_ci case NINED3DTSS_TCI_PASSTHRU: 646bf215546Sopenharmony_ci /* NINED3DTSS_TCI_PASSTHRU => Use texcoord coming from index idx * 647bf215546Sopenharmony_ci * Else the idx is used only to determine wrapping mode. */ 648bf215546Sopenharmony_ci vs->aTex[idx] = build_vs_add_input(vs, NINE_DECLUSAGE_i(TEXCOORD,idx)); 649bf215546Sopenharmony_ci ureg_MOV(ureg, input_coord, vs->aTex[idx]); 650bf215546Sopenharmony_ci break; 651bf215546Sopenharmony_ci case NINED3DTSS_TCI_CAMERASPACENORMAL: 652bf215546Sopenharmony_ci ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), vs->aNrm); 653bf215546Sopenharmony_ci ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f)); 654bf215546Sopenharmony_ci dim_input = 4; 655bf215546Sopenharmony_ci break; 656bf215546Sopenharmony_ci case NINED3DTSS_TCI_CAMERASPACEPOSITION: 657bf215546Sopenharmony_ci ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), vs->aVtx); 658bf215546Sopenharmony_ci ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f)); 659bf215546Sopenharmony_ci dim_input = 4; 660bf215546Sopenharmony_ci break; 661bf215546Sopenharmony_ci case NINED3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR: 662bf215546Sopenharmony_ci tmp.WriteMask = TGSI_WRITEMASK_XYZ; 663bf215546Sopenharmony_ci aVtx_normed = ureg_DECL_temporary(ureg); 664bf215546Sopenharmony_ci ureg_normalize3(ureg, aVtx_normed, vs->aVtx); 665bf215546Sopenharmony_ci ureg_DP3(ureg, tmp_x, ureg_src(aVtx_normed), vs->aNrm); 666bf215546Sopenharmony_ci ureg_MUL(ureg, tmp, vs->aNrm, _X(tmp)); 667bf215546Sopenharmony_ci ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_src(tmp)); 668bf215546Sopenharmony_ci ureg_ADD(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), ureg_src(aVtx_normed), ureg_negate(ureg_src(tmp))); 669bf215546Sopenharmony_ci ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f)); 670bf215546Sopenharmony_ci ureg_release_temporary(ureg, aVtx_normed); 671bf215546Sopenharmony_ci dim_input = 4; 672bf215546Sopenharmony_ci tmp.WriteMask = TGSI_WRITEMASK_XYZW; 673bf215546Sopenharmony_ci break; 674bf215546Sopenharmony_ci case NINED3DTSS_TCI_SPHEREMAP: 675bf215546Sopenharmony_ci /* Implement the formula of GL_SPHERE_MAP */ 676bf215546Sopenharmony_ci tmp.WriteMask = TGSI_WRITEMASK_XYZ; 677bf215546Sopenharmony_ci aVtx_normed = ureg_DECL_temporary(ureg); 678bf215546Sopenharmony_ci tmp2 = ureg_DECL_temporary(ureg); 679bf215546Sopenharmony_ci ureg_normalize3(ureg, aVtx_normed, vs->aVtx); 680bf215546Sopenharmony_ci ureg_DP3(ureg, tmp_x, ureg_src(aVtx_normed), vs->aNrm); 681bf215546Sopenharmony_ci ureg_MUL(ureg, tmp, vs->aNrm, _X(tmp)); 682bf215546Sopenharmony_ci ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_src(tmp)); 683bf215546Sopenharmony_ci ureg_ADD(ureg, tmp, ureg_src(aVtx_normed), ureg_negate(ureg_src(tmp))); 684bf215546Sopenharmony_ci /* now tmp = normed(Vtx) - 2 dot3(normed(Vtx), Nrm) Nrm */ 685bf215546Sopenharmony_ci ureg_MOV(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_XYZ), ureg_src(tmp)); 686bf215546Sopenharmony_ci ureg_MUL(ureg, tmp2, ureg_src(tmp2), ureg_src(tmp2)); 687bf215546Sopenharmony_ci ureg_DP3(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_X), ureg_src(tmp2), ureg_src(tmp2)); 688bf215546Sopenharmony_ci ureg_RSQ(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_X), ureg_src(tmp2)); 689bf215546Sopenharmony_ci ureg_MUL(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_X), ureg_src(tmp2), ureg_imm1f(ureg, 0.5f)); 690bf215546Sopenharmony_ci /* tmp2 = 0.5 / sqrt(tmp.x^2 + tmp.y^2 + (tmp.z+1)^2) 691bf215546Sopenharmony_ci * TODO: z coordinates are a bit different gl vs d3d, should the formula be adapted ? */ 692bf215546Sopenharmony_ci ureg_MUL(ureg, tmp, ureg_src(tmp), _X(tmp2)); 693bf215546Sopenharmony_ci ureg_ADD(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XY), ureg_src(tmp), ureg_imm1f(ureg, 0.5f)); 694bf215546Sopenharmony_ci ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_ZW), ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f)); 695bf215546Sopenharmony_ci ureg_release_temporary(ureg, aVtx_normed); 696bf215546Sopenharmony_ci ureg_release_temporary(ureg, tmp2); 697bf215546Sopenharmony_ci dim_input = 4; 698bf215546Sopenharmony_ci tmp.WriteMask = TGSI_WRITEMASK_XYZW; 699bf215546Sopenharmony_ci break; 700bf215546Sopenharmony_ci default: 701bf215546Sopenharmony_ci assert(0); 702bf215546Sopenharmony_ci break; 703bf215546Sopenharmony_ci } 704bf215546Sopenharmony_ci 705bf215546Sopenharmony_ci /* Apply the transformation */ 706bf215546Sopenharmony_ci /* dim_output == 0 => do not transform the components. 707bf215546Sopenharmony_ci * XYZRHW also disables transformation */ 708bf215546Sopenharmony_ci if (!dim_output || key->position_t) { 709bf215546Sopenharmony_ci ureg_release_temporary(ureg, transformed); 710bf215546Sopenharmony_ci transformed = input_coord; 711bf215546Sopenharmony_ci writemask = TGSI_WRITEMASK_XYZW; 712bf215546Sopenharmony_ci } else { 713bf215546Sopenharmony_ci for (c = 0; c < dim_output; c++) { 714bf215546Sopenharmony_ci t = ureg_writemask(transformed, 1 << c); 715bf215546Sopenharmony_ci switch (dim_input) { 716bf215546Sopenharmony_ci /* dim_input = 1 2 3: -> we add trailing 1 to input*/ 717bf215546Sopenharmony_ci case 1: ureg_MAD(ureg, t, _X(input_coord), _XXXX(_CONST(128 + i * 4 + c)), _YYYY(_CONST(128 + i * 4 + c))); 718bf215546Sopenharmony_ci break; 719bf215546Sopenharmony_ci case 2: ureg_DP2(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c)); 720bf215546Sopenharmony_ci ureg_ADD(ureg, t, ureg_src(transformed), _ZZZZ(_CONST(128 + i * 4 + c))); 721bf215546Sopenharmony_ci break; 722bf215546Sopenharmony_ci case 3: ureg_DP3(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c)); 723bf215546Sopenharmony_ci ureg_ADD(ureg, t, ureg_src(transformed), _WWWW(_CONST(128 + i * 4 + c))); 724bf215546Sopenharmony_ci break; 725bf215546Sopenharmony_ci case 4: ureg_DP4(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c)); break; 726bf215546Sopenharmony_ci default: 727bf215546Sopenharmony_ci assert(0); 728bf215546Sopenharmony_ci } 729bf215546Sopenharmony_ci } 730bf215546Sopenharmony_ci writemask = (1 << dim_output) - 1; 731bf215546Sopenharmony_ci ureg_release_temporary(ureg, input_coord); 732bf215546Sopenharmony_ci } 733bf215546Sopenharmony_ci 734bf215546Sopenharmony_ci ureg_MOV(ureg, ureg_writemask(oTex, writemask), ureg_src(transformed)); 735bf215546Sopenharmony_ci ureg_release_temporary(ureg, transformed); 736bf215546Sopenharmony_ci ureg_release_temporary(ureg, tmp); 737bf215546Sopenharmony_ci } 738bf215546Sopenharmony_ci 739bf215546Sopenharmony_ci /* === Lighting: 740bf215546Sopenharmony_ci * 741bf215546Sopenharmony_ci * DIRECTIONAL: Light at infinite distance, parallel rays, no attenuation. 742bf215546Sopenharmony_ci * POINT: Finite distance to scene, divergent rays, isotropic, attenuation. 743bf215546Sopenharmony_ci * SPOT: Finite distance, divergent rays, angular dependence, attenuation. 744bf215546Sopenharmony_ci * 745bf215546Sopenharmony_ci * vec3 normal = normalize(in.Normal * NormalMatrix); 746bf215546Sopenharmony_ci * vec3 hitDir = light.direction; 747bf215546Sopenharmony_ci * float atten = 1.0; 748bf215546Sopenharmony_ci * 749bf215546Sopenharmony_ci * if (light.type != DIRECTIONAL) 750bf215546Sopenharmony_ci * { 751bf215546Sopenharmony_ci * vec3 hitVec = light.position - eyeVertex; 752bf215546Sopenharmony_ci * float d = length(hitVec); 753bf215546Sopenharmony_ci * hitDir = hitVec / d; 754bf215546Sopenharmony_ci * atten = 1 / ((light.atten2 * d + light.atten1) * d + light.atten0); 755bf215546Sopenharmony_ci * } 756bf215546Sopenharmony_ci * 757bf215546Sopenharmony_ci * if (light.type == SPOTLIGHT) 758bf215546Sopenharmony_ci * { 759bf215546Sopenharmony_ci * float rho = dp3(-hitVec, light.direction); 760bf215546Sopenharmony_ci * if (rho < cos(light.phi / 2)) 761bf215546Sopenharmony_ci * atten = 0; 762bf215546Sopenharmony_ci * if (rho < cos(light.theta / 2)) 763bf215546Sopenharmony_ci * atten *= pow(some_func(rho), light.falloff); 764bf215546Sopenharmony_ci * } 765bf215546Sopenharmony_ci * 766bf215546Sopenharmony_ci * float nDotHit = dp3_sat(normal, hitVec); 767bf215546Sopenharmony_ci * float powFact = 0.0; 768bf215546Sopenharmony_ci * 769bf215546Sopenharmony_ci * if (nDotHit > 0.0) 770bf215546Sopenharmony_ci * { 771bf215546Sopenharmony_ci * vec3 midVec = normalize(hitDir + eye); 772bf215546Sopenharmony_ci * float nDotMid = dp3_sat(normal, midVec); 773bf215546Sopenharmony_ci * pFact = pow(nDotMid, material.power); 774bf215546Sopenharmony_ci * } 775bf215546Sopenharmony_ci * 776bf215546Sopenharmony_ci * ambient += light.ambient * atten; 777bf215546Sopenharmony_ci * diffuse += light.diffuse * atten * nDotHit; 778bf215546Sopenharmony_ci * specular += light.specular * atten * powFact; 779bf215546Sopenharmony_ci */ 780bf215546Sopenharmony_ci if (key->lighting) { 781bf215546Sopenharmony_ci struct ureg_dst tmp = ureg_DECL_temporary(ureg); 782bf215546Sopenharmony_ci struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X); 783bf215546Sopenharmony_ci struct ureg_dst tmp_y = ureg_writemask(tmp, TGSI_WRITEMASK_Y); 784bf215546Sopenharmony_ci struct ureg_dst tmp_z = ureg_writemask(tmp, TGSI_WRITEMASK_Z); 785bf215546Sopenharmony_ci struct ureg_dst rAtt = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_W); 786bf215546Sopenharmony_ci struct ureg_dst rHit = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ); 787bf215546Sopenharmony_ci struct ureg_dst rMid = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ); 788bf215546Sopenharmony_ci 789bf215546Sopenharmony_ci struct ureg_dst rCtr = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_W); 790bf215546Sopenharmony_ci 791bf215546Sopenharmony_ci struct ureg_dst AL = ureg_writemask(AR, TGSI_WRITEMASK_X); 792bf215546Sopenharmony_ci 793bf215546Sopenharmony_ci /* Light.*.Alpha is not used. */ 794bf215546Sopenharmony_ci struct ureg_dst rD = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ); 795bf215546Sopenharmony_ci struct ureg_dst rA = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ); 796bf215546Sopenharmony_ci struct ureg_dst rS = ureg_DECL_temporary(ureg); 797bf215546Sopenharmony_ci 798bf215546Sopenharmony_ci struct ureg_src mtlP = _XXXX(MATERIAL_CONST(4)); 799bf215546Sopenharmony_ci 800bf215546Sopenharmony_ci struct ureg_src cLKind = _XXXX(LIGHT_CONST(0)); 801bf215546Sopenharmony_ci struct ureg_src cLAtt0 = _YYYY(LIGHT_CONST(0)); 802bf215546Sopenharmony_ci struct ureg_src cLAtt1 = _ZZZZ(LIGHT_CONST(0)); 803bf215546Sopenharmony_ci struct ureg_src cLAtt2 = _WWWW(LIGHT_CONST(0)); 804bf215546Sopenharmony_ci struct ureg_src cLColD = _XYZW(LIGHT_CONST(1)); 805bf215546Sopenharmony_ci struct ureg_src cLColS = _XYZW(LIGHT_CONST(2)); 806bf215546Sopenharmony_ci struct ureg_src cLColA = _XYZW(LIGHT_CONST(3)); 807bf215546Sopenharmony_ci struct ureg_src cLPos = _XYZW(LIGHT_CONST(4)); 808bf215546Sopenharmony_ci struct ureg_src cLRng = _WWWW(LIGHT_CONST(4)); 809bf215546Sopenharmony_ci struct ureg_src cLDir = _XYZW(LIGHT_CONST(5)); 810bf215546Sopenharmony_ci struct ureg_src cLFOff = _WWWW(LIGHT_CONST(5)); 811bf215546Sopenharmony_ci struct ureg_src cLTht = _XXXX(LIGHT_CONST(6)); 812bf215546Sopenharmony_ci struct ureg_src cLPhi = _YYYY(LIGHT_CONST(6)); 813bf215546Sopenharmony_ci struct ureg_src cLSDiv = _ZZZZ(LIGHT_CONST(6)); 814bf215546Sopenharmony_ci struct ureg_src cLLast = _WWWW(LIGHT_CONST(7)); 815bf215546Sopenharmony_ci 816bf215546Sopenharmony_ci const unsigned loop_label = l++; 817bf215546Sopenharmony_ci 818bf215546Sopenharmony_ci /* Declare all light constants to allow indirect adressing */ 819bf215546Sopenharmony_ci for (i = 32; i < 96; i++) 820bf215546Sopenharmony_ci ureg_DECL_constant(ureg, i); 821bf215546Sopenharmony_ci 822bf215546Sopenharmony_ci ureg_MOV(ureg, rCtr, ureg_imm1f(ureg, 32.0f)); /* &lightconst(0) */ 823bf215546Sopenharmony_ci ureg_MOV(ureg, rD, ureg_imm1f(ureg, 0.0f)); 824bf215546Sopenharmony_ci ureg_MOV(ureg, rA, ureg_imm1f(ureg, 0.0f)); 825bf215546Sopenharmony_ci ureg_MOV(ureg, rS, ureg_imm1f(ureg, 0.0f)); 826bf215546Sopenharmony_ci 827bf215546Sopenharmony_ci /* loop management */ 828bf215546Sopenharmony_ci ureg_BGNLOOP(ureg, &label[loop_label]); 829bf215546Sopenharmony_ci ureg_ARL(ureg, AL, _W(rCtr)); 830bf215546Sopenharmony_ci 831bf215546Sopenharmony_ci /* if (not DIRECTIONAL light): */ 832bf215546Sopenharmony_ci ureg_SNE(ureg, tmp_x, cLKind, ureg_imm1f(ureg, D3DLIGHT_DIRECTIONAL)); 833bf215546Sopenharmony_ci ureg_MOV(ureg, rHit, ureg_negate(cLDir)); 834bf215546Sopenharmony_ci ureg_MOV(ureg, rAtt, ureg_imm1f(ureg, 1.0f)); 835bf215546Sopenharmony_ci ureg_IF(ureg, _X(tmp), &label[l++]); 836bf215546Sopenharmony_ci { 837bf215546Sopenharmony_ci /* hitDir = light.position - eyeVtx 838bf215546Sopenharmony_ci * d = length(hitDir) 839bf215546Sopenharmony_ci */ 840bf215546Sopenharmony_ci ureg_ADD(ureg, rHit, cLPos, ureg_negate(vs->aVtx)); 841bf215546Sopenharmony_ci ureg_DP3(ureg, tmp_x, ureg_src(rHit), ureg_src(rHit)); 842bf215546Sopenharmony_ci ureg_RSQ(ureg, tmp_y, _X(tmp)); 843bf215546Sopenharmony_ci ureg_MUL(ureg, tmp_x, _X(tmp), _Y(tmp)); /* length */ 844bf215546Sopenharmony_ci 845bf215546Sopenharmony_ci /* att = 1.0 / (light.att0 + (light.att1 + light.att2 * d) * d) */ 846bf215546Sopenharmony_ci ureg_MAD(ureg, rAtt, _X(tmp), cLAtt2, cLAtt1); 847bf215546Sopenharmony_ci ureg_MAD(ureg, rAtt, _X(tmp), _W(rAtt), cLAtt0); 848bf215546Sopenharmony_ci ureg_RCP(ureg, rAtt, _W(rAtt)); 849bf215546Sopenharmony_ci /* cut-off if distance exceeds Light.Range */ 850bf215546Sopenharmony_ci ureg_SLT(ureg, tmp_x, _X(tmp), cLRng); 851bf215546Sopenharmony_ci ureg_MUL(ureg, rAtt, _W(rAtt), _X(tmp)); 852bf215546Sopenharmony_ci } 853bf215546Sopenharmony_ci ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg)); 854bf215546Sopenharmony_ci ureg_ENDIF(ureg); 855bf215546Sopenharmony_ci 856bf215546Sopenharmony_ci /* normalize hitDir */ 857bf215546Sopenharmony_ci ureg_normalize3(ureg, rHit, ureg_src(rHit)); 858bf215546Sopenharmony_ci 859bf215546Sopenharmony_ci /* if (SPOT light) */ 860bf215546Sopenharmony_ci ureg_SEQ(ureg, tmp_x, cLKind, ureg_imm1f(ureg, D3DLIGHT_SPOT)); 861bf215546Sopenharmony_ci ureg_IF(ureg, _X(tmp), &label[l++]); 862bf215546Sopenharmony_ci { 863bf215546Sopenharmony_ci /* rho = dp3(-hitDir, light.spotDir) 864bf215546Sopenharmony_ci * 865bf215546Sopenharmony_ci * if (rho > light.ctht2) NOTE: 0 <= phi <= pi, 0 <= theta <= phi 866bf215546Sopenharmony_ci * spotAtt = 1 867bf215546Sopenharmony_ci * else 868bf215546Sopenharmony_ci * if (rho <= light.cphi2) 869bf215546Sopenharmony_ci * spotAtt = 0 870bf215546Sopenharmony_ci * else 871bf215546Sopenharmony_ci * spotAtt = (rho - light.cphi2) / (light.ctht2 - light.cphi2) ^ light.falloff 872bf215546Sopenharmony_ci */ 873bf215546Sopenharmony_ci ureg_DP3(ureg, tmp_y, ureg_negate(ureg_src(rHit)), cLDir); /* rho */ 874bf215546Sopenharmony_ci ureg_ADD(ureg, tmp_x, _Y(tmp), ureg_negate(cLPhi)); 875bf215546Sopenharmony_ci ureg_MUL(ureg, tmp_x, _X(tmp), cLSDiv); 876bf215546Sopenharmony_ci ureg_POW(ureg, tmp_x, _X(tmp), cLFOff); /* spotAtten */ 877bf215546Sopenharmony_ci ureg_SGE(ureg, tmp_z, _Y(tmp), cLTht); /* if inside theta && phi */ 878bf215546Sopenharmony_ci ureg_SGE(ureg, tmp_y, _Y(tmp), cLPhi); /* if inside phi */ 879bf215546Sopenharmony_ci ureg_MAD(ureg, ureg_saturate(tmp_x), _X(tmp), _Y(tmp), _Z(tmp)); 880bf215546Sopenharmony_ci ureg_MUL(ureg, rAtt, _W(rAtt), _X(tmp)); 881bf215546Sopenharmony_ci } 882bf215546Sopenharmony_ci ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg)); 883bf215546Sopenharmony_ci ureg_ENDIF(ureg); 884bf215546Sopenharmony_ci 885bf215546Sopenharmony_ci /* directional factors, let's not use LIT because of clarity */ 886bf215546Sopenharmony_ci 887bf215546Sopenharmony_ci if (has_aNrm) { 888bf215546Sopenharmony_ci if (key->localviewer) { 889bf215546Sopenharmony_ci ureg_normalize3(ureg, rMid, vs->aVtx); 890bf215546Sopenharmony_ci ureg_ADD(ureg, rMid, ureg_src(rHit), ureg_negate(ureg_src(rMid))); 891bf215546Sopenharmony_ci } else { 892bf215546Sopenharmony_ci ureg_ADD(ureg, rMid, ureg_src(rHit), ureg_imm3f(ureg, 0.0f, 0.0f, -1.0f)); 893bf215546Sopenharmony_ci } 894bf215546Sopenharmony_ci ureg_normalize3(ureg, rMid, ureg_src(rMid)); 895bf215546Sopenharmony_ci ureg_DP3(ureg, ureg_saturate(tmp_x), vs->aNrm, ureg_src(rHit)); 896bf215546Sopenharmony_ci ureg_DP3(ureg, ureg_saturate(tmp_y), vs->aNrm, ureg_src(rMid)); 897bf215546Sopenharmony_ci ureg_MUL(ureg, tmp_z, _X(tmp), _Y(tmp)); 898bf215546Sopenharmony_ci /* Tests show that specular is computed only if (dp3(normal,hitDir) > 0). 899bf215546Sopenharmony_ci * For front facing, it is more restrictive than test (dp3(normal,mid) > 0). 900bf215546Sopenharmony_ci * No tests were made for backfacing, so add the two conditions */ 901bf215546Sopenharmony_ci ureg_IF(ureg, _Z(tmp), &label[l++]); 902bf215546Sopenharmony_ci { 903bf215546Sopenharmony_ci ureg_DP3(ureg, ureg_saturate(tmp_y), vs->aNrm, ureg_src(rMid)); 904bf215546Sopenharmony_ci ureg_POW(ureg, tmp_y, _Y(tmp), mtlP); 905bf215546Sopenharmony_ci ureg_MUL(ureg, tmp_y, _W(rAtt), _Y(tmp)); /* power factor * att */ 906bf215546Sopenharmony_ci ureg_MAD(ureg, rS, cLColS, _Y(tmp), ureg_src(rS)); /* accumulate specular */ 907bf215546Sopenharmony_ci } 908bf215546Sopenharmony_ci ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg)); 909bf215546Sopenharmony_ci ureg_ENDIF(ureg); 910bf215546Sopenharmony_ci 911bf215546Sopenharmony_ci ureg_MUL(ureg, tmp_x, _W(rAtt), _X(tmp)); /* dp3(normal,hitDir) * att */ 912bf215546Sopenharmony_ci ureg_MAD(ureg, rD, cLColD, _X(tmp), ureg_src(rD)); /* accumulate diffuse */ 913bf215546Sopenharmony_ci } 914bf215546Sopenharmony_ci 915bf215546Sopenharmony_ci ureg_MAD(ureg, rA, cLColA, _W(rAtt), ureg_src(rA)); /* accumulate ambient */ 916bf215546Sopenharmony_ci 917bf215546Sopenharmony_ci /* break if this was the last light */ 918bf215546Sopenharmony_ci ureg_IF(ureg, cLLast, &label[l++]); 919bf215546Sopenharmony_ci ureg_BRK(ureg); 920bf215546Sopenharmony_ci ureg_ENDIF(ureg); 921bf215546Sopenharmony_ci ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg)); 922bf215546Sopenharmony_ci 923bf215546Sopenharmony_ci ureg_ADD(ureg, rCtr, _W(rCtr), ureg_imm1f(ureg, 8.0f)); 924bf215546Sopenharmony_ci ureg_fixup_label(ureg, label[loop_label], ureg_get_instruction_number(ureg)); 925bf215546Sopenharmony_ci ureg_ENDLOOP(ureg, &label[loop_label]); 926bf215546Sopenharmony_ci 927bf215546Sopenharmony_ci /* Apply to material: 928bf215546Sopenharmony_ci * 929bf215546Sopenharmony_ci * oCol[0] = (material.emissive + material.ambient * rs.ambient) + 930bf215546Sopenharmony_ci * material.ambient * ambient + 931bf215546Sopenharmony_ci * material.diffuse * diffuse + 932bf215546Sopenharmony_ci * oCol[1] = material.specular * specular; 933bf215546Sopenharmony_ci */ 934bf215546Sopenharmony_ci if (key->mtl_emissive == 0 && key->mtl_ambient == 0) 935bf215546Sopenharmony_ci ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(rA), vs->mtlA, _CONST(19)); 936bf215546Sopenharmony_ci else { 937bf215546Sopenharmony_ci ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(rA), _CONST(25)); 938bf215546Sopenharmony_ci ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), vs->mtlA, ureg_src(tmp), vs->mtlE); 939bf215546Sopenharmony_ci } 940bf215546Sopenharmony_ci 941bf215546Sopenharmony_ci ureg_MAD(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_XYZ), ureg_src(rD), vs->mtlD, ureg_src(tmp)); 942bf215546Sopenharmony_ci ureg_MOV(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_W), vs->mtlD); 943bf215546Sopenharmony_ci ureg_MUL(ureg, oCol[1], ureg_src(rS), vs->mtlS); 944bf215546Sopenharmony_ci ureg_release_temporary(ureg, rAtt); 945bf215546Sopenharmony_ci ureg_release_temporary(ureg, rHit); 946bf215546Sopenharmony_ci ureg_release_temporary(ureg, rMid); 947bf215546Sopenharmony_ci ureg_release_temporary(ureg, rCtr); 948bf215546Sopenharmony_ci ureg_release_temporary(ureg, rD); 949bf215546Sopenharmony_ci ureg_release_temporary(ureg, rA); 950bf215546Sopenharmony_ci ureg_release_temporary(ureg, rS); 951bf215546Sopenharmony_ci ureg_release_temporary(ureg, rAtt); 952bf215546Sopenharmony_ci ureg_release_temporary(ureg, tmp); 953bf215546Sopenharmony_ci } else 954bf215546Sopenharmony_ci /* COLOR */ 955bf215546Sopenharmony_ci if (key->darkness) { 956bf215546Sopenharmony_ci if (key->mtl_emissive == 0 && key->mtl_ambient == 0) 957bf215546Sopenharmony_ci ureg_MOV(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_XYZ), _CONST(19)); 958bf215546Sopenharmony_ci else 959bf215546Sopenharmony_ci ureg_MAD(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_XYZ), vs->mtlA, _CONST(25), vs->mtlE); 960bf215546Sopenharmony_ci ureg_MOV(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_W), vs->mtlD); 961bf215546Sopenharmony_ci ureg_MOV(ureg, oCol[1], ureg_imm1f(ureg, 0.0f)); 962bf215546Sopenharmony_ci } else { 963bf215546Sopenharmony_ci ureg_MOV(ureg, oCol[0], vs->aCol[0]); 964bf215546Sopenharmony_ci ureg_MOV(ureg, oCol[1], vs->aCol[1]); 965bf215546Sopenharmony_ci } 966bf215546Sopenharmony_ci 967bf215546Sopenharmony_ci /* === Process fog. 968bf215546Sopenharmony_ci * 969bf215546Sopenharmony_ci * exp(x) = ex2(log2(e) * x) 970bf215546Sopenharmony_ci */ 971bf215546Sopenharmony_ci if (key->fog_mode) { 972bf215546Sopenharmony_ci struct ureg_dst tmp = ureg_DECL_temporary(ureg); 973bf215546Sopenharmony_ci struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X); 974bf215546Sopenharmony_ci struct ureg_dst tmp_z = ureg_writemask(tmp, TGSI_WRITEMASK_Z); 975bf215546Sopenharmony_ci if (key->fog_range) { 976bf215546Sopenharmony_ci ureg_DP3(ureg, tmp_x, vs->aVtx, vs->aVtx); 977bf215546Sopenharmony_ci ureg_RSQ(ureg, tmp_z, _X(tmp)); 978bf215546Sopenharmony_ci ureg_MUL(ureg, tmp_z, _Z(tmp), _X(tmp)); 979bf215546Sopenharmony_ci } else { 980bf215546Sopenharmony_ci ureg_MOV(ureg, tmp_z, ureg_abs(_ZZZZ(vs->aVtx))); 981bf215546Sopenharmony_ci } 982bf215546Sopenharmony_ci 983bf215546Sopenharmony_ci if (key->fog_mode == D3DFOG_EXP) { 984bf215546Sopenharmony_ci ureg_MUL(ureg, tmp_x, _Z(tmp), _ZZZZ(_CONST(28))); 985bf215546Sopenharmony_ci ureg_MUL(ureg, tmp_x, _X(tmp), ureg_imm1f(ureg, -1.442695f)); 986bf215546Sopenharmony_ci ureg_EX2(ureg, tmp_x, _X(tmp)); 987bf215546Sopenharmony_ci } else 988bf215546Sopenharmony_ci if (key->fog_mode == D3DFOG_EXP2) { 989bf215546Sopenharmony_ci ureg_MUL(ureg, tmp_x, _Z(tmp), _ZZZZ(_CONST(28))); 990bf215546Sopenharmony_ci ureg_MUL(ureg, tmp_x, _X(tmp), _X(tmp)); 991bf215546Sopenharmony_ci ureg_MUL(ureg, tmp_x, _X(tmp), ureg_imm1f(ureg, -1.442695f)); 992bf215546Sopenharmony_ci ureg_EX2(ureg, tmp_x, _X(tmp)); 993bf215546Sopenharmony_ci } else 994bf215546Sopenharmony_ci if (key->fog_mode == D3DFOG_LINEAR) { 995bf215546Sopenharmony_ci ureg_ADD(ureg, tmp_x, _XXXX(_CONST(28)), ureg_negate(_Z(tmp))); 996bf215546Sopenharmony_ci ureg_MUL(ureg, ureg_saturate(tmp_x), _X(tmp), _YYYY(_CONST(28))); 997bf215546Sopenharmony_ci } 998bf215546Sopenharmony_ci ureg_MOV(ureg, oFog, _X(tmp)); 999bf215546Sopenharmony_ci ureg_release_temporary(ureg, tmp); 1000bf215546Sopenharmony_ci } else if (key->fog && !(key->passthrough & (1 << NINE_DECLUSAGE_FOG))) { 1001bf215546Sopenharmony_ci ureg_MOV(ureg, oFog, ureg_scalar(vs->aCol[1], TGSI_SWIZZLE_W)); 1002bf215546Sopenharmony_ci } 1003bf215546Sopenharmony_ci 1004bf215546Sopenharmony_ci if (key->passthrough & (1 << NINE_DECLUSAGE_BLENDWEIGHT)) { 1005bf215546Sopenharmony_ci struct ureg_src input; 1006bf215546Sopenharmony_ci struct ureg_dst output; 1007bf215546Sopenharmony_ci input = vs->aWgt; 1008bf215546Sopenharmony_ci output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 19); 1009bf215546Sopenharmony_ci ureg_MOV(ureg, output, input); 1010bf215546Sopenharmony_ci } 1011bf215546Sopenharmony_ci if (key->passthrough & (1 << NINE_DECLUSAGE_BLENDINDICES)) { 1012bf215546Sopenharmony_ci struct ureg_src input; 1013bf215546Sopenharmony_ci struct ureg_dst output; 1014bf215546Sopenharmony_ci input = vs->aInd; 1015bf215546Sopenharmony_ci output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 20); 1016bf215546Sopenharmony_ci ureg_MOV(ureg, output, input); 1017bf215546Sopenharmony_ci } 1018bf215546Sopenharmony_ci if (key->passthrough & (1 << NINE_DECLUSAGE_NORMAL)) { 1019bf215546Sopenharmony_ci struct ureg_src input; 1020bf215546Sopenharmony_ci struct ureg_dst output; 1021bf215546Sopenharmony_ci input = vs->aNrm; 1022bf215546Sopenharmony_ci output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 21); 1023bf215546Sopenharmony_ci ureg_MOV(ureg, output, input); 1024bf215546Sopenharmony_ci } 1025bf215546Sopenharmony_ci if (key->passthrough & (1 << NINE_DECLUSAGE_TANGENT)) { 1026bf215546Sopenharmony_ci struct ureg_src input; 1027bf215546Sopenharmony_ci struct ureg_dst output; 1028bf215546Sopenharmony_ci input = build_vs_add_input(vs, NINE_DECLUSAGE_TANGENT); 1029bf215546Sopenharmony_ci output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 22); 1030bf215546Sopenharmony_ci ureg_MOV(ureg, output, input); 1031bf215546Sopenharmony_ci } 1032bf215546Sopenharmony_ci if (key->passthrough & (1 << NINE_DECLUSAGE_BINORMAL)) { 1033bf215546Sopenharmony_ci struct ureg_src input; 1034bf215546Sopenharmony_ci struct ureg_dst output; 1035bf215546Sopenharmony_ci input = build_vs_add_input(vs, NINE_DECLUSAGE_BINORMAL); 1036bf215546Sopenharmony_ci output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 23); 1037bf215546Sopenharmony_ci ureg_MOV(ureg, output, input); 1038bf215546Sopenharmony_ci } 1039bf215546Sopenharmony_ci if (key->passthrough & (1 << NINE_DECLUSAGE_FOG)) { 1040bf215546Sopenharmony_ci struct ureg_src input; 1041bf215546Sopenharmony_ci struct ureg_dst output; 1042bf215546Sopenharmony_ci input = build_vs_add_input(vs, NINE_DECLUSAGE_FOG); 1043bf215546Sopenharmony_ci input = ureg_scalar(input, TGSI_SWIZZLE_X); 1044bf215546Sopenharmony_ci output = oFog; 1045bf215546Sopenharmony_ci ureg_MOV(ureg, output, input); 1046bf215546Sopenharmony_ci } 1047bf215546Sopenharmony_ci if (key->passthrough & (1 << NINE_DECLUSAGE_DEPTH)) { 1048bf215546Sopenharmony_ci (void) 0; /* TODO: replace z of position output ? */ 1049bf215546Sopenharmony_ci } 1050bf215546Sopenharmony_ci 1051bf215546Sopenharmony_ci /* ucp for ff applies on world coordinates. 1052bf215546Sopenharmony_ci * aVtx is in worldview coordinates. */ 1053bf215546Sopenharmony_ci if (key->ucp) { 1054bf215546Sopenharmony_ci struct ureg_dst clipVect = ureg_DECL_output(ureg, TGSI_SEMANTIC_CLIPVERTEX, 0); 1055bf215546Sopenharmony_ci struct ureg_dst tmp = ureg_DECL_temporary(ureg); 1056bf215546Sopenharmony_ci ureg_MUL(ureg, tmp, _XXXX(vs->aVtx), _CONST(12)); 1057bf215546Sopenharmony_ci ureg_MAD(ureg, tmp, _YYYY(vs->aVtx), _CONST(13), ureg_src(tmp)); 1058bf215546Sopenharmony_ci ureg_MAD(ureg, tmp, _ZZZZ(vs->aVtx), _CONST(14), ureg_src(tmp)); 1059bf215546Sopenharmony_ci ureg_ADD(ureg, clipVect, _CONST(15), ureg_src(tmp)); 1060bf215546Sopenharmony_ci ureg_release_temporary(ureg, tmp); 1061bf215546Sopenharmony_ci } 1062bf215546Sopenharmony_ci 1063bf215546Sopenharmony_ci if (key->position_t && device->driver_caps.window_space_position_support) 1064bf215546Sopenharmony_ci ureg_property(ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE); 1065bf215546Sopenharmony_ci 1066bf215546Sopenharmony_ci ureg_END(ureg); 1067bf215546Sopenharmony_ci nine_ureg_tgsi_dump(ureg, FALSE); 1068bf215546Sopenharmony_ci return nine_create_shader_with_so_and_destroy(ureg, device->context.pipe, NULL); 1069bf215546Sopenharmony_ci} 1070bf215546Sopenharmony_ci 1071bf215546Sopenharmony_ci/* PS FF constants layout: 1072bf215546Sopenharmony_ci * 1073bf215546Sopenharmony_ci * CONST[ 0.. 7] stage[i].D3DTSS_CONSTANT 1074bf215546Sopenharmony_ci * CONST[ 8..15].x___ stage[i].D3DTSS_BUMPENVMAT00 1075bf215546Sopenharmony_ci * CONST[ 8..15]._y__ stage[i].D3DTSS_BUMPENVMAT01 1076bf215546Sopenharmony_ci * CONST[ 8..15].__z_ stage[i].D3DTSS_BUMPENVMAT10 1077bf215546Sopenharmony_ci * CONST[ 8..15].___w stage[i].D3DTSS_BUMPENVMAT11 1078bf215546Sopenharmony_ci * CONST[16..19].x_z_ stage[i].D3DTSS_BUMPENVLSCALE 1079bf215546Sopenharmony_ci * CONST[17..19]._y_w stage[i].D3DTSS_BUMPENVLOFFSET 1080bf215546Sopenharmony_ci * 1081bf215546Sopenharmony_ci * CONST[20] D3DRS_TEXTUREFACTOR 1082bf215546Sopenharmony_ci * CONST[21] D3DRS_FOGCOLOR 1083bf215546Sopenharmony_ci * CONST[22].x___ RS.FogEnd 1084bf215546Sopenharmony_ci * CONST[22]._y__ 1.0f / (RS.FogEnd - RS.FogStart) 1085bf215546Sopenharmony_ci * CONST[22].__z_ RS.FogDensity 1086bf215546Sopenharmony_ci */ 1087bf215546Sopenharmony_cistruct ps_build_ctx 1088bf215546Sopenharmony_ci{ 1089bf215546Sopenharmony_ci struct ureg_program *ureg; 1090bf215546Sopenharmony_ci 1091bf215546Sopenharmony_ci struct ureg_src vC[2]; /* DIFFUSE, SPECULAR */ 1092bf215546Sopenharmony_ci struct ureg_src vT[8]; /* TEXCOORD[i] */ 1093bf215546Sopenharmony_ci struct ureg_dst rCur; /* D3DTA_CURRENT */ 1094bf215546Sopenharmony_ci struct ureg_dst rMod; 1095bf215546Sopenharmony_ci struct ureg_src rCurSrc; 1096bf215546Sopenharmony_ci struct ureg_dst rTmp; /* D3DTA_TEMP */ 1097bf215546Sopenharmony_ci struct ureg_src rTmpSrc; 1098bf215546Sopenharmony_ci struct ureg_dst rTex; 1099bf215546Sopenharmony_ci struct ureg_src rTexSrc; 1100bf215546Sopenharmony_ci struct ureg_src cBEM[8]; 1101bf215546Sopenharmony_ci struct ureg_src s[8]; 1102bf215546Sopenharmony_ci 1103bf215546Sopenharmony_ci struct { 1104bf215546Sopenharmony_ci unsigned index; 1105bf215546Sopenharmony_ci unsigned index_pre_mod; 1106bf215546Sopenharmony_ci } stage; 1107bf215546Sopenharmony_ci}; 1108bf215546Sopenharmony_ci 1109bf215546Sopenharmony_cistatic struct ureg_src 1110bf215546Sopenharmony_cips_get_ts_arg(struct ps_build_ctx *ps, unsigned ta) 1111bf215546Sopenharmony_ci{ 1112bf215546Sopenharmony_ci struct ureg_src reg; 1113bf215546Sopenharmony_ci 1114bf215546Sopenharmony_ci switch (ta & D3DTA_SELECTMASK) { 1115bf215546Sopenharmony_ci case D3DTA_CONSTANT: 1116bf215546Sopenharmony_ci reg = ureg_DECL_constant(ps->ureg, ps->stage.index); 1117bf215546Sopenharmony_ci break; 1118bf215546Sopenharmony_ci case D3DTA_CURRENT: 1119bf215546Sopenharmony_ci reg = (ps->stage.index == ps->stage.index_pre_mod) ? ureg_src(ps->rMod) : ps->rCurSrc; 1120bf215546Sopenharmony_ci break; 1121bf215546Sopenharmony_ci case D3DTA_DIFFUSE: 1122bf215546Sopenharmony_ci reg = ureg_DECL_fs_input(ps->ureg, TGSI_SEMANTIC_COLOR, 0, TGSI_INTERPOLATE_COLOR); 1123bf215546Sopenharmony_ci break; 1124bf215546Sopenharmony_ci case D3DTA_SPECULAR: 1125bf215546Sopenharmony_ci reg = ureg_DECL_fs_input(ps->ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR); 1126bf215546Sopenharmony_ci break; 1127bf215546Sopenharmony_ci case D3DTA_TEMP: 1128bf215546Sopenharmony_ci reg = ps->rTmpSrc; 1129bf215546Sopenharmony_ci break; 1130bf215546Sopenharmony_ci case D3DTA_TEXTURE: 1131bf215546Sopenharmony_ci reg = ps->rTexSrc; 1132bf215546Sopenharmony_ci break; 1133bf215546Sopenharmony_ci case D3DTA_TFACTOR: 1134bf215546Sopenharmony_ci reg = ureg_DECL_constant(ps->ureg, 20); 1135bf215546Sopenharmony_ci break; 1136bf215546Sopenharmony_ci default: 1137bf215546Sopenharmony_ci assert(0); 1138bf215546Sopenharmony_ci reg = ureg_src_undef(); 1139bf215546Sopenharmony_ci break; 1140bf215546Sopenharmony_ci } 1141bf215546Sopenharmony_ci if (ta & D3DTA_COMPLEMENT) { 1142bf215546Sopenharmony_ci struct ureg_dst dst = ureg_DECL_temporary(ps->ureg); 1143bf215546Sopenharmony_ci ureg_ADD(ps->ureg, dst, ureg_imm1f(ps->ureg, 1.0f), ureg_negate(reg)); 1144bf215546Sopenharmony_ci reg = ureg_src(dst); 1145bf215546Sopenharmony_ci } 1146bf215546Sopenharmony_ci if (ta & D3DTA_ALPHAREPLICATE) 1147bf215546Sopenharmony_ci reg = _WWWW(reg); 1148bf215546Sopenharmony_ci return reg; 1149bf215546Sopenharmony_ci} 1150bf215546Sopenharmony_ci 1151bf215546Sopenharmony_cistatic struct ureg_dst 1152bf215546Sopenharmony_cips_get_ts_dst(struct ps_build_ctx *ps, unsigned ta) 1153bf215546Sopenharmony_ci{ 1154bf215546Sopenharmony_ci assert(!(ta & (D3DTA_COMPLEMENT | D3DTA_ALPHAREPLICATE))); 1155bf215546Sopenharmony_ci 1156bf215546Sopenharmony_ci switch (ta & D3DTA_SELECTMASK) { 1157bf215546Sopenharmony_ci case D3DTA_CURRENT: 1158bf215546Sopenharmony_ci return ps->rCur; 1159bf215546Sopenharmony_ci case D3DTA_TEMP: 1160bf215546Sopenharmony_ci return ps->rTmp; 1161bf215546Sopenharmony_ci default: 1162bf215546Sopenharmony_ci assert(0); 1163bf215546Sopenharmony_ci return ureg_dst_undef(); 1164bf215546Sopenharmony_ci } 1165bf215546Sopenharmony_ci} 1166bf215546Sopenharmony_ci 1167bf215546Sopenharmony_cistatic uint8_t ps_d3dtop_args_mask(D3DTEXTUREOP top) 1168bf215546Sopenharmony_ci{ 1169bf215546Sopenharmony_ci switch (top) { 1170bf215546Sopenharmony_ci case D3DTOP_DISABLE: 1171bf215546Sopenharmony_ci return 0x0; 1172bf215546Sopenharmony_ci case D3DTOP_SELECTARG1: 1173bf215546Sopenharmony_ci case D3DTOP_PREMODULATE: 1174bf215546Sopenharmony_ci return 0x2; 1175bf215546Sopenharmony_ci case D3DTOP_SELECTARG2: 1176bf215546Sopenharmony_ci return 0x4; 1177bf215546Sopenharmony_ci case D3DTOP_MULTIPLYADD: 1178bf215546Sopenharmony_ci case D3DTOP_LERP: 1179bf215546Sopenharmony_ci return 0x7; 1180bf215546Sopenharmony_ci default: 1181bf215546Sopenharmony_ci return 0x6; 1182bf215546Sopenharmony_ci } 1183bf215546Sopenharmony_ci} 1184bf215546Sopenharmony_ci 1185bf215546Sopenharmony_cistatic inline boolean 1186bf215546Sopenharmony_ciis_MOV_no_op(struct ureg_dst dst, struct ureg_src src) 1187bf215546Sopenharmony_ci{ 1188bf215546Sopenharmony_ci return !dst.WriteMask || 1189bf215546Sopenharmony_ci (dst.File == src.File && 1190bf215546Sopenharmony_ci dst.Index == src.Index && 1191bf215546Sopenharmony_ci !dst.Indirect && 1192bf215546Sopenharmony_ci !dst.Saturate && 1193bf215546Sopenharmony_ci !src.Indirect && 1194bf215546Sopenharmony_ci !src.Negate && 1195bf215546Sopenharmony_ci !src.Absolute && 1196bf215546Sopenharmony_ci (!(dst.WriteMask & TGSI_WRITEMASK_X) || (src.SwizzleX == TGSI_SWIZZLE_X)) && 1197bf215546Sopenharmony_ci (!(dst.WriteMask & TGSI_WRITEMASK_Y) || (src.SwizzleY == TGSI_SWIZZLE_Y)) && 1198bf215546Sopenharmony_ci (!(dst.WriteMask & TGSI_WRITEMASK_Z) || (src.SwizzleZ == TGSI_SWIZZLE_Z)) && 1199bf215546Sopenharmony_ci (!(dst.WriteMask & TGSI_WRITEMASK_W) || (src.SwizzleW == TGSI_SWIZZLE_W))); 1200bf215546Sopenharmony_ci 1201bf215546Sopenharmony_ci} 1202bf215546Sopenharmony_ci 1203bf215546Sopenharmony_cistatic void 1204bf215546Sopenharmony_cips_do_ts_op(struct ps_build_ctx *ps, unsigned top, struct ureg_dst dst, struct ureg_src *arg) 1205bf215546Sopenharmony_ci{ 1206bf215546Sopenharmony_ci struct ureg_program *ureg = ps->ureg; 1207bf215546Sopenharmony_ci struct ureg_dst tmp = ureg_DECL_temporary(ureg); 1208bf215546Sopenharmony_ci struct ureg_dst tmp2 = ureg_DECL_temporary(ureg); 1209bf215546Sopenharmony_ci struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X); 1210bf215546Sopenharmony_ci 1211bf215546Sopenharmony_ci tmp.WriteMask = dst.WriteMask; 1212bf215546Sopenharmony_ci 1213bf215546Sopenharmony_ci if (top != D3DTOP_SELECTARG1 && top != D3DTOP_SELECTARG2 && 1214bf215546Sopenharmony_ci top != D3DTOP_MODULATE && top != D3DTOP_PREMODULATE && 1215bf215546Sopenharmony_ci top != D3DTOP_BLENDDIFFUSEALPHA && top != D3DTOP_BLENDTEXTUREALPHA && 1216bf215546Sopenharmony_ci top != D3DTOP_BLENDFACTORALPHA && top != D3DTOP_BLENDCURRENTALPHA && 1217bf215546Sopenharmony_ci top != D3DTOP_BUMPENVMAP && top != D3DTOP_BUMPENVMAPLUMINANCE && 1218bf215546Sopenharmony_ci top != D3DTOP_LERP) 1219bf215546Sopenharmony_ci dst = ureg_saturate(dst); 1220bf215546Sopenharmony_ci 1221bf215546Sopenharmony_ci switch (top) { 1222bf215546Sopenharmony_ci case D3DTOP_SELECTARG1: 1223bf215546Sopenharmony_ci if (!is_MOV_no_op(dst, arg[1])) 1224bf215546Sopenharmony_ci ureg_MOV(ureg, dst, arg[1]); 1225bf215546Sopenharmony_ci break; 1226bf215546Sopenharmony_ci case D3DTOP_SELECTARG2: 1227bf215546Sopenharmony_ci if (!is_MOV_no_op(dst, arg[2])) 1228bf215546Sopenharmony_ci ureg_MOV(ureg, dst, arg[2]); 1229bf215546Sopenharmony_ci break; 1230bf215546Sopenharmony_ci case D3DTOP_MODULATE: 1231bf215546Sopenharmony_ci ureg_MUL(ureg, dst, arg[1], arg[2]); 1232bf215546Sopenharmony_ci break; 1233bf215546Sopenharmony_ci case D3DTOP_MODULATE2X: 1234bf215546Sopenharmony_ci ureg_MUL(ureg, tmp, arg[1], arg[2]); 1235bf215546Sopenharmony_ci ureg_ADD(ureg, dst, ureg_src(tmp), ureg_src(tmp)); 1236bf215546Sopenharmony_ci break; 1237bf215546Sopenharmony_ci case D3DTOP_MODULATE4X: 1238bf215546Sopenharmony_ci ureg_MUL(ureg, tmp, arg[1], arg[2]); 1239bf215546Sopenharmony_ci ureg_MUL(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, 4.0f)); 1240bf215546Sopenharmony_ci break; 1241bf215546Sopenharmony_ci case D3DTOP_ADD: 1242bf215546Sopenharmony_ci ureg_ADD(ureg, dst, arg[1], arg[2]); 1243bf215546Sopenharmony_ci break; 1244bf215546Sopenharmony_ci case D3DTOP_ADDSIGNED: 1245bf215546Sopenharmony_ci ureg_ADD(ureg, tmp, arg[1], arg[2]); 1246bf215546Sopenharmony_ci ureg_ADD(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, -0.5f)); 1247bf215546Sopenharmony_ci break; 1248bf215546Sopenharmony_ci case D3DTOP_ADDSIGNED2X: 1249bf215546Sopenharmony_ci ureg_ADD(ureg, tmp, arg[1], arg[2]); 1250bf215546Sopenharmony_ci ureg_MAD(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f)); 1251bf215546Sopenharmony_ci break; 1252bf215546Sopenharmony_ci case D3DTOP_SUBTRACT: 1253bf215546Sopenharmony_ci ureg_ADD(ureg, dst, arg[1], ureg_negate(arg[2])); 1254bf215546Sopenharmony_ci break; 1255bf215546Sopenharmony_ci case D3DTOP_ADDSMOOTH: 1256bf215546Sopenharmony_ci ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(arg[1])); 1257bf215546Sopenharmony_ci ureg_MAD(ureg, dst, ureg_src(tmp), arg[2], arg[1]); 1258bf215546Sopenharmony_ci break; 1259bf215546Sopenharmony_ci case D3DTOP_BLENDDIFFUSEALPHA: 1260bf215546Sopenharmony_ci ureg_LRP(ureg, dst, _WWWW(ps->vC[0]), arg[1], arg[2]); 1261bf215546Sopenharmony_ci break; 1262bf215546Sopenharmony_ci case D3DTOP_BLENDTEXTUREALPHA: 1263bf215546Sopenharmony_ci /* XXX: alpha taken from previous stage, texture or result ? */ 1264bf215546Sopenharmony_ci ureg_LRP(ureg, dst, _W(ps->rTex), arg[1], arg[2]); 1265bf215546Sopenharmony_ci break; 1266bf215546Sopenharmony_ci case D3DTOP_BLENDFACTORALPHA: 1267bf215546Sopenharmony_ci ureg_LRP(ureg, dst, _WWWW(_CONST(20)), arg[1], arg[2]); 1268bf215546Sopenharmony_ci break; 1269bf215546Sopenharmony_ci case D3DTOP_BLENDTEXTUREALPHAPM: 1270bf215546Sopenharmony_ci ureg_ADD(ureg, tmp_x, ureg_imm1f(ureg, 1.0f), ureg_negate(_W(ps->rTex))); 1271bf215546Sopenharmony_ci ureg_MAD(ureg, dst, arg[2], _X(tmp), arg[1]); 1272bf215546Sopenharmony_ci break; 1273bf215546Sopenharmony_ci case D3DTOP_BLENDCURRENTALPHA: 1274bf215546Sopenharmony_ci ureg_LRP(ureg, dst, _WWWW(ps->rCurSrc), arg[1], arg[2]); 1275bf215546Sopenharmony_ci break; 1276bf215546Sopenharmony_ci case D3DTOP_PREMODULATE: 1277bf215546Sopenharmony_ci ureg_MOV(ureg, dst, arg[1]); 1278bf215546Sopenharmony_ci ps->stage.index_pre_mod = ps->stage.index + 1; 1279bf215546Sopenharmony_ci break; 1280bf215546Sopenharmony_ci case D3DTOP_MODULATEALPHA_ADDCOLOR: 1281bf215546Sopenharmony_ci ureg_MAD(ureg, dst, _WWWW(arg[1]), arg[2], arg[1]); 1282bf215546Sopenharmony_ci break; 1283bf215546Sopenharmony_ci case D3DTOP_MODULATECOLOR_ADDALPHA: 1284bf215546Sopenharmony_ci ureg_MAD(ureg, dst, arg[1], arg[2], _WWWW(arg[1])); 1285bf215546Sopenharmony_ci break; 1286bf215546Sopenharmony_ci case D3DTOP_MODULATEINVALPHA_ADDCOLOR: 1287bf215546Sopenharmony_ci ureg_ADD(ureg, tmp_x, ureg_imm1f(ureg, 1.0f), ureg_negate(_WWWW(arg[1]))); 1288bf215546Sopenharmony_ci ureg_MAD(ureg, dst, _X(tmp), arg[2], arg[1]); 1289bf215546Sopenharmony_ci break; 1290bf215546Sopenharmony_ci case D3DTOP_MODULATEINVCOLOR_ADDALPHA: 1291bf215546Sopenharmony_ci ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(arg[1])); 1292bf215546Sopenharmony_ci ureg_MAD(ureg, dst, ureg_src(tmp), arg[2], _WWWW(arg[1])); 1293bf215546Sopenharmony_ci break; 1294bf215546Sopenharmony_ci case D3DTOP_BUMPENVMAP: 1295bf215546Sopenharmony_ci break; 1296bf215546Sopenharmony_ci case D3DTOP_BUMPENVMAPLUMINANCE: 1297bf215546Sopenharmony_ci break; 1298bf215546Sopenharmony_ci case D3DTOP_DOTPRODUCT3: 1299bf215546Sopenharmony_ci ureg_ADD(ureg, tmp, arg[1], ureg_imm4f(ureg,-0.5,-0.5,-0.5,-0.5)); 1300bf215546Sopenharmony_ci ureg_ADD(ureg, tmp2, arg[2] , ureg_imm4f(ureg,-0.5,-0.5,-0.5,-0.5)); 1301bf215546Sopenharmony_ci ureg_DP3(ureg, tmp, ureg_src(tmp), ureg_src(tmp2)); 1302bf215546Sopenharmony_ci ureg_MUL(ureg, ureg_saturate(dst), ureg_src(tmp), ureg_imm4f(ureg,4.0,4.0,4.0,4.0)); 1303bf215546Sopenharmony_ci break; 1304bf215546Sopenharmony_ci case D3DTOP_MULTIPLYADD: 1305bf215546Sopenharmony_ci ureg_MAD(ureg, dst, arg[1], arg[2], arg[0]); 1306bf215546Sopenharmony_ci break; 1307bf215546Sopenharmony_ci case D3DTOP_LERP: 1308bf215546Sopenharmony_ci ureg_LRP(ureg, dst, arg[0], arg[1], arg[2]); 1309bf215546Sopenharmony_ci break; 1310bf215546Sopenharmony_ci case D3DTOP_DISABLE: 1311bf215546Sopenharmony_ci /* no-op ? */ 1312bf215546Sopenharmony_ci break; 1313bf215546Sopenharmony_ci default: 1314bf215546Sopenharmony_ci assert(!"invalid D3DTOP"); 1315bf215546Sopenharmony_ci break; 1316bf215546Sopenharmony_ci } 1317bf215546Sopenharmony_ci ureg_release_temporary(ureg, tmp); 1318bf215546Sopenharmony_ci ureg_release_temporary(ureg, tmp2); 1319bf215546Sopenharmony_ci} 1320bf215546Sopenharmony_ci 1321bf215546Sopenharmony_cistatic void * 1322bf215546Sopenharmony_cinine_ff_build_ps(struct NineDevice9 *device, struct nine_ff_ps_key *key) 1323bf215546Sopenharmony_ci{ 1324bf215546Sopenharmony_ci struct ps_build_ctx ps; 1325bf215546Sopenharmony_ci struct ureg_program *ureg = ureg_create(PIPE_SHADER_FRAGMENT); 1326bf215546Sopenharmony_ci struct ureg_dst oCol; 1327bf215546Sopenharmony_ci unsigned s; 1328bf215546Sopenharmony_ci const unsigned texcoord_sn = get_texcoord_sn(device->screen); 1329bf215546Sopenharmony_ci 1330bf215546Sopenharmony_ci memset(&ps, 0, sizeof(ps)); 1331bf215546Sopenharmony_ci ps.ureg = ureg; 1332bf215546Sopenharmony_ci ps.stage.index_pre_mod = -1; 1333bf215546Sopenharmony_ci 1334bf215546Sopenharmony_ci ps.vC[0] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 0, TGSI_INTERPOLATE_COLOR); 1335bf215546Sopenharmony_ci 1336bf215546Sopenharmony_ci ps.rCur = ureg_DECL_temporary(ureg); 1337bf215546Sopenharmony_ci ps.rTmp = ureg_DECL_temporary(ureg); 1338bf215546Sopenharmony_ci ps.rTex = ureg_DECL_temporary(ureg); 1339bf215546Sopenharmony_ci ps.rCurSrc = ureg_src(ps.rCur); 1340bf215546Sopenharmony_ci ps.rTmpSrc = ureg_src(ps.rTmp); 1341bf215546Sopenharmony_ci ps.rTexSrc = ureg_src(ps.rTex); 1342bf215546Sopenharmony_ci 1343bf215546Sopenharmony_ci /* Initial values */ 1344bf215546Sopenharmony_ci ureg_MOV(ureg, ps.rCur, ps.vC[0]); 1345bf215546Sopenharmony_ci ureg_MOV(ureg, ps.rTmp, ureg_imm1f(ureg, 0.0f)); 1346bf215546Sopenharmony_ci ureg_MOV(ureg, ps.rTex, ureg_imm1f(ureg, 0.0f)); 1347bf215546Sopenharmony_ci 1348bf215546Sopenharmony_ci for (s = 0; s < 8; ++s) { 1349bf215546Sopenharmony_ci ps.s[s] = ureg_src_undef(); 1350bf215546Sopenharmony_ci 1351bf215546Sopenharmony_ci if (key->ts[s].colorop != D3DTOP_DISABLE) { 1352bf215546Sopenharmony_ci if (key->ts[s].colorarg0 == D3DTA_SPECULAR || 1353bf215546Sopenharmony_ci key->ts[s].colorarg1 == D3DTA_SPECULAR || 1354bf215546Sopenharmony_ci key->ts[s].colorarg2 == D3DTA_SPECULAR) 1355bf215546Sopenharmony_ci ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR); 1356bf215546Sopenharmony_ci 1357bf215546Sopenharmony_ci if (key->ts[s].colorarg0 == D3DTA_TEXTURE || 1358bf215546Sopenharmony_ci key->ts[s].colorarg1 == D3DTA_TEXTURE || 1359bf215546Sopenharmony_ci key->ts[s].colorarg2 == D3DTA_TEXTURE || 1360bf215546Sopenharmony_ci key->ts[s].colorop == D3DTOP_BLENDTEXTUREALPHA || 1361bf215546Sopenharmony_ci key->ts[s].colorop == D3DTOP_BLENDTEXTUREALPHAPM) { 1362bf215546Sopenharmony_ci ps.s[s] = ureg_DECL_sampler(ureg, s); 1363bf215546Sopenharmony_ci ps.vT[s] = ureg_DECL_fs_input(ureg, texcoord_sn, s, TGSI_INTERPOLATE_PERSPECTIVE); 1364bf215546Sopenharmony_ci } 1365bf215546Sopenharmony_ci if (s && (key->ts[s - 1].colorop == D3DTOP_PREMODULATE || 1366bf215546Sopenharmony_ci key->ts[s - 1].alphaop == D3DTOP_PREMODULATE)) 1367bf215546Sopenharmony_ci ps.s[s] = ureg_DECL_sampler(ureg, s); 1368bf215546Sopenharmony_ci } 1369bf215546Sopenharmony_ci 1370bf215546Sopenharmony_ci if (key->ts[s].alphaop != D3DTOP_DISABLE) { 1371bf215546Sopenharmony_ci if (key->ts[s].alphaarg0 == D3DTA_SPECULAR || 1372bf215546Sopenharmony_ci key->ts[s].alphaarg1 == D3DTA_SPECULAR || 1373bf215546Sopenharmony_ci key->ts[s].alphaarg2 == D3DTA_SPECULAR) 1374bf215546Sopenharmony_ci ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR); 1375bf215546Sopenharmony_ci 1376bf215546Sopenharmony_ci if (key->ts[s].alphaarg0 == D3DTA_TEXTURE || 1377bf215546Sopenharmony_ci key->ts[s].alphaarg1 == D3DTA_TEXTURE || 1378bf215546Sopenharmony_ci key->ts[s].alphaarg2 == D3DTA_TEXTURE || 1379bf215546Sopenharmony_ci key->ts[s].colorop == D3DTOP_BLENDTEXTUREALPHA || 1380bf215546Sopenharmony_ci key->ts[s].colorop == D3DTOP_BLENDTEXTUREALPHAPM) { 1381bf215546Sopenharmony_ci ps.s[s] = ureg_DECL_sampler(ureg, s); 1382bf215546Sopenharmony_ci ps.vT[s] = ureg_DECL_fs_input(ureg, texcoord_sn, s, TGSI_INTERPOLATE_PERSPECTIVE); 1383bf215546Sopenharmony_ci } 1384bf215546Sopenharmony_ci } 1385bf215546Sopenharmony_ci } 1386bf215546Sopenharmony_ci if (key->specular) 1387bf215546Sopenharmony_ci ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR); 1388bf215546Sopenharmony_ci 1389bf215546Sopenharmony_ci oCol = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0); 1390bf215546Sopenharmony_ci 1391bf215546Sopenharmony_ci /* Run stages. 1392bf215546Sopenharmony_ci */ 1393bf215546Sopenharmony_ci for (s = 0; s < 8; ++s) { 1394bf215546Sopenharmony_ci unsigned colorarg[3]; 1395bf215546Sopenharmony_ci unsigned alphaarg[3]; 1396bf215546Sopenharmony_ci const uint8_t used_c = ps_d3dtop_args_mask(key->ts[s].colorop); 1397bf215546Sopenharmony_ci const uint8_t used_a = ps_d3dtop_args_mask(key->ts[s].alphaop); 1398bf215546Sopenharmony_ci struct ureg_dst dst; 1399bf215546Sopenharmony_ci struct ureg_src arg[3]; 1400bf215546Sopenharmony_ci 1401bf215546Sopenharmony_ci if (key->ts[s].colorop == D3DTOP_DISABLE) { 1402bf215546Sopenharmony_ci assert (key->ts[s].alphaop == D3DTOP_DISABLE); 1403bf215546Sopenharmony_ci continue; 1404bf215546Sopenharmony_ci } 1405bf215546Sopenharmony_ci ps.stage.index = s; 1406bf215546Sopenharmony_ci 1407bf215546Sopenharmony_ci DBG("STAGE[%u]: colorop=%s alphaop=%s\n", s, 1408bf215546Sopenharmony_ci nine_D3DTOP_to_str(key->ts[s].colorop), 1409bf215546Sopenharmony_ci nine_D3DTOP_to_str(key->ts[s].alphaop)); 1410bf215546Sopenharmony_ci 1411bf215546Sopenharmony_ci if (!ureg_src_is_undef(ps.s[s])) { 1412bf215546Sopenharmony_ci unsigned target; 1413bf215546Sopenharmony_ci struct ureg_src texture_coord = ps.vT[s]; 1414bf215546Sopenharmony_ci struct ureg_dst delta; 1415bf215546Sopenharmony_ci switch (key->ts[s].textarget) { 1416bf215546Sopenharmony_ci case 0: target = TGSI_TEXTURE_1D; break; 1417bf215546Sopenharmony_ci case 1: target = TGSI_TEXTURE_2D; break; 1418bf215546Sopenharmony_ci case 2: target = TGSI_TEXTURE_3D; break; 1419bf215546Sopenharmony_ci case 3: target = TGSI_TEXTURE_CUBE; break; 1420bf215546Sopenharmony_ci /* this is a 2 bit bitfield, do I really need a default case ? */ 1421bf215546Sopenharmony_ci } 1422bf215546Sopenharmony_ci 1423bf215546Sopenharmony_ci /* Modify coordinates */ 1424bf215546Sopenharmony_ci if (s >= 1 && 1425bf215546Sopenharmony_ci (key->ts[s-1].colorop == D3DTOP_BUMPENVMAP || 1426bf215546Sopenharmony_ci key->ts[s-1].colorop == D3DTOP_BUMPENVMAPLUMINANCE)) { 1427bf215546Sopenharmony_ci delta = ureg_DECL_temporary(ureg); 1428bf215546Sopenharmony_ci /* Du' = D3DTSS_BUMPENVMAT00(stage s-1)*t(s-1)R + D3DTSS_BUMPENVMAT10(stage s-1)*t(s-1)G */ 1429bf215546Sopenharmony_ci ureg_MUL(ureg, ureg_writemask(delta, TGSI_WRITEMASK_X), _X(ps.rTex), _XXXX(_CONST(8 + s - 1))); 1430bf215546Sopenharmony_ci ureg_MAD(ureg, ureg_writemask(delta, TGSI_WRITEMASK_X), _Y(ps.rTex), _ZZZZ(_CONST(8 + s - 1)), ureg_src(delta)); 1431bf215546Sopenharmony_ci /* Dv' = D3DTSS_BUMPENVMAT01(stage s-1)*t(s-1)R + D3DTSS_BUMPENVMAT11(stage s-1)*t(s-1)G */ 1432bf215546Sopenharmony_ci ureg_MUL(ureg, ureg_writemask(delta, TGSI_WRITEMASK_Y), _X(ps.rTex), _YYYY(_CONST(8 + s - 1))); 1433bf215546Sopenharmony_ci ureg_MAD(ureg, ureg_writemask(delta, TGSI_WRITEMASK_Y), _Y(ps.rTex), _WWWW(_CONST(8 + s - 1)), ureg_src(delta)); 1434bf215546Sopenharmony_ci texture_coord = ureg_src(ureg_DECL_temporary(ureg)); 1435bf215546Sopenharmony_ci ureg_MOV(ureg, ureg_writemask(ureg_dst(texture_coord), ureg_dst(ps.vT[s]).WriteMask), ps.vT[s]); 1436bf215546Sopenharmony_ci ureg_ADD(ureg, ureg_writemask(ureg_dst(texture_coord), TGSI_WRITEMASK_XY), texture_coord, ureg_src(delta)); 1437bf215546Sopenharmony_ci /* Prepare luminance multiplier 1438bf215546Sopenharmony_ci * t(s)RGBA = t(s)RGBA * clamp[(t(s-1)B * D3DTSS_BUMPENVLSCALE(stage s-1)) + D3DTSS_BUMPENVLOFFSET(stage s-1)] */ 1439bf215546Sopenharmony_ci if (key->ts[s-1].colorop == D3DTOP_BUMPENVMAPLUMINANCE) { 1440bf215546Sopenharmony_ci struct ureg_src bumpenvlscale = ((s-1) & 1) ? _ZZZZ(_CONST(16 + (s-1) / 2)) : _XXXX(_CONST(16 + (s-1) / 2)); 1441bf215546Sopenharmony_ci struct ureg_src bumpenvloffset = ((s-1) & 1) ? _WWWW(_CONST(16 + (s-1) / 2)) : _YYYY(_CONST(16 + (s-1) / 2)); 1442bf215546Sopenharmony_ci 1443bf215546Sopenharmony_ci ureg_MAD(ureg, ureg_saturate(ureg_writemask(delta, TGSI_WRITEMASK_X)), _Z(ps.rTex), bumpenvlscale, bumpenvloffset); 1444bf215546Sopenharmony_ci } 1445bf215546Sopenharmony_ci } 1446bf215546Sopenharmony_ci if (key->projected & (3 << (s *2))) { 1447bf215546Sopenharmony_ci unsigned dim = 1 + ((key->projected >> (2 * s)) & 3); 1448bf215546Sopenharmony_ci if (dim == 4) 1449bf215546Sopenharmony_ci ureg_TXP(ureg, ps.rTex, target, texture_coord, ps.s[s]); 1450bf215546Sopenharmony_ci else { 1451bf215546Sopenharmony_ci struct ureg_dst tmp = ureg_DECL_temporary(ureg); 1452bf215546Sopenharmony_ci ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(texture_coord, dim-1)); 1453bf215546Sopenharmony_ci ureg_MUL(ureg, ps.rTmp, _X(tmp), texture_coord); 1454bf215546Sopenharmony_ci ureg_TEX(ureg, ps.rTex, target, ps.rTmpSrc, ps.s[s]); 1455bf215546Sopenharmony_ci ureg_release_temporary(ureg, tmp); 1456bf215546Sopenharmony_ci } 1457bf215546Sopenharmony_ci } else { 1458bf215546Sopenharmony_ci ureg_TEX(ureg, ps.rTex, target, texture_coord, ps.s[s]); 1459bf215546Sopenharmony_ci } 1460bf215546Sopenharmony_ci if (s >= 1 && key->ts[s-1].colorop == D3DTOP_BUMPENVMAPLUMINANCE) 1461bf215546Sopenharmony_ci ureg_MUL(ureg, ps.rTex, ureg_src(ps.rTex), _X(delta)); 1462bf215546Sopenharmony_ci } 1463bf215546Sopenharmony_ci 1464bf215546Sopenharmony_ci if (key->ts[s].colorop == D3DTOP_BUMPENVMAP || 1465bf215546Sopenharmony_ci key->ts[s].colorop == D3DTOP_BUMPENVMAPLUMINANCE) 1466bf215546Sopenharmony_ci continue; 1467bf215546Sopenharmony_ci 1468bf215546Sopenharmony_ci dst = ps_get_ts_dst(&ps, key->ts[s].resultarg ? D3DTA_TEMP : D3DTA_CURRENT); 1469bf215546Sopenharmony_ci 1470bf215546Sopenharmony_ci if (ps.stage.index_pre_mod == ps.stage.index) { 1471bf215546Sopenharmony_ci ps.rMod = ureg_DECL_temporary(ureg); 1472bf215546Sopenharmony_ci ureg_MUL(ureg, ps.rMod, ps.rCurSrc, ps.rTexSrc); 1473bf215546Sopenharmony_ci } 1474bf215546Sopenharmony_ci 1475bf215546Sopenharmony_ci colorarg[0] = (key->ts[s].colorarg0 | (((key->colorarg_b4[0] >> s) & 0x1) << 4) | ((key->colorarg_b5[0] >> s) << 5)) & 0x3f; 1476bf215546Sopenharmony_ci colorarg[1] = (key->ts[s].colorarg1 | (((key->colorarg_b4[1] >> s) & 0x1) << 4) | ((key->colorarg_b5[1] >> s) << 5)) & 0x3f; 1477bf215546Sopenharmony_ci colorarg[2] = (key->ts[s].colorarg2 | (((key->colorarg_b4[2] >> s) & 0x1) << 4) | ((key->colorarg_b5[2] >> s) << 5)) & 0x3f; 1478bf215546Sopenharmony_ci alphaarg[0] = (key->ts[s].alphaarg0 | ((key->alphaarg_b4[0] >> s) << 4)) & 0x1f; 1479bf215546Sopenharmony_ci alphaarg[1] = (key->ts[s].alphaarg1 | ((key->alphaarg_b4[1] >> s) << 4)) & 0x1f; 1480bf215546Sopenharmony_ci alphaarg[2] = (key->ts[s].alphaarg2 | ((key->alphaarg_b4[2] >> s) << 4)) & 0x1f; 1481bf215546Sopenharmony_ci 1482bf215546Sopenharmony_ci if (key->ts[s].colorop != key->ts[s].alphaop || 1483bf215546Sopenharmony_ci colorarg[0] != alphaarg[0] || 1484bf215546Sopenharmony_ci colorarg[1] != alphaarg[1] || 1485bf215546Sopenharmony_ci colorarg[2] != alphaarg[2]) 1486bf215546Sopenharmony_ci dst.WriteMask = TGSI_WRITEMASK_XYZ; 1487bf215546Sopenharmony_ci 1488bf215546Sopenharmony_ci /* Special DOTPRODUCT behaviour (see wine tests) */ 1489bf215546Sopenharmony_ci if (key->ts[s].colorop == D3DTOP_DOTPRODUCT3) 1490bf215546Sopenharmony_ci dst.WriteMask = TGSI_WRITEMASK_XYZW; 1491bf215546Sopenharmony_ci 1492bf215546Sopenharmony_ci if (used_c & 0x1) arg[0] = ps_get_ts_arg(&ps, colorarg[0]); 1493bf215546Sopenharmony_ci if (used_c & 0x2) arg[1] = ps_get_ts_arg(&ps, colorarg[1]); 1494bf215546Sopenharmony_ci if (used_c & 0x4) arg[2] = ps_get_ts_arg(&ps, colorarg[2]); 1495bf215546Sopenharmony_ci ps_do_ts_op(&ps, key->ts[s].colorop, dst, arg); 1496bf215546Sopenharmony_ci 1497bf215546Sopenharmony_ci if (dst.WriteMask != TGSI_WRITEMASK_XYZW) { 1498bf215546Sopenharmony_ci dst.WriteMask = TGSI_WRITEMASK_W; 1499bf215546Sopenharmony_ci 1500bf215546Sopenharmony_ci if (used_a & 0x1) arg[0] = ps_get_ts_arg(&ps, alphaarg[0]); 1501bf215546Sopenharmony_ci if (used_a & 0x2) arg[1] = ps_get_ts_arg(&ps, alphaarg[1]); 1502bf215546Sopenharmony_ci if (used_a & 0x4) arg[2] = ps_get_ts_arg(&ps, alphaarg[2]); 1503bf215546Sopenharmony_ci ps_do_ts_op(&ps, key->ts[s].alphaop, dst, arg); 1504bf215546Sopenharmony_ci } 1505bf215546Sopenharmony_ci } 1506bf215546Sopenharmony_ci 1507bf215546Sopenharmony_ci if (key->specular) 1508bf215546Sopenharmony_ci ureg_ADD(ureg, ureg_writemask(ps.rCur, TGSI_WRITEMASK_XYZ), ps.rCurSrc, ps.vC[1]); 1509bf215546Sopenharmony_ci 1510bf215546Sopenharmony_ci /* Fog. 1511bf215546Sopenharmony_ci */ 1512bf215546Sopenharmony_ci if (key->fog_mode) { 1513bf215546Sopenharmony_ci struct ureg_dst rFog = ureg_writemask(ps.rTmp, TGSI_WRITEMASK_X); 1514bf215546Sopenharmony_ci struct ureg_src vPos; 1515bf215546Sopenharmony_ci if (device->screen->get_param(device->screen, 1516bf215546Sopenharmony_ci PIPE_CAP_FS_POSITION_IS_SYSVAL)) { 1517bf215546Sopenharmony_ci vPos = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0); 1518bf215546Sopenharmony_ci } else { 1519bf215546Sopenharmony_ci vPos = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION, 0, 1520bf215546Sopenharmony_ci TGSI_INTERPOLATE_LINEAR); 1521bf215546Sopenharmony_ci } 1522bf215546Sopenharmony_ci 1523bf215546Sopenharmony_ci /* Source is either W or Z. 1524bf215546Sopenharmony_ci * When we use vs ff, 1525bf215546Sopenharmony_ci * Z is when an orthogonal projection matrix is detected, 1526bf215546Sopenharmony_ci * W (WFOG) else. 1527bf215546Sopenharmony_ci * Z is used for programmable vs. 1528bf215546Sopenharmony_ci * Note: Tests indicate that the projection matrix coefficients do 1529bf215546Sopenharmony_ci * actually affect pixel fog (and not vertex fog) when vs ff is used, 1530bf215546Sopenharmony_ci * which justifies taking the position's w instead of taking the z coordinate 1531bf215546Sopenharmony_ci * before the projection in the vs shader. 1532bf215546Sopenharmony_ci */ 1533bf215546Sopenharmony_ci if (!key->fog_source) 1534bf215546Sopenharmony_ci ureg_MOV(ureg, rFog, _ZZZZ(vPos)); 1535bf215546Sopenharmony_ci else 1536bf215546Sopenharmony_ci /* Position's w is 1/w */ 1537bf215546Sopenharmony_ci ureg_RCP(ureg, rFog, _WWWW(vPos)); 1538bf215546Sopenharmony_ci 1539bf215546Sopenharmony_ci if (key->fog_mode == D3DFOG_EXP) { 1540bf215546Sopenharmony_ci ureg_MUL(ureg, rFog, _X(rFog), _ZZZZ(_CONST(22))); 1541bf215546Sopenharmony_ci ureg_MUL(ureg, rFog, _X(rFog), ureg_imm1f(ureg, -1.442695f)); 1542bf215546Sopenharmony_ci ureg_EX2(ureg, rFog, _X(rFog)); 1543bf215546Sopenharmony_ci } else 1544bf215546Sopenharmony_ci if (key->fog_mode == D3DFOG_EXP2) { 1545bf215546Sopenharmony_ci ureg_MUL(ureg, rFog, _X(rFog), _ZZZZ(_CONST(22))); 1546bf215546Sopenharmony_ci ureg_MUL(ureg, rFog, _X(rFog), _X(rFog)); 1547bf215546Sopenharmony_ci ureg_MUL(ureg, rFog, _X(rFog), ureg_imm1f(ureg, -1.442695f)); 1548bf215546Sopenharmony_ci ureg_EX2(ureg, rFog, _X(rFog)); 1549bf215546Sopenharmony_ci } else 1550bf215546Sopenharmony_ci if (key->fog_mode == D3DFOG_LINEAR) { 1551bf215546Sopenharmony_ci ureg_ADD(ureg, rFog, _XXXX(_CONST(22)), ureg_negate(_X(rFog))); 1552bf215546Sopenharmony_ci ureg_MUL(ureg, ureg_saturate(rFog), _X(rFog), _YYYY(_CONST(22))); 1553bf215546Sopenharmony_ci } 1554bf215546Sopenharmony_ci ureg_LRP(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_XYZ), _X(rFog), ps.rCurSrc, _CONST(21)); 1555bf215546Sopenharmony_ci ureg_MOV(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_W), ps.rCurSrc); 1556bf215546Sopenharmony_ci } else 1557bf215546Sopenharmony_ci if (key->fog) { 1558bf215546Sopenharmony_ci struct ureg_src vFog = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_GENERIC, 16, TGSI_INTERPOLATE_PERSPECTIVE); 1559bf215546Sopenharmony_ci ureg_LRP(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_XYZ), _XXXX(vFog), ps.rCurSrc, _CONST(21)); 1560bf215546Sopenharmony_ci ureg_MOV(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_W), ps.rCurSrc); 1561bf215546Sopenharmony_ci } else { 1562bf215546Sopenharmony_ci ureg_MOV(ureg, oCol, ps.rCurSrc); 1563bf215546Sopenharmony_ci } 1564bf215546Sopenharmony_ci 1565bf215546Sopenharmony_ci ureg_END(ureg); 1566bf215546Sopenharmony_ci nine_ureg_tgsi_dump(ureg, FALSE); 1567bf215546Sopenharmony_ci return nine_create_shader_with_so_and_destroy(ureg, device->context.pipe, NULL); 1568bf215546Sopenharmony_ci} 1569bf215546Sopenharmony_ci 1570bf215546Sopenharmony_cistatic struct NineVertexShader9 * 1571bf215546Sopenharmony_cinine_ff_get_vs(struct NineDevice9 *device) 1572bf215546Sopenharmony_ci{ 1573bf215546Sopenharmony_ci const struct nine_context *context = &device->context; 1574bf215546Sopenharmony_ci struct NineVertexShader9 *vs; 1575bf215546Sopenharmony_ci struct vs_build_ctx bld; 1576bf215546Sopenharmony_ci struct nine_ff_vs_key key; 1577bf215546Sopenharmony_ci unsigned s, i; 1578bf215546Sopenharmony_ci boolean has_indexes = false; 1579bf215546Sopenharmony_ci boolean has_weights = false; 1580bf215546Sopenharmony_ci int8_t input_texture_coord[8]; 1581bf215546Sopenharmony_ci 1582bf215546Sopenharmony_ci assert(sizeof(key) <= sizeof(key.value32)); 1583bf215546Sopenharmony_ci 1584bf215546Sopenharmony_ci memset(&key, 0, sizeof(key)); 1585bf215546Sopenharmony_ci memset(&bld, 0, sizeof(bld)); 1586bf215546Sopenharmony_ci memset(&input_texture_coord, 0, sizeof(input_texture_coord)); 1587bf215546Sopenharmony_ci 1588bf215546Sopenharmony_ci bld.key = &key; 1589bf215546Sopenharmony_ci 1590bf215546Sopenharmony_ci /* FIXME: this shouldn't be NULL, but it is on init */ 1591bf215546Sopenharmony_ci if (context->vdecl) { 1592bf215546Sopenharmony_ci key.color0in_one = 1; 1593bf215546Sopenharmony_ci key.color1in_zero = 1; 1594bf215546Sopenharmony_ci for (i = 0; i < context->vdecl->nelems; i++) { 1595bf215546Sopenharmony_ci uint16_t usage = context->vdecl->usage_map[i]; 1596bf215546Sopenharmony_ci if (usage == NINE_DECLUSAGE_POSITIONT) 1597bf215546Sopenharmony_ci key.position_t = 1; 1598bf215546Sopenharmony_ci else if (usage == NINE_DECLUSAGE_i(COLOR, 0)) 1599bf215546Sopenharmony_ci key.color0in_one = 0; 1600bf215546Sopenharmony_ci else if (usage == NINE_DECLUSAGE_i(COLOR, 1)) 1601bf215546Sopenharmony_ci key.color1in_zero = 0; 1602bf215546Sopenharmony_ci else if (usage == NINE_DECLUSAGE_i(BLENDINDICES, 0)) { 1603bf215546Sopenharmony_ci has_indexes = true; 1604bf215546Sopenharmony_ci key.passthrough |= 1 << usage; 1605bf215546Sopenharmony_ci } else if (usage == NINE_DECLUSAGE_i(BLENDWEIGHT, 0)) { 1606bf215546Sopenharmony_ci has_weights = true; 1607bf215546Sopenharmony_ci key.passthrough |= 1 << usage; 1608bf215546Sopenharmony_ci } else if (usage == NINE_DECLUSAGE_i(NORMAL, 0)) { 1609bf215546Sopenharmony_ci key.has_normal = 1; 1610bf215546Sopenharmony_ci key.passthrough |= 1 << usage; 1611bf215546Sopenharmony_ci } else if (usage == NINE_DECLUSAGE_PSIZE) 1612bf215546Sopenharmony_ci key.vertexpointsize = 1; 1613bf215546Sopenharmony_ci else if (usage % NINE_DECLUSAGE_COUNT == NINE_DECLUSAGE_TEXCOORD) { 1614bf215546Sopenharmony_ci s = usage / NINE_DECLUSAGE_COUNT; 1615bf215546Sopenharmony_ci if (s < 8) 1616bf215546Sopenharmony_ci input_texture_coord[s] = nine_decltype_get_dim(context->vdecl->decls[i].Type); 1617bf215546Sopenharmony_ci else 1618bf215546Sopenharmony_ci DBG("FF given texture coordinate >= 8. Ignoring\n"); 1619bf215546Sopenharmony_ci } else if (usage < NINE_DECLUSAGE_NONE) 1620bf215546Sopenharmony_ci key.passthrough |= 1 << usage; 1621bf215546Sopenharmony_ci } 1622bf215546Sopenharmony_ci } 1623bf215546Sopenharmony_ci /* ff vs + ps 3.0: some elements are passed to the ps (wine test). 1624bf215546Sopenharmony_ci * We do restrict to indices 0 */ 1625bf215546Sopenharmony_ci key.passthrough &= ~((1 << NINE_DECLUSAGE_POSITION) | (1 << NINE_DECLUSAGE_PSIZE) | 1626bf215546Sopenharmony_ci (1 << NINE_DECLUSAGE_TEXCOORD) | (1 << NINE_DECLUSAGE_POSITIONT) | 1627bf215546Sopenharmony_ci (1 << NINE_DECLUSAGE_TESSFACTOR) | (1 << NINE_DECLUSAGE_SAMPLE)); 1628bf215546Sopenharmony_ci if (!key.position_t) 1629bf215546Sopenharmony_ci key.passthrough = 0; 1630bf215546Sopenharmony_ci key.pointscale = !!context->rs[D3DRS_POINTSCALEENABLE]; 1631bf215546Sopenharmony_ci 1632bf215546Sopenharmony_ci key.lighting = !!context->rs[D3DRS_LIGHTING] && context->ff.num_lights_active; 1633bf215546Sopenharmony_ci key.darkness = !!context->rs[D3DRS_LIGHTING] && !context->ff.num_lights_active; 1634bf215546Sopenharmony_ci if (key.position_t) { 1635bf215546Sopenharmony_ci key.darkness = 0; /* |= key.lighting; */ /* XXX ? */ 1636bf215546Sopenharmony_ci key.lighting = 0; 1637bf215546Sopenharmony_ci } 1638bf215546Sopenharmony_ci if ((key.lighting | key.darkness) && context->rs[D3DRS_COLORVERTEX]) { 1639bf215546Sopenharmony_ci uint32_t mask = (key.color0in_one ? 0 : 1) | (key.color1in_zero ? 0 : 2); 1640bf215546Sopenharmony_ci key.mtl_diffuse = context->rs[D3DRS_DIFFUSEMATERIALSOURCE] & mask; 1641bf215546Sopenharmony_ci key.mtl_ambient = context->rs[D3DRS_AMBIENTMATERIALSOURCE] & mask; 1642bf215546Sopenharmony_ci key.mtl_specular = context->rs[D3DRS_SPECULARMATERIALSOURCE] & mask; 1643bf215546Sopenharmony_ci key.mtl_emissive = context->rs[D3DRS_EMISSIVEMATERIALSOURCE] & mask; 1644bf215546Sopenharmony_ci } 1645bf215546Sopenharmony_ci key.fog = !!context->rs[D3DRS_FOGENABLE]; 1646bf215546Sopenharmony_ci key.fog_mode = (!key.position_t && context->rs[D3DRS_FOGENABLE]) ? context->rs[D3DRS_FOGVERTEXMODE] : 0; 1647bf215546Sopenharmony_ci if (key.fog_mode) 1648bf215546Sopenharmony_ci key.fog_range = context->rs[D3DRS_RANGEFOGENABLE]; 1649bf215546Sopenharmony_ci 1650bf215546Sopenharmony_ci key.localviewer = !!context->rs[D3DRS_LOCALVIEWER]; 1651bf215546Sopenharmony_ci key.normalizenormals = !!context->rs[D3DRS_NORMALIZENORMALS]; 1652bf215546Sopenharmony_ci key.ucp = !!context->rs[D3DRS_CLIPPLANEENABLE]; 1653bf215546Sopenharmony_ci 1654bf215546Sopenharmony_ci if (context->rs[D3DRS_VERTEXBLEND] != D3DVBF_DISABLE) { 1655bf215546Sopenharmony_ci key.vertexblend_indexed = !!context->rs[D3DRS_INDEXEDVERTEXBLENDENABLE] && has_indexes; 1656bf215546Sopenharmony_ci 1657bf215546Sopenharmony_ci switch (context->rs[D3DRS_VERTEXBLEND]) { 1658bf215546Sopenharmony_ci case D3DVBF_0WEIGHTS: key.vertexblend = key.vertexblend_indexed; break; 1659bf215546Sopenharmony_ci case D3DVBF_1WEIGHTS: key.vertexblend = 2; break; 1660bf215546Sopenharmony_ci case D3DVBF_2WEIGHTS: key.vertexblend = 3; break; 1661bf215546Sopenharmony_ci case D3DVBF_3WEIGHTS: key.vertexblend = 4; break; 1662bf215546Sopenharmony_ci case D3DVBF_TWEENING: key.vertextween = 1; break; 1663bf215546Sopenharmony_ci default: 1664bf215546Sopenharmony_ci assert(!"invalid D3DVBF"); 1665bf215546Sopenharmony_ci break; 1666bf215546Sopenharmony_ci } 1667bf215546Sopenharmony_ci if (!has_weights && context->rs[D3DRS_VERTEXBLEND] != D3DVBF_0WEIGHTS) 1668bf215546Sopenharmony_ci key.vertexblend = 0; /* TODO: if key.vertexblend_indexed, perhaps it should use 1.0 as weight, or revert to D3DVBF_0WEIGHTS */ 1669bf215546Sopenharmony_ci } 1670bf215546Sopenharmony_ci 1671bf215546Sopenharmony_ci for (s = 0; s < 8; ++s) { 1672bf215546Sopenharmony_ci unsigned gen = (context->ff.tex_stage[s][D3DTSS_TEXCOORDINDEX] >> 16) + 1; 1673bf215546Sopenharmony_ci unsigned idx = context->ff.tex_stage[s][D3DTSS_TEXCOORDINDEX] & 7; 1674bf215546Sopenharmony_ci unsigned dim; 1675bf215546Sopenharmony_ci 1676bf215546Sopenharmony_ci if (key.position_t && gen > NINED3DTSS_TCI_PASSTHRU) 1677bf215546Sopenharmony_ci gen = NINED3DTSS_TCI_PASSTHRU; 1678bf215546Sopenharmony_ci 1679bf215546Sopenharmony_ci if (!input_texture_coord[idx] && gen == NINED3DTSS_TCI_PASSTHRU) 1680bf215546Sopenharmony_ci gen = NINED3DTSS_TCI_DISABLE; 1681bf215546Sopenharmony_ci 1682bf215546Sopenharmony_ci key.tc_gen |= gen << (s * 3); 1683bf215546Sopenharmony_ci key.tc_idx |= idx << (s * 3); 1684bf215546Sopenharmony_ci key.tc_dim_input |= ((input_texture_coord[idx]-1) & 0x3) << (s * 2); 1685bf215546Sopenharmony_ci 1686bf215546Sopenharmony_ci dim = context->ff.tex_stage[s][D3DTSS_TEXTURETRANSFORMFLAGS] & 0x7; 1687bf215546Sopenharmony_ci if (dim > 4) 1688bf215546Sopenharmony_ci dim = input_texture_coord[idx]; 1689bf215546Sopenharmony_ci if (dim == 1) /* NV behaviour */ 1690bf215546Sopenharmony_ci dim = 0; 1691bf215546Sopenharmony_ci key.tc_dim_output |= dim << (s * 3); 1692bf215546Sopenharmony_ci } 1693bf215546Sopenharmony_ci 1694bf215546Sopenharmony_ci DBG("VS ff key hash: %x\n", nine_ff_vs_key_hash(&key)); 1695bf215546Sopenharmony_ci vs = util_hash_table_get(device->ff.ht_vs, &key); 1696bf215546Sopenharmony_ci if (vs) 1697bf215546Sopenharmony_ci return vs; 1698bf215546Sopenharmony_ci NineVertexShader9_new(device, &vs, NULL, nine_ff_build_vs(device, &bld)); 1699bf215546Sopenharmony_ci 1700bf215546Sopenharmony_ci nine_ff_prune_vs(device); 1701bf215546Sopenharmony_ci if (vs) { 1702bf215546Sopenharmony_ci unsigned n; 1703bf215546Sopenharmony_ci 1704bf215546Sopenharmony_ci memcpy(&vs->ff_key, &key, sizeof(vs->ff_key)); 1705bf215546Sopenharmony_ci 1706bf215546Sopenharmony_ci _mesa_hash_table_insert(device->ff.ht_vs, &vs->ff_key, vs); 1707bf215546Sopenharmony_ci device->ff.num_vs++; 1708bf215546Sopenharmony_ci 1709bf215546Sopenharmony_ci vs->num_inputs = bld.num_inputs; 1710bf215546Sopenharmony_ci for (n = 0; n < bld.num_inputs; ++n) 1711bf215546Sopenharmony_ci vs->input_map[n].ndecl = bld.input[n]; 1712bf215546Sopenharmony_ci 1713bf215546Sopenharmony_ci vs->position_t = key.position_t; 1714bf215546Sopenharmony_ci vs->point_size = key.vertexpointsize | key.pointscale; 1715bf215546Sopenharmony_ci } 1716bf215546Sopenharmony_ci return vs; 1717bf215546Sopenharmony_ci} 1718bf215546Sopenharmony_ci 1719bf215546Sopenharmony_ci#define GET_D3DTS(n) nine_state_access_transform(&context->ff, D3DTS_##n, FALSE) 1720bf215546Sopenharmony_ci#define IS_D3DTS_DIRTY(s,n) ((s)->ff.changed.transform[(D3DTS_##n) / 32] & (1 << ((D3DTS_##n) % 32))) 1721bf215546Sopenharmony_ci 1722bf215546Sopenharmony_cistatic struct NinePixelShader9 * 1723bf215546Sopenharmony_cinine_ff_get_ps(struct NineDevice9 *device) 1724bf215546Sopenharmony_ci{ 1725bf215546Sopenharmony_ci struct nine_context *context = &device->context; 1726bf215546Sopenharmony_ci D3DMATRIX *projection_matrix = GET_D3DTS(PROJECTION); 1727bf215546Sopenharmony_ci struct NinePixelShader9 *ps; 1728bf215546Sopenharmony_ci struct nine_ff_ps_key key; 1729bf215546Sopenharmony_ci unsigned s; 1730bf215546Sopenharmony_ci uint8_t sampler_mask = 0; 1731bf215546Sopenharmony_ci 1732bf215546Sopenharmony_ci assert(sizeof(key) <= sizeof(key.value32)); 1733bf215546Sopenharmony_ci 1734bf215546Sopenharmony_ci memset(&key, 0, sizeof(key)); 1735bf215546Sopenharmony_ci for (s = 0; s < 8; ++s) { 1736bf215546Sopenharmony_ci key.ts[s].colorop = context->ff.tex_stage[s][D3DTSS_COLOROP]; 1737bf215546Sopenharmony_ci key.ts[s].alphaop = context->ff.tex_stage[s][D3DTSS_ALPHAOP]; 1738bf215546Sopenharmony_ci const uint8_t used_c = ps_d3dtop_args_mask(key.ts[s].colorop); 1739bf215546Sopenharmony_ci const uint8_t used_a = ps_d3dtop_args_mask(key.ts[s].alphaop); 1740bf215546Sopenharmony_ci /* MSDN says D3DTOP_DISABLE disables this and all subsequent stages. 1741bf215546Sopenharmony_ci * ALPHAOP cannot be enabled if COLOROP is disabled. 1742bf215546Sopenharmony_ci * Verified on Windows. */ 1743bf215546Sopenharmony_ci if (key.ts[s].colorop == D3DTOP_DISABLE) { 1744bf215546Sopenharmony_ci key.ts[s].alphaop = D3DTOP_DISABLE; /* DISABLE == 1, avoid degenerate keys */ 1745bf215546Sopenharmony_ci break; 1746bf215546Sopenharmony_ci } 1747bf215546Sopenharmony_ci 1748bf215546Sopenharmony_ci if (!context->texture[s].enabled && 1749bf215546Sopenharmony_ci ((context->ff.tex_stage[s][D3DTSS_COLORARG0] == D3DTA_TEXTURE && 1750bf215546Sopenharmony_ci used_c & 0x1) || 1751bf215546Sopenharmony_ci (context->ff.tex_stage[s][D3DTSS_COLORARG1] == D3DTA_TEXTURE && 1752bf215546Sopenharmony_ci used_c & 0x2) || 1753bf215546Sopenharmony_ci (context->ff.tex_stage[s][D3DTSS_COLORARG2] == D3DTA_TEXTURE && 1754bf215546Sopenharmony_ci used_c & 0x4))) { 1755bf215546Sopenharmony_ci /* Tested on Windows: Invalid texture read disables the stage 1756bf215546Sopenharmony_ci * and the subsequent ones, but only for colorop. For alpha, 1757bf215546Sopenharmony_ci * it's as if the texture had alpha of 1.0, which is what 1758bf215546Sopenharmony_ci * has our dummy texture in that case. Invalid color also 1759bf215546Sopenharmony_ci * disabled the following alpha stages. */ 1760bf215546Sopenharmony_ci key.ts[s].colorop = key.ts[s].alphaop = D3DTOP_DISABLE; 1761bf215546Sopenharmony_ci break; 1762bf215546Sopenharmony_ci } 1763bf215546Sopenharmony_ci 1764bf215546Sopenharmony_ci if (context->ff.tex_stage[s][D3DTSS_COLORARG0] == D3DTA_TEXTURE || 1765bf215546Sopenharmony_ci context->ff.tex_stage[s][D3DTSS_COLORARG1] == D3DTA_TEXTURE || 1766bf215546Sopenharmony_ci context->ff.tex_stage[s][D3DTSS_COLORARG2] == D3DTA_TEXTURE || 1767bf215546Sopenharmony_ci context->ff.tex_stage[s][D3DTSS_ALPHAARG0] == D3DTA_TEXTURE || 1768bf215546Sopenharmony_ci context->ff.tex_stage[s][D3DTSS_ALPHAARG1] == D3DTA_TEXTURE || 1769bf215546Sopenharmony_ci context->ff.tex_stage[s][D3DTSS_ALPHAARG2] == D3DTA_TEXTURE) 1770bf215546Sopenharmony_ci sampler_mask |= (1 << s); 1771bf215546Sopenharmony_ci 1772bf215546Sopenharmony_ci if (key.ts[s].colorop != D3DTOP_DISABLE) { 1773bf215546Sopenharmony_ci if (used_c & 0x1) key.ts[s].colorarg0 = context->ff.tex_stage[s][D3DTSS_COLORARG0] & 0x7; 1774bf215546Sopenharmony_ci if (used_c & 0x2) key.ts[s].colorarg1 = context->ff.tex_stage[s][D3DTSS_COLORARG1] & 0x7; 1775bf215546Sopenharmony_ci if (used_c & 0x4) key.ts[s].colorarg2 = context->ff.tex_stage[s][D3DTSS_COLORARG2] & 0x7; 1776bf215546Sopenharmony_ci if (used_c & 0x1) key.colorarg_b4[0] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG0] >> 4) & 0x1) << s; 1777bf215546Sopenharmony_ci if (used_c & 0x1) key.colorarg_b5[0] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG0] >> 5) & 0x1) << s; 1778bf215546Sopenharmony_ci if (used_c & 0x2) key.colorarg_b4[1] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG1] >> 4) & 0x1) << s; 1779bf215546Sopenharmony_ci if (used_c & 0x2) key.colorarg_b5[1] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG1] >> 5) & 0x1) << s; 1780bf215546Sopenharmony_ci if (used_c & 0x4) key.colorarg_b4[2] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG2] >> 4) & 0x1) << s; 1781bf215546Sopenharmony_ci if (used_c & 0x4) key.colorarg_b5[2] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG2] >> 5) & 0x1) << s; 1782bf215546Sopenharmony_ci } 1783bf215546Sopenharmony_ci if (key.ts[s].alphaop != D3DTOP_DISABLE) { 1784bf215546Sopenharmony_ci if (used_a & 0x1) key.ts[s].alphaarg0 = context->ff.tex_stage[s][D3DTSS_ALPHAARG0] & 0x7; 1785bf215546Sopenharmony_ci if (used_a & 0x2) key.ts[s].alphaarg1 = context->ff.tex_stage[s][D3DTSS_ALPHAARG1] & 0x7; 1786bf215546Sopenharmony_ci if (used_a & 0x4) key.ts[s].alphaarg2 = context->ff.tex_stage[s][D3DTSS_ALPHAARG2] & 0x7; 1787bf215546Sopenharmony_ci if (used_a & 0x1) key.alphaarg_b4[0] |= ((context->ff.tex_stage[s][D3DTSS_ALPHAARG0] >> 4) & 0x1) << s; 1788bf215546Sopenharmony_ci if (used_a & 0x2) key.alphaarg_b4[1] |= ((context->ff.tex_stage[s][D3DTSS_ALPHAARG1] >> 4) & 0x1) << s; 1789bf215546Sopenharmony_ci if (used_a & 0x4) key.alphaarg_b4[2] |= ((context->ff.tex_stage[s][D3DTSS_ALPHAARG2] >> 4) & 0x1) << s; 1790bf215546Sopenharmony_ci } 1791bf215546Sopenharmony_ci key.ts[s].resultarg = context->ff.tex_stage[s][D3DTSS_RESULTARG] == D3DTA_TEMP; 1792bf215546Sopenharmony_ci 1793bf215546Sopenharmony_ci if (context->texture[s].enabled) { 1794bf215546Sopenharmony_ci switch (context->texture[s].type) { 1795bf215546Sopenharmony_ci case D3DRTYPE_TEXTURE: key.ts[s].textarget = 1; break; 1796bf215546Sopenharmony_ci case D3DRTYPE_VOLUMETEXTURE: key.ts[s].textarget = 2; break; 1797bf215546Sopenharmony_ci case D3DRTYPE_CUBETEXTURE: key.ts[s].textarget = 3; break; 1798bf215546Sopenharmony_ci default: 1799bf215546Sopenharmony_ci assert(!"unexpected texture type"); 1800bf215546Sopenharmony_ci break; 1801bf215546Sopenharmony_ci } 1802bf215546Sopenharmony_ci } else { 1803bf215546Sopenharmony_ci key.ts[s].textarget = 1; 1804bf215546Sopenharmony_ci } 1805bf215546Sopenharmony_ci } 1806bf215546Sopenharmony_ci 1807bf215546Sopenharmony_ci /* Note: If colorop is D3DTOP_DISABLE for the first stage 1808bf215546Sopenharmony_ci * (which implies alphaop is too), nothing particular happens, 1809bf215546Sopenharmony_ci * that is, current is equal to diffuse (which is the case anyway, 1810bf215546Sopenharmony_ci * because it is how it is initialized). 1811bf215546Sopenharmony_ci * Special case seems if alphaop is D3DTOP_DISABLE and not colorop, 1812bf215546Sopenharmony_ci * because then if the resultarg is TEMP, then diffuse alpha is written 1813bf215546Sopenharmony_ci * to it. */ 1814bf215546Sopenharmony_ci if (key.ts[0].colorop != D3DTOP_DISABLE && 1815bf215546Sopenharmony_ci key.ts[0].alphaop == D3DTOP_DISABLE && 1816bf215546Sopenharmony_ci key.ts[0].resultarg != 0) { 1817bf215546Sopenharmony_ci key.ts[0].alphaop = D3DTOP_SELECTARG1; 1818bf215546Sopenharmony_ci key.ts[0].alphaarg1 = D3DTA_DIFFUSE; 1819bf215546Sopenharmony_ci } 1820bf215546Sopenharmony_ci /* When no alpha stage writes to current, diffuse alpha is taken. 1821bf215546Sopenharmony_ci * Since we initialize current to diffuse, we have the behaviour. */ 1822bf215546Sopenharmony_ci 1823bf215546Sopenharmony_ci /* Last stage always writes to Current */ 1824bf215546Sopenharmony_ci if (s >= 1) 1825bf215546Sopenharmony_ci key.ts[s-1].resultarg = 0; 1826bf215546Sopenharmony_ci 1827bf215546Sopenharmony_ci key.projected = nine_ff_get_projected_key_ff(context); 1828bf215546Sopenharmony_ci key.specular = !!context->rs[D3DRS_SPECULARENABLE]; 1829bf215546Sopenharmony_ci 1830bf215546Sopenharmony_ci for (; s < 8; ++s) 1831bf215546Sopenharmony_ci key.ts[s].colorop = key.ts[s].alphaop = D3DTOP_DISABLE; 1832bf215546Sopenharmony_ci if (context->rs[D3DRS_FOGENABLE]) 1833bf215546Sopenharmony_ci key.fog_mode = context->rs[D3DRS_FOGTABLEMODE]; 1834bf215546Sopenharmony_ci key.fog = !!context->rs[D3DRS_FOGENABLE]; 1835bf215546Sopenharmony_ci /* Pixel fog (with WFOG advertised): source is either Z or W. 1836bf215546Sopenharmony_ci * W is the source if vs ff is used, and the 1837bf215546Sopenharmony_ci * projection matrix is not orthogonal. 1838bf215546Sopenharmony_ci * Tests on Win 10 seem to indicate _34 1839bf215546Sopenharmony_ci * and _33 are checked against 0, 1. */ 1840bf215546Sopenharmony_ci if (key.fog_mode && key.fog) 1841bf215546Sopenharmony_ci key.fog_source = !context->programmable_vs && 1842bf215546Sopenharmony_ci !(projection_matrix->_34 == 0.0f && 1843bf215546Sopenharmony_ci projection_matrix->_44 == 1.0f); 1844bf215546Sopenharmony_ci 1845bf215546Sopenharmony_ci DBG("PS ff key hash: %x\n", nine_ff_ps_key_hash(&key)); 1846bf215546Sopenharmony_ci ps = util_hash_table_get(device->ff.ht_ps, &key); 1847bf215546Sopenharmony_ci if (ps) 1848bf215546Sopenharmony_ci return ps; 1849bf215546Sopenharmony_ci NinePixelShader9_new(device, &ps, NULL, nine_ff_build_ps(device, &key)); 1850bf215546Sopenharmony_ci 1851bf215546Sopenharmony_ci nine_ff_prune_ps(device); 1852bf215546Sopenharmony_ci if (ps) { 1853bf215546Sopenharmony_ci memcpy(&ps->ff_key, &key, sizeof(ps->ff_key)); 1854bf215546Sopenharmony_ci 1855bf215546Sopenharmony_ci _mesa_hash_table_insert(device->ff.ht_ps, &ps->ff_key, ps); 1856bf215546Sopenharmony_ci device->ff.num_ps++; 1857bf215546Sopenharmony_ci 1858bf215546Sopenharmony_ci ps->rt_mask = 0x1; 1859bf215546Sopenharmony_ci ps->sampler_mask = sampler_mask; 1860bf215546Sopenharmony_ci } 1861bf215546Sopenharmony_ci return ps; 1862bf215546Sopenharmony_ci} 1863bf215546Sopenharmony_ci 1864bf215546Sopenharmony_cistatic void 1865bf215546Sopenharmony_cinine_ff_load_vs_transforms(struct NineDevice9 *device) 1866bf215546Sopenharmony_ci{ 1867bf215546Sopenharmony_ci struct nine_context *context = &device->context; 1868bf215546Sopenharmony_ci D3DMATRIX T; 1869bf215546Sopenharmony_ci D3DMATRIX *M = (D3DMATRIX *)device->ff.vs_const; 1870bf215546Sopenharmony_ci unsigned i; 1871bf215546Sopenharmony_ci 1872bf215546Sopenharmony_ci /* TODO: make this nicer, and only upload the ones we need */ 1873bf215546Sopenharmony_ci /* TODO: use ff.vs_const as storage of W, V, P matrices */ 1874bf215546Sopenharmony_ci 1875bf215546Sopenharmony_ci if (IS_D3DTS_DIRTY(context, WORLD) || 1876bf215546Sopenharmony_ci IS_D3DTS_DIRTY(context, VIEW) || 1877bf215546Sopenharmony_ci IS_D3DTS_DIRTY(context, PROJECTION)) { 1878bf215546Sopenharmony_ci /* WVP, WV matrices */ 1879bf215546Sopenharmony_ci nine_d3d_matrix_matrix_mul(&M[1], GET_D3DTS(WORLD), GET_D3DTS(VIEW)); 1880bf215546Sopenharmony_ci nine_d3d_matrix_matrix_mul(&M[0], &M[1], GET_D3DTS(PROJECTION)); 1881bf215546Sopenharmony_ci 1882bf215546Sopenharmony_ci /* normal matrix == transpose(inverse(WV)) */ 1883bf215546Sopenharmony_ci nine_d3d_matrix_inverse(&T, &M[1]); 1884bf215546Sopenharmony_ci nine_d3d_matrix_transpose(&M[4], &T); 1885bf215546Sopenharmony_ci 1886bf215546Sopenharmony_ci /* P matrix */ 1887bf215546Sopenharmony_ci M[2] = *GET_D3DTS(PROJECTION); 1888bf215546Sopenharmony_ci 1889bf215546Sopenharmony_ci /* V and W matrix */ 1890bf215546Sopenharmony_ci nine_d3d_matrix_inverse(&M[3], GET_D3DTS(VIEW)); 1891bf215546Sopenharmony_ci M[40] = M[1]; 1892bf215546Sopenharmony_ci } 1893bf215546Sopenharmony_ci 1894bf215546Sopenharmony_ci if (context->rs[D3DRS_VERTEXBLEND] != D3DVBF_DISABLE) { 1895bf215546Sopenharmony_ci /* load other world matrices */ 1896bf215546Sopenharmony_ci for (i = 1; i <= 8; ++i) { 1897bf215546Sopenharmony_ci nine_d3d_matrix_matrix_mul(&M[40 + i], GET_D3DTS(WORLDMATRIX(i)), GET_D3DTS(VIEW)); 1898bf215546Sopenharmony_ci } 1899bf215546Sopenharmony_ci } 1900bf215546Sopenharmony_ci 1901bf215546Sopenharmony_ci device->ff.vs_const[30 * 4] = asfloat(context->rs[D3DRS_TWEENFACTOR]); 1902bf215546Sopenharmony_ci} 1903bf215546Sopenharmony_ci 1904bf215546Sopenharmony_cistatic void 1905bf215546Sopenharmony_cinine_ff_load_lights(struct NineDevice9 *device) 1906bf215546Sopenharmony_ci{ 1907bf215546Sopenharmony_ci struct nine_context *context = &device->context; 1908bf215546Sopenharmony_ci struct fvec4 *dst = (struct fvec4 *)device->ff.vs_const; 1909bf215546Sopenharmony_ci unsigned l; 1910bf215546Sopenharmony_ci 1911bf215546Sopenharmony_ci if (context->changed.group & NINE_STATE_FF_MATERIAL) { 1912bf215546Sopenharmony_ci const D3DMATERIAL9 *mtl = &context->ff.material; 1913bf215546Sopenharmony_ci 1914bf215546Sopenharmony_ci memcpy(&dst[20], &mtl->Diffuse, 4 * sizeof(float)); 1915bf215546Sopenharmony_ci memcpy(&dst[21], &mtl->Ambient, 4 * sizeof(float)); 1916bf215546Sopenharmony_ci memcpy(&dst[22], &mtl->Specular, 4 * sizeof(float)); 1917bf215546Sopenharmony_ci dst[23].x = mtl->Power; 1918bf215546Sopenharmony_ci memcpy(&dst[24], &mtl->Emissive, 4 * sizeof(float)); 1919bf215546Sopenharmony_ci d3dcolor_to_rgba(&dst[25].x, context->rs[D3DRS_AMBIENT]); 1920bf215546Sopenharmony_ci dst[19].x = dst[25].x * mtl->Ambient.r + mtl->Emissive.r; 1921bf215546Sopenharmony_ci dst[19].y = dst[25].y * mtl->Ambient.g + mtl->Emissive.g; 1922bf215546Sopenharmony_ci dst[19].z = dst[25].z * mtl->Ambient.b + mtl->Emissive.b; 1923bf215546Sopenharmony_ci } 1924bf215546Sopenharmony_ci 1925bf215546Sopenharmony_ci if (!(context->changed.group & NINE_STATE_FF_LIGHTING)) 1926bf215546Sopenharmony_ci return; 1927bf215546Sopenharmony_ci 1928bf215546Sopenharmony_ci for (l = 0; l < context->ff.num_lights_active; ++l) { 1929bf215546Sopenharmony_ci const D3DLIGHT9 *light = &context->ff.light[context->ff.active_light[l]]; 1930bf215546Sopenharmony_ci 1931bf215546Sopenharmony_ci dst[32 + l * 8].x = light->Type; 1932bf215546Sopenharmony_ci dst[32 + l * 8].y = light->Attenuation0; 1933bf215546Sopenharmony_ci dst[32 + l * 8].z = light->Attenuation1; 1934bf215546Sopenharmony_ci dst[32 + l * 8].w = light->Attenuation2; 1935bf215546Sopenharmony_ci memcpy(&dst[33 + l * 8].x, &light->Diffuse, sizeof(light->Diffuse)); 1936bf215546Sopenharmony_ci memcpy(&dst[34 + l * 8].x, &light->Specular, sizeof(light->Specular)); 1937bf215546Sopenharmony_ci memcpy(&dst[35 + l * 8].x, &light->Ambient, sizeof(light->Ambient)); 1938bf215546Sopenharmony_ci nine_d3d_vector4_matrix_mul((D3DVECTOR *)&dst[36 + l * 8].x, &light->Position, GET_D3DTS(VIEW)); 1939bf215546Sopenharmony_ci nine_d3d_vector3_matrix_mul((D3DVECTOR *)&dst[37 + l * 8].x, &light->Direction, GET_D3DTS(VIEW)); 1940bf215546Sopenharmony_ci dst[36 + l * 8].w = light->Type == D3DLIGHT_DIRECTIONAL ? 1e9f : light->Range; 1941bf215546Sopenharmony_ci dst[37 + l * 8].w = light->Falloff; 1942bf215546Sopenharmony_ci dst[38 + l * 8].x = cosf(light->Theta * 0.5f); 1943bf215546Sopenharmony_ci dst[38 + l * 8].y = cosf(light->Phi * 0.5f); 1944bf215546Sopenharmony_ci dst[38 + l * 8].z = 1.0f / (dst[38 + l * 8].x - dst[38 + l * 8].y); 1945bf215546Sopenharmony_ci dst[39 + l * 8].w = (float)((l + 1) == context->ff.num_lights_active); 1946bf215546Sopenharmony_ci } 1947bf215546Sopenharmony_ci} 1948bf215546Sopenharmony_ci 1949bf215546Sopenharmony_cistatic void 1950bf215546Sopenharmony_cinine_ff_load_point_and_fog_params(struct NineDevice9 *device) 1951bf215546Sopenharmony_ci{ 1952bf215546Sopenharmony_ci struct nine_context *context = &device->context; 1953bf215546Sopenharmony_ci struct fvec4 *dst = (struct fvec4 *)device->ff.vs_const; 1954bf215546Sopenharmony_ci 1955bf215546Sopenharmony_ci if (!(context->changed.group & NINE_STATE_FF_VS_OTHER)) 1956bf215546Sopenharmony_ci return; 1957bf215546Sopenharmony_ci dst[26].x = asfloat(context->rs[D3DRS_POINTSIZE_MIN]); 1958bf215546Sopenharmony_ci dst[26].y = asfloat(context->rs[D3DRS_POINTSIZE_MAX]); 1959bf215546Sopenharmony_ci dst[26].z = asfloat(context->rs[D3DRS_POINTSIZE]); 1960bf215546Sopenharmony_ci dst[26].w = asfloat(context->rs[D3DRS_POINTSCALE_A]); 1961bf215546Sopenharmony_ci dst[27].x = asfloat(context->rs[D3DRS_POINTSCALE_B]); 1962bf215546Sopenharmony_ci dst[27].y = asfloat(context->rs[D3DRS_POINTSCALE_C]); 1963bf215546Sopenharmony_ci dst[28].x = asfloat(context->rs[D3DRS_FOGEND]); 1964bf215546Sopenharmony_ci dst[28].y = 1.0f / (asfloat(context->rs[D3DRS_FOGEND]) - asfloat(context->rs[D3DRS_FOGSTART])); 1965bf215546Sopenharmony_ci if (isinf(dst[28].y)) 1966bf215546Sopenharmony_ci dst[28].y = 0.0f; 1967bf215546Sopenharmony_ci dst[28].z = asfloat(context->rs[D3DRS_FOGDENSITY]); 1968bf215546Sopenharmony_ci} 1969bf215546Sopenharmony_ci 1970bf215546Sopenharmony_cistatic void 1971bf215546Sopenharmony_cinine_ff_load_tex_matrices(struct NineDevice9 *device) 1972bf215546Sopenharmony_ci{ 1973bf215546Sopenharmony_ci struct nine_context *context = &device->context; 1974bf215546Sopenharmony_ci D3DMATRIX *M = (D3DMATRIX *)device->ff.vs_const; 1975bf215546Sopenharmony_ci unsigned s; 1976bf215546Sopenharmony_ci 1977bf215546Sopenharmony_ci if (!(context->ff.changed.transform[0] & 0xff0000)) 1978bf215546Sopenharmony_ci return; 1979bf215546Sopenharmony_ci for (s = 0; s < 8; ++s) { 1980bf215546Sopenharmony_ci if (IS_D3DTS_DIRTY(context, TEXTURE0 + s)) 1981bf215546Sopenharmony_ci nine_d3d_matrix_transpose(&M[32 + s], nine_state_access_transform(&context->ff, D3DTS_TEXTURE0 + s, FALSE)); 1982bf215546Sopenharmony_ci } 1983bf215546Sopenharmony_ci} 1984bf215546Sopenharmony_ci 1985bf215546Sopenharmony_cistatic void 1986bf215546Sopenharmony_cinine_ff_load_ps_params(struct NineDevice9 *device) 1987bf215546Sopenharmony_ci{ 1988bf215546Sopenharmony_ci struct nine_context *context = &device->context; 1989bf215546Sopenharmony_ci struct fvec4 *dst = (struct fvec4 *)device->ff.ps_const; 1990bf215546Sopenharmony_ci unsigned s; 1991bf215546Sopenharmony_ci 1992bf215546Sopenharmony_ci if (!(context->changed.group & NINE_STATE_FF_PS_CONSTS)) 1993bf215546Sopenharmony_ci return; 1994bf215546Sopenharmony_ci 1995bf215546Sopenharmony_ci for (s = 0; s < 8; ++s) 1996bf215546Sopenharmony_ci d3dcolor_to_rgba(&dst[s].x, context->ff.tex_stage[s][D3DTSS_CONSTANT]); 1997bf215546Sopenharmony_ci 1998bf215546Sopenharmony_ci for (s = 0; s < 8; ++s) { 1999bf215546Sopenharmony_ci dst[8 + s].x = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVMAT00]); 2000bf215546Sopenharmony_ci dst[8 + s].y = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVMAT01]); 2001bf215546Sopenharmony_ci dst[8 + s].z = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVMAT10]); 2002bf215546Sopenharmony_ci dst[8 + s].w = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVMAT11]); 2003bf215546Sopenharmony_ci if (s & 1) { 2004bf215546Sopenharmony_ci dst[16 + s / 2].z = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVLSCALE]); 2005bf215546Sopenharmony_ci dst[16 + s / 2].w = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVLOFFSET]); 2006bf215546Sopenharmony_ci } else { 2007bf215546Sopenharmony_ci dst[16 + s / 2].x = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVLSCALE]); 2008bf215546Sopenharmony_ci dst[16 + s / 2].y = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVLOFFSET]); 2009bf215546Sopenharmony_ci } 2010bf215546Sopenharmony_ci } 2011bf215546Sopenharmony_ci 2012bf215546Sopenharmony_ci d3dcolor_to_rgba(&dst[20].x, context->rs[D3DRS_TEXTUREFACTOR]); 2013bf215546Sopenharmony_ci d3dcolor_to_rgba(&dst[21].x, context->rs[D3DRS_FOGCOLOR]); 2014bf215546Sopenharmony_ci dst[22].x = asfloat(context->rs[D3DRS_FOGEND]); 2015bf215546Sopenharmony_ci dst[22].y = 1.0f / (asfloat(context->rs[D3DRS_FOGEND]) - asfloat(context->rs[D3DRS_FOGSTART])); 2016bf215546Sopenharmony_ci dst[22].z = asfloat(context->rs[D3DRS_FOGDENSITY]); 2017bf215546Sopenharmony_ci} 2018bf215546Sopenharmony_ci 2019bf215546Sopenharmony_cistatic void 2020bf215546Sopenharmony_cinine_ff_load_viewport_info(struct NineDevice9 *device) 2021bf215546Sopenharmony_ci{ 2022bf215546Sopenharmony_ci D3DVIEWPORT9 *viewport = &device->context.viewport; 2023bf215546Sopenharmony_ci struct fvec4 *dst = (struct fvec4 *)device->ff.vs_const; 2024bf215546Sopenharmony_ci float diffZ = viewport->MaxZ - viewport->MinZ; 2025bf215546Sopenharmony_ci 2026bf215546Sopenharmony_ci /* Note: the other functions avoids to fill the const again if nothing changed. 2027bf215546Sopenharmony_ci * But we don't have much to fill, and adding code to allow that may be complex 2028bf215546Sopenharmony_ci * so just fill it always */ 2029bf215546Sopenharmony_ci dst[100].x = 2.0f / (float)(viewport->Width); 2030bf215546Sopenharmony_ci dst[100].y = 2.0f / (float)(viewport->Height); 2031bf215546Sopenharmony_ci dst[100].z = (diffZ == 0.0f) ? 0.0f : (1.0f / diffZ); 2032bf215546Sopenharmony_ci dst[100].w = (float)(viewport->Width); 2033bf215546Sopenharmony_ci dst[101].x = (float)(viewport->X); 2034bf215546Sopenharmony_ci dst[101].y = (float)(viewport->Y); 2035bf215546Sopenharmony_ci dst[101].z = (float)(viewport->MinZ); 2036bf215546Sopenharmony_ci} 2037bf215546Sopenharmony_ci 2038bf215546Sopenharmony_civoid 2039bf215546Sopenharmony_cinine_ff_update(struct NineDevice9 *device) 2040bf215546Sopenharmony_ci{ 2041bf215546Sopenharmony_ci struct nine_context *context = &device->context; 2042bf215546Sopenharmony_ci struct pipe_constant_buffer cb; 2043bf215546Sopenharmony_ci 2044bf215546Sopenharmony_ci DBG("vs=%p ps=%p\n", context->vs, context->ps); 2045bf215546Sopenharmony_ci 2046bf215546Sopenharmony_ci /* NOTE: the only reference belongs to the hash table */ 2047bf215546Sopenharmony_ci if (!context->programmable_vs) { 2048bf215546Sopenharmony_ci device->ff.vs = nine_ff_get_vs(device); 2049bf215546Sopenharmony_ci context->changed.group |= NINE_STATE_VS; 2050bf215546Sopenharmony_ci } 2051bf215546Sopenharmony_ci if (!context->ps) { 2052bf215546Sopenharmony_ci device->ff.ps = nine_ff_get_ps(device); 2053bf215546Sopenharmony_ci context->changed.group |= NINE_STATE_PS; 2054bf215546Sopenharmony_ci } 2055bf215546Sopenharmony_ci 2056bf215546Sopenharmony_ci if (!context->programmable_vs) { 2057bf215546Sopenharmony_ci nine_ff_load_vs_transforms(device); 2058bf215546Sopenharmony_ci nine_ff_load_tex_matrices(device); 2059bf215546Sopenharmony_ci nine_ff_load_lights(device); 2060bf215546Sopenharmony_ci nine_ff_load_point_and_fog_params(device); 2061bf215546Sopenharmony_ci nine_ff_load_viewport_info(device); 2062bf215546Sopenharmony_ci 2063bf215546Sopenharmony_ci memset(context->ff.changed.transform, 0, sizeof(context->ff.changed.transform)); 2064bf215546Sopenharmony_ci 2065bf215546Sopenharmony_ci cb.buffer_offset = 0; 2066bf215546Sopenharmony_ci cb.buffer = NULL; 2067bf215546Sopenharmony_ci cb.user_buffer = device->ff.vs_const; 2068bf215546Sopenharmony_ci cb.buffer_size = NINE_FF_NUM_VS_CONST * 4 * sizeof(float); 2069bf215546Sopenharmony_ci 2070bf215546Sopenharmony_ci context->pipe_data.cb_vs_ff = cb; 2071bf215546Sopenharmony_ci context->commit |= NINE_STATE_COMMIT_CONST_VS; 2072bf215546Sopenharmony_ci 2073bf215546Sopenharmony_ci context->changed.group &= ~NINE_STATE_FF_VS; 2074bf215546Sopenharmony_ci } 2075bf215546Sopenharmony_ci 2076bf215546Sopenharmony_ci if (!context->ps) { 2077bf215546Sopenharmony_ci nine_ff_load_ps_params(device); 2078bf215546Sopenharmony_ci 2079bf215546Sopenharmony_ci cb.buffer_offset = 0; 2080bf215546Sopenharmony_ci cb.buffer = NULL; 2081bf215546Sopenharmony_ci cb.user_buffer = device->ff.ps_const; 2082bf215546Sopenharmony_ci cb.buffer_size = NINE_FF_NUM_PS_CONST * 4 * sizeof(float); 2083bf215546Sopenharmony_ci 2084bf215546Sopenharmony_ci context->pipe_data.cb_ps_ff = cb; 2085bf215546Sopenharmony_ci context->commit |= NINE_STATE_COMMIT_CONST_PS; 2086bf215546Sopenharmony_ci 2087bf215546Sopenharmony_ci context->changed.group &= ~NINE_STATE_FF_PS; 2088bf215546Sopenharmony_ci } 2089bf215546Sopenharmony_ci} 2090bf215546Sopenharmony_ci 2091bf215546Sopenharmony_ci 2092bf215546Sopenharmony_ciboolean 2093bf215546Sopenharmony_cinine_ff_init(struct NineDevice9 *device) 2094bf215546Sopenharmony_ci{ 2095bf215546Sopenharmony_ci device->ff.ht_vs = _mesa_hash_table_create(NULL, nine_ff_vs_key_hash, 2096bf215546Sopenharmony_ci nine_ff_vs_key_comp); 2097bf215546Sopenharmony_ci device->ff.ht_ps = _mesa_hash_table_create(NULL, nine_ff_ps_key_hash, 2098bf215546Sopenharmony_ci nine_ff_ps_key_comp); 2099bf215546Sopenharmony_ci 2100bf215546Sopenharmony_ci device->ff.ht_fvf = _mesa_hash_table_create(NULL, nine_ff_fvf_key_hash, 2101bf215546Sopenharmony_ci nine_ff_fvf_key_comp); 2102bf215546Sopenharmony_ci 2103bf215546Sopenharmony_ci device->ff.vs_const = CALLOC(NINE_FF_NUM_VS_CONST, 4 * sizeof(float)); 2104bf215546Sopenharmony_ci device->ff.ps_const = CALLOC(NINE_FF_NUM_PS_CONST, 4 * sizeof(float)); 2105bf215546Sopenharmony_ci 2106bf215546Sopenharmony_ci return device->ff.ht_vs && device->ff.ht_ps && 2107bf215546Sopenharmony_ci device->ff.ht_fvf && 2108bf215546Sopenharmony_ci device->ff.vs_const && device->ff.ps_const; 2109bf215546Sopenharmony_ci} 2110bf215546Sopenharmony_ci 2111bf215546Sopenharmony_cistatic enum pipe_error nine_ff_ht_delete_cb(void *key, void *value, void *data) 2112bf215546Sopenharmony_ci{ 2113bf215546Sopenharmony_ci NineUnknown_Unbind(NineUnknown(value)); 2114bf215546Sopenharmony_ci return PIPE_OK; 2115bf215546Sopenharmony_ci} 2116bf215546Sopenharmony_ci 2117bf215546Sopenharmony_civoid 2118bf215546Sopenharmony_cinine_ff_fini(struct NineDevice9 *device) 2119bf215546Sopenharmony_ci{ 2120bf215546Sopenharmony_ci if (device->ff.ht_vs) { 2121bf215546Sopenharmony_ci util_hash_table_foreach(device->ff.ht_vs, nine_ff_ht_delete_cb, NULL); 2122bf215546Sopenharmony_ci _mesa_hash_table_destroy(device->ff.ht_vs, NULL); 2123bf215546Sopenharmony_ci } 2124bf215546Sopenharmony_ci if (device->ff.ht_ps) { 2125bf215546Sopenharmony_ci util_hash_table_foreach(device->ff.ht_ps, nine_ff_ht_delete_cb, NULL); 2126bf215546Sopenharmony_ci _mesa_hash_table_destroy(device->ff.ht_ps, NULL); 2127bf215546Sopenharmony_ci } 2128bf215546Sopenharmony_ci if (device->ff.ht_fvf) { 2129bf215546Sopenharmony_ci util_hash_table_foreach(device->ff.ht_fvf, nine_ff_ht_delete_cb, NULL); 2130bf215546Sopenharmony_ci _mesa_hash_table_destroy(device->ff.ht_fvf, NULL); 2131bf215546Sopenharmony_ci } 2132bf215546Sopenharmony_ci device->ff.vs = NULL; /* destroyed by unbinding from hash table */ 2133bf215546Sopenharmony_ci device->ff.ps = NULL; 2134bf215546Sopenharmony_ci 2135bf215546Sopenharmony_ci FREE(device->ff.vs_const); 2136bf215546Sopenharmony_ci FREE(device->ff.ps_const); 2137bf215546Sopenharmony_ci} 2138bf215546Sopenharmony_ci 2139bf215546Sopenharmony_cistatic void 2140bf215546Sopenharmony_cinine_ff_prune_vs(struct NineDevice9 *device) 2141bf215546Sopenharmony_ci{ 2142bf215546Sopenharmony_ci struct nine_context *context = &device->context; 2143bf215546Sopenharmony_ci 2144bf215546Sopenharmony_ci if (device->ff.num_vs > 1024) { 2145bf215546Sopenharmony_ci /* could destroy the bound one here, so unbind */ 2146bf215546Sopenharmony_ci context->pipe->bind_vs_state(context->pipe, NULL); 2147bf215546Sopenharmony_ci util_hash_table_foreach(device->ff.ht_vs, nine_ff_ht_delete_cb, NULL); 2148bf215546Sopenharmony_ci _mesa_hash_table_clear(device->ff.ht_vs, NULL); 2149bf215546Sopenharmony_ci device->ff.num_vs = 0; 2150bf215546Sopenharmony_ci context->changed.group |= NINE_STATE_VS; 2151bf215546Sopenharmony_ci } 2152bf215546Sopenharmony_ci} 2153bf215546Sopenharmony_cistatic void 2154bf215546Sopenharmony_cinine_ff_prune_ps(struct NineDevice9 *device) 2155bf215546Sopenharmony_ci{ 2156bf215546Sopenharmony_ci struct nine_context *context = &device->context; 2157bf215546Sopenharmony_ci 2158bf215546Sopenharmony_ci if (device->ff.num_ps > 1024) { 2159bf215546Sopenharmony_ci /* could destroy the bound one here, so unbind */ 2160bf215546Sopenharmony_ci context->pipe->bind_fs_state(context->pipe, NULL); 2161bf215546Sopenharmony_ci util_hash_table_foreach(device->ff.ht_ps, nine_ff_ht_delete_cb, NULL); 2162bf215546Sopenharmony_ci _mesa_hash_table_clear(device->ff.ht_ps, NULL); 2163bf215546Sopenharmony_ci device->ff.num_ps = 0; 2164bf215546Sopenharmony_ci context->changed.group |= NINE_STATE_PS; 2165bf215546Sopenharmony_ci } 2166bf215546Sopenharmony_ci} 2167bf215546Sopenharmony_ci 2168bf215546Sopenharmony_ci/* ========================================================================== */ 2169bf215546Sopenharmony_ci 2170bf215546Sopenharmony_ci/* Matrix multiplication: 2171bf215546Sopenharmony_ci * 2172bf215546Sopenharmony_ci * in memory: 0 1 2 3 (row major) 2173bf215546Sopenharmony_ci * 4 5 6 7 2174bf215546Sopenharmony_ci * 8 9 a b 2175bf215546Sopenharmony_ci * c d e f 2176bf215546Sopenharmony_ci * 2177bf215546Sopenharmony_ci * cA cB cC cD 2178bf215546Sopenharmony_ci * r0 = (r0 * cA) (r0 * cB) . . 2179bf215546Sopenharmony_ci * r1 = (r1 * cA) (r1 * cB) 2180bf215546Sopenharmony_ci * r2 = (r2 * cA) . 2181bf215546Sopenharmony_ci * r3 = (r3 * cA) . 2182bf215546Sopenharmony_ci * 2183bf215546Sopenharmony_ci * r: (11) (12) (13) (14) 2184bf215546Sopenharmony_ci * (21) (22) (23) (24) 2185bf215546Sopenharmony_ci * (31) (32) (33) (34) 2186bf215546Sopenharmony_ci * (41) (42) (43) (44) 2187bf215546Sopenharmony_ci * l: (11 12 13 14) 2188bf215546Sopenharmony_ci * (21 22 23 24) 2189bf215546Sopenharmony_ci * (31 32 33 34) 2190bf215546Sopenharmony_ci * (41 42 43 44) 2191bf215546Sopenharmony_ci * 2192bf215546Sopenharmony_ci * v: (x y z 1 ) 2193bf215546Sopenharmony_ci * 2194bf215546Sopenharmony_ci * t.xyzw = MUL(v.xxxx, r[0]); 2195bf215546Sopenharmony_ci * t.xyzw = MAD(v.yyyy, r[1], t.xyzw); 2196bf215546Sopenharmony_ci * t.xyzw = MAD(v.zzzz, r[2], t.xyzw); 2197bf215546Sopenharmony_ci * v.xyzw = MAD(v.wwww, r[3], t.xyzw); 2198bf215546Sopenharmony_ci * 2199bf215546Sopenharmony_ci * v.x = DP4(v, c[0]); 2200bf215546Sopenharmony_ci * v.y = DP4(v, c[1]); 2201bf215546Sopenharmony_ci * v.z = DP4(v, c[2]); 2202bf215546Sopenharmony_ci * v.w = DP4(v, c[3]) = 1 2203bf215546Sopenharmony_ci */ 2204bf215546Sopenharmony_ci 2205bf215546Sopenharmony_ci/* 2206bf215546Sopenharmony_cistatic void 2207bf215546Sopenharmony_cinine_D3DMATRIX_print(const D3DMATRIX *M) 2208bf215546Sopenharmony_ci{ 2209bf215546Sopenharmony_ci DBG("\n(%f %f %f %f)\n" 2210bf215546Sopenharmony_ci "(%f %f %f %f)\n" 2211bf215546Sopenharmony_ci "(%f %f %f %f)\n" 2212bf215546Sopenharmony_ci "(%f %f %f %f)\n", 2213bf215546Sopenharmony_ci M->m[0][0], M->m[0][1], M->m[0][2], M->m[0][3], 2214bf215546Sopenharmony_ci M->m[1][0], M->m[1][1], M->m[1][2], M->m[1][3], 2215bf215546Sopenharmony_ci M->m[2][0], M->m[2][1], M->m[2][2], M->m[2][3], 2216bf215546Sopenharmony_ci M->m[3][0], M->m[3][1], M->m[3][2], M->m[3][3]); 2217bf215546Sopenharmony_ci} 2218bf215546Sopenharmony_ci*/ 2219bf215546Sopenharmony_ci 2220bf215546Sopenharmony_cistatic inline float 2221bf215546Sopenharmony_cinine_DP4_row_col(const D3DMATRIX *A, int r, const D3DMATRIX *B, int c) 2222bf215546Sopenharmony_ci{ 2223bf215546Sopenharmony_ci return A->m[r][0] * B->m[0][c] + 2224bf215546Sopenharmony_ci A->m[r][1] * B->m[1][c] + 2225bf215546Sopenharmony_ci A->m[r][2] * B->m[2][c] + 2226bf215546Sopenharmony_ci A->m[r][3] * B->m[3][c]; 2227bf215546Sopenharmony_ci} 2228bf215546Sopenharmony_ci 2229bf215546Sopenharmony_cistatic inline float 2230bf215546Sopenharmony_cinine_DP4_vec_col(const D3DVECTOR *v, const D3DMATRIX *M, int c) 2231bf215546Sopenharmony_ci{ 2232bf215546Sopenharmony_ci return v->x * M->m[0][c] + 2233bf215546Sopenharmony_ci v->y * M->m[1][c] + 2234bf215546Sopenharmony_ci v->z * M->m[2][c] + 2235bf215546Sopenharmony_ci 1.0f * M->m[3][c]; 2236bf215546Sopenharmony_ci} 2237bf215546Sopenharmony_ci 2238bf215546Sopenharmony_cistatic inline float 2239bf215546Sopenharmony_cinine_DP3_vec_col(const D3DVECTOR *v, const D3DMATRIX *M, int c) 2240bf215546Sopenharmony_ci{ 2241bf215546Sopenharmony_ci return v->x * M->m[0][c] + 2242bf215546Sopenharmony_ci v->y * M->m[1][c] + 2243bf215546Sopenharmony_ci v->z * M->m[2][c]; 2244bf215546Sopenharmony_ci} 2245bf215546Sopenharmony_ci 2246bf215546Sopenharmony_civoid 2247bf215546Sopenharmony_cinine_d3d_matrix_matrix_mul(D3DMATRIX *D, const D3DMATRIX *L, const D3DMATRIX *R) 2248bf215546Sopenharmony_ci{ 2249bf215546Sopenharmony_ci D->_11 = nine_DP4_row_col(L, 0, R, 0); 2250bf215546Sopenharmony_ci D->_12 = nine_DP4_row_col(L, 0, R, 1); 2251bf215546Sopenharmony_ci D->_13 = nine_DP4_row_col(L, 0, R, 2); 2252bf215546Sopenharmony_ci D->_14 = nine_DP4_row_col(L, 0, R, 3); 2253bf215546Sopenharmony_ci 2254bf215546Sopenharmony_ci D->_21 = nine_DP4_row_col(L, 1, R, 0); 2255bf215546Sopenharmony_ci D->_22 = nine_DP4_row_col(L, 1, R, 1); 2256bf215546Sopenharmony_ci D->_23 = nine_DP4_row_col(L, 1, R, 2); 2257bf215546Sopenharmony_ci D->_24 = nine_DP4_row_col(L, 1, R, 3); 2258bf215546Sopenharmony_ci 2259bf215546Sopenharmony_ci D->_31 = nine_DP4_row_col(L, 2, R, 0); 2260bf215546Sopenharmony_ci D->_32 = nine_DP4_row_col(L, 2, R, 1); 2261bf215546Sopenharmony_ci D->_33 = nine_DP4_row_col(L, 2, R, 2); 2262bf215546Sopenharmony_ci D->_34 = nine_DP4_row_col(L, 2, R, 3); 2263bf215546Sopenharmony_ci 2264bf215546Sopenharmony_ci D->_41 = nine_DP4_row_col(L, 3, R, 0); 2265bf215546Sopenharmony_ci D->_42 = nine_DP4_row_col(L, 3, R, 1); 2266bf215546Sopenharmony_ci D->_43 = nine_DP4_row_col(L, 3, R, 2); 2267bf215546Sopenharmony_ci D->_44 = nine_DP4_row_col(L, 3, R, 3); 2268bf215546Sopenharmony_ci} 2269bf215546Sopenharmony_ci 2270bf215546Sopenharmony_civoid 2271bf215546Sopenharmony_cinine_d3d_vector4_matrix_mul(D3DVECTOR *d, const D3DVECTOR *v, const D3DMATRIX *M) 2272bf215546Sopenharmony_ci{ 2273bf215546Sopenharmony_ci d->x = nine_DP4_vec_col(v, M, 0); 2274bf215546Sopenharmony_ci d->y = nine_DP4_vec_col(v, M, 1); 2275bf215546Sopenharmony_ci d->z = nine_DP4_vec_col(v, M, 2); 2276bf215546Sopenharmony_ci} 2277bf215546Sopenharmony_ci 2278bf215546Sopenharmony_civoid 2279bf215546Sopenharmony_cinine_d3d_vector3_matrix_mul(D3DVECTOR *d, const D3DVECTOR *v, const D3DMATRIX *M) 2280bf215546Sopenharmony_ci{ 2281bf215546Sopenharmony_ci d->x = nine_DP3_vec_col(v, M, 0); 2282bf215546Sopenharmony_ci d->y = nine_DP3_vec_col(v, M, 1); 2283bf215546Sopenharmony_ci d->z = nine_DP3_vec_col(v, M, 2); 2284bf215546Sopenharmony_ci} 2285bf215546Sopenharmony_ci 2286bf215546Sopenharmony_civoid 2287bf215546Sopenharmony_cinine_d3d_matrix_transpose(D3DMATRIX *D, const D3DMATRIX *M) 2288bf215546Sopenharmony_ci{ 2289bf215546Sopenharmony_ci unsigned i, j; 2290bf215546Sopenharmony_ci for (i = 0; i < 4; ++i) 2291bf215546Sopenharmony_ci for (j = 0; j < 4; ++j) 2292bf215546Sopenharmony_ci D->m[i][j] = M->m[j][i]; 2293bf215546Sopenharmony_ci} 2294bf215546Sopenharmony_ci 2295bf215546Sopenharmony_ci#define _M_ADD_PROD_1i_2j_3k_4l(i,j,k,l) do { \ 2296bf215546Sopenharmony_ci float t = M->_1##i * M->_2##j * M->_3##k * M->_4##l; \ 2297bf215546Sopenharmony_ci if (t > 0.0f) pos += t; else neg += t; } while(0) 2298bf215546Sopenharmony_ci 2299bf215546Sopenharmony_ci#define _M_SUB_PROD_1i_2j_3k_4l(i,j,k,l) do { \ 2300bf215546Sopenharmony_ci float t = M->_1##i * M->_2##j * M->_3##k * M->_4##l; \ 2301bf215546Sopenharmony_ci if (t > 0.0f) neg -= t; else pos -= t; } while(0) 2302bf215546Sopenharmony_cifloat 2303bf215546Sopenharmony_cinine_d3d_matrix_det(const D3DMATRIX *M) 2304bf215546Sopenharmony_ci{ 2305bf215546Sopenharmony_ci float pos = 0.0f; 2306bf215546Sopenharmony_ci float neg = 0.0f; 2307bf215546Sopenharmony_ci 2308bf215546Sopenharmony_ci _M_ADD_PROD_1i_2j_3k_4l(1, 2, 3, 4); 2309bf215546Sopenharmony_ci _M_ADD_PROD_1i_2j_3k_4l(1, 3, 4, 2); 2310bf215546Sopenharmony_ci _M_ADD_PROD_1i_2j_3k_4l(1, 4, 2, 3); 2311bf215546Sopenharmony_ci 2312bf215546Sopenharmony_ci _M_ADD_PROD_1i_2j_3k_4l(2, 1, 4, 3); 2313bf215546Sopenharmony_ci _M_ADD_PROD_1i_2j_3k_4l(2, 3, 1, 4); 2314bf215546Sopenharmony_ci _M_ADD_PROD_1i_2j_3k_4l(2, 4, 3, 1); 2315bf215546Sopenharmony_ci 2316bf215546Sopenharmony_ci _M_ADD_PROD_1i_2j_3k_4l(3, 1, 2, 4); 2317bf215546Sopenharmony_ci _M_ADD_PROD_1i_2j_3k_4l(3, 2, 4, 1); 2318bf215546Sopenharmony_ci _M_ADD_PROD_1i_2j_3k_4l(3, 4, 1, 2); 2319bf215546Sopenharmony_ci 2320bf215546Sopenharmony_ci _M_ADD_PROD_1i_2j_3k_4l(4, 1, 3, 2); 2321bf215546Sopenharmony_ci _M_ADD_PROD_1i_2j_3k_4l(4, 2, 1, 3); 2322bf215546Sopenharmony_ci _M_ADD_PROD_1i_2j_3k_4l(4, 3, 2, 1); 2323bf215546Sopenharmony_ci 2324bf215546Sopenharmony_ci _M_SUB_PROD_1i_2j_3k_4l(1, 2, 4, 3); 2325bf215546Sopenharmony_ci _M_SUB_PROD_1i_2j_3k_4l(1, 3, 2, 4); 2326bf215546Sopenharmony_ci _M_SUB_PROD_1i_2j_3k_4l(1, 4, 3, 2); 2327bf215546Sopenharmony_ci 2328bf215546Sopenharmony_ci _M_SUB_PROD_1i_2j_3k_4l(2, 1, 3, 4); 2329bf215546Sopenharmony_ci _M_SUB_PROD_1i_2j_3k_4l(2, 3, 4, 1); 2330bf215546Sopenharmony_ci _M_SUB_PROD_1i_2j_3k_4l(2, 4, 1, 3); 2331bf215546Sopenharmony_ci 2332bf215546Sopenharmony_ci _M_SUB_PROD_1i_2j_3k_4l(3, 1, 4, 2); 2333bf215546Sopenharmony_ci _M_SUB_PROD_1i_2j_3k_4l(3, 2, 1, 4); 2334bf215546Sopenharmony_ci _M_SUB_PROD_1i_2j_3k_4l(3, 4, 2, 1); 2335bf215546Sopenharmony_ci 2336bf215546Sopenharmony_ci _M_SUB_PROD_1i_2j_3k_4l(4, 1, 2, 3); 2337bf215546Sopenharmony_ci _M_SUB_PROD_1i_2j_3k_4l(4, 2, 3, 1); 2338bf215546Sopenharmony_ci _M_SUB_PROD_1i_2j_3k_4l(4, 3, 1, 2); 2339bf215546Sopenharmony_ci 2340bf215546Sopenharmony_ci return pos + neg; 2341bf215546Sopenharmony_ci} 2342bf215546Sopenharmony_ci 2343bf215546Sopenharmony_ci/* XXX: Probably better to just use src/mesa/math/m_matrix.c because 2344bf215546Sopenharmony_ci * I have no idea where this code came from. 2345bf215546Sopenharmony_ci */ 2346bf215546Sopenharmony_civoid 2347bf215546Sopenharmony_cinine_d3d_matrix_inverse(D3DMATRIX *D, const D3DMATRIX *M) 2348bf215546Sopenharmony_ci{ 2349bf215546Sopenharmony_ci int i, k; 2350bf215546Sopenharmony_ci float det; 2351bf215546Sopenharmony_ci 2352bf215546Sopenharmony_ci D->m[0][0] = 2353bf215546Sopenharmony_ci M->m[1][1] * M->m[2][2] * M->m[3][3] - 2354bf215546Sopenharmony_ci M->m[1][1] * M->m[3][2] * M->m[2][3] - 2355bf215546Sopenharmony_ci M->m[1][2] * M->m[2][1] * M->m[3][3] + 2356bf215546Sopenharmony_ci M->m[1][2] * M->m[3][1] * M->m[2][3] + 2357bf215546Sopenharmony_ci M->m[1][3] * M->m[2][1] * M->m[3][2] - 2358bf215546Sopenharmony_ci M->m[1][3] * M->m[3][1] * M->m[2][2]; 2359bf215546Sopenharmony_ci 2360bf215546Sopenharmony_ci D->m[0][1] = 2361bf215546Sopenharmony_ci -M->m[0][1] * M->m[2][2] * M->m[3][3] + 2362bf215546Sopenharmony_ci M->m[0][1] * M->m[3][2] * M->m[2][3] + 2363bf215546Sopenharmony_ci M->m[0][2] * M->m[2][1] * M->m[3][3] - 2364bf215546Sopenharmony_ci M->m[0][2] * M->m[3][1] * M->m[2][3] - 2365bf215546Sopenharmony_ci M->m[0][3] * M->m[2][1] * M->m[3][2] + 2366bf215546Sopenharmony_ci M->m[0][3] * M->m[3][1] * M->m[2][2]; 2367bf215546Sopenharmony_ci 2368bf215546Sopenharmony_ci D->m[0][2] = 2369bf215546Sopenharmony_ci M->m[0][1] * M->m[1][2] * M->m[3][3] - 2370bf215546Sopenharmony_ci M->m[0][1] * M->m[3][2] * M->m[1][3] - 2371bf215546Sopenharmony_ci M->m[0][2] * M->m[1][1] * M->m[3][3] + 2372bf215546Sopenharmony_ci M->m[0][2] * M->m[3][1] * M->m[1][3] + 2373bf215546Sopenharmony_ci M->m[0][3] * M->m[1][1] * M->m[3][2] - 2374bf215546Sopenharmony_ci M->m[0][3] * M->m[3][1] * M->m[1][2]; 2375bf215546Sopenharmony_ci 2376bf215546Sopenharmony_ci D->m[0][3] = 2377bf215546Sopenharmony_ci -M->m[0][1] * M->m[1][2] * M->m[2][3] + 2378bf215546Sopenharmony_ci M->m[0][1] * M->m[2][2] * M->m[1][3] + 2379bf215546Sopenharmony_ci M->m[0][2] * M->m[1][1] * M->m[2][3] - 2380bf215546Sopenharmony_ci M->m[0][2] * M->m[2][1] * M->m[1][3] - 2381bf215546Sopenharmony_ci M->m[0][3] * M->m[1][1] * M->m[2][2] + 2382bf215546Sopenharmony_ci M->m[0][3] * M->m[2][1] * M->m[1][2]; 2383bf215546Sopenharmony_ci 2384bf215546Sopenharmony_ci D->m[1][0] = 2385bf215546Sopenharmony_ci -M->m[1][0] * M->m[2][2] * M->m[3][3] + 2386bf215546Sopenharmony_ci M->m[1][0] * M->m[3][2] * M->m[2][3] + 2387bf215546Sopenharmony_ci M->m[1][2] * M->m[2][0] * M->m[3][3] - 2388bf215546Sopenharmony_ci M->m[1][2] * M->m[3][0] * M->m[2][3] - 2389bf215546Sopenharmony_ci M->m[1][3] * M->m[2][0] * M->m[3][2] + 2390bf215546Sopenharmony_ci M->m[1][3] * M->m[3][0] * M->m[2][2]; 2391bf215546Sopenharmony_ci 2392bf215546Sopenharmony_ci D->m[1][1] = 2393bf215546Sopenharmony_ci M->m[0][0] * M->m[2][2] * M->m[3][3] - 2394bf215546Sopenharmony_ci M->m[0][0] * M->m[3][2] * M->m[2][3] - 2395bf215546Sopenharmony_ci M->m[0][2] * M->m[2][0] * M->m[3][3] + 2396bf215546Sopenharmony_ci M->m[0][2] * M->m[3][0] * M->m[2][3] + 2397bf215546Sopenharmony_ci M->m[0][3] * M->m[2][0] * M->m[3][2] - 2398bf215546Sopenharmony_ci M->m[0][3] * M->m[3][0] * M->m[2][2]; 2399bf215546Sopenharmony_ci 2400bf215546Sopenharmony_ci D->m[1][2] = 2401bf215546Sopenharmony_ci -M->m[0][0] * M->m[1][2] * M->m[3][3] + 2402bf215546Sopenharmony_ci M->m[0][0] * M->m[3][2] * M->m[1][3] + 2403bf215546Sopenharmony_ci M->m[0][2] * M->m[1][0] * M->m[3][3] - 2404bf215546Sopenharmony_ci M->m[0][2] * M->m[3][0] * M->m[1][3] - 2405bf215546Sopenharmony_ci M->m[0][3] * M->m[1][0] * M->m[3][2] + 2406bf215546Sopenharmony_ci M->m[0][3] * M->m[3][0] * M->m[1][2]; 2407bf215546Sopenharmony_ci 2408bf215546Sopenharmony_ci D->m[1][3] = 2409bf215546Sopenharmony_ci M->m[0][0] * M->m[1][2] * M->m[2][3] - 2410bf215546Sopenharmony_ci M->m[0][0] * M->m[2][2] * M->m[1][3] - 2411bf215546Sopenharmony_ci M->m[0][2] * M->m[1][0] * M->m[2][3] + 2412bf215546Sopenharmony_ci M->m[0][2] * M->m[2][0] * M->m[1][3] + 2413bf215546Sopenharmony_ci M->m[0][3] * M->m[1][0] * M->m[2][2] - 2414bf215546Sopenharmony_ci M->m[0][3] * M->m[2][0] * M->m[1][2]; 2415bf215546Sopenharmony_ci 2416bf215546Sopenharmony_ci D->m[2][0] = 2417bf215546Sopenharmony_ci M->m[1][0] * M->m[2][1] * M->m[3][3] - 2418bf215546Sopenharmony_ci M->m[1][0] * M->m[3][1] * M->m[2][3] - 2419bf215546Sopenharmony_ci M->m[1][1] * M->m[2][0] * M->m[3][3] + 2420bf215546Sopenharmony_ci M->m[1][1] * M->m[3][0] * M->m[2][3] + 2421bf215546Sopenharmony_ci M->m[1][3] * M->m[2][0] * M->m[3][1] - 2422bf215546Sopenharmony_ci M->m[1][3] * M->m[3][0] * M->m[2][1]; 2423bf215546Sopenharmony_ci 2424bf215546Sopenharmony_ci D->m[2][1] = 2425bf215546Sopenharmony_ci -M->m[0][0] * M->m[2][1] * M->m[3][3] + 2426bf215546Sopenharmony_ci M->m[0][0] * M->m[3][1] * M->m[2][3] + 2427bf215546Sopenharmony_ci M->m[0][1] * M->m[2][0] * M->m[3][3] - 2428bf215546Sopenharmony_ci M->m[0][1] * M->m[3][0] * M->m[2][3] - 2429bf215546Sopenharmony_ci M->m[0][3] * M->m[2][0] * M->m[3][1] + 2430bf215546Sopenharmony_ci M->m[0][3] * M->m[3][0] * M->m[2][1]; 2431bf215546Sopenharmony_ci 2432bf215546Sopenharmony_ci D->m[2][2] = 2433bf215546Sopenharmony_ci M->m[0][0] * M->m[1][1] * M->m[3][3] - 2434bf215546Sopenharmony_ci M->m[0][0] * M->m[3][1] * M->m[1][3] - 2435bf215546Sopenharmony_ci M->m[0][1] * M->m[1][0] * M->m[3][3] + 2436bf215546Sopenharmony_ci M->m[0][1] * M->m[3][0] * M->m[1][3] + 2437bf215546Sopenharmony_ci M->m[0][3] * M->m[1][0] * M->m[3][1] - 2438bf215546Sopenharmony_ci M->m[0][3] * M->m[3][0] * M->m[1][1]; 2439bf215546Sopenharmony_ci 2440bf215546Sopenharmony_ci D->m[2][3] = 2441bf215546Sopenharmony_ci -M->m[0][0] * M->m[1][1] * M->m[2][3] + 2442bf215546Sopenharmony_ci M->m[0][0] * M->m[2][1] * M->m[1][3] + 2443bf215546Sopenharmony_ci M->m[0][1] * M->m[1][0] * M->m[2][3] - 2444bf215546Sopenharmony_ci M->m[0][1] * M->m[2][0] * M->m[1][3] - 2445bf215546Sopenharmony_ci M->m[0][3] * M->m[1][0] * M->m[2][1] + 2446bf215546Sopenharmony_ci M->m[0][3] * M->m[2][0] * M->m[1][1]; 2447bf215546Sopenharmony_ci 2448bf215546Sopenharmony_ci D->m[3][0] = 2449bf215546Sopenharmony_ci -M->m[1][0] * M->m[2][1] * M->m[3][2] + 2450bf215546Sopenharmony_ci M->m[1][0] * M->m[3][1] * M->m[2][2] + 2451bf215546Sopenharmony_ci M->m[1][1] * M->m[2][0] * M->m[3][2] - 2452bf215546Sopenharmony_ci M->m[1][1] * M->m[3][0] * M->m[2][2] - 2453bf215546Sopenharmony_ci M->m[1][2] * M->m[2][0] * M->m[3][1] + 2454bf215546Sopenharmony_ci M->m[1][2] * M->m[3][0] * M->m[2][1]; 2455bf215546Sopenharmony_ci 2456bf215546Sopenharmony_ci D->m[3][1] = 2457bf215546Sopenharmony_ci M->m[0][0] * M->m[2][1] * M->m[3][2] - 2458bf215546Sopenharmony_ci M->m[0][0] * M->m[3][1] * M->m[2][2] - 2459bf215546Sopenharmony_ci M->m[0][1] * M->m[2][0] * M->m[3][2] + 2460bf215546Sopenharmony_ci M->m[0][1] * M->m[3][0] * M->m[2][2] + 2461bf215546Sopenharmony_ci M->m[0][2] * M->m[2][0] * M->m[3][1] - 2462bf215546Sopenharmony_ci M->m[0][2] * M->m[3][0] * M->m[2][1]; 2463bf215546Sopenharmony_ci 2464bf215546Sopenharmony_ci D->m[3][2] = 2465bf215546Sopenharmony_ci -M->m[0][0] * M->m[1][1] * M->m[3][2] + 2466bf215546Sopenharmony_ci M->m[0][0] * M->m[3][1] * M->m[1][2] + 2467bf215546Sopenharmony_ci M->m[0][1] * M->m[1][0] * M->m[3][2] - 2468bf215546Sopenharmony_ci M->m[0][1] * M->m[3][0] * M->m[1][2] - 2469bf215546Sopenharmony_ci M->m[0][2] * M->m[1][0] * M->m[3][1] + 2470bf215546Sopenharmony_ci M->m[0][2] * M->m[3][0] * M->m[1][1]; 2471bf215546Sopenharmony_ci 2472bf215546Sopenharmony_ci D->m[3][3] = 2473bf215546Sopenharmony_ci M->m[0][0] * M->m[1][1] * M->m[2][2] - 2474bf215546Sopenharmony_ci M->m[0][0] * M->m[2][1] * M->m[1][2] - 2475bf215546Sopenharmony_ci M->m[0][1] * M->m[1][0] * M->m[2][2] + 2476bf215546Sopenharmony_ci M->m[0][1] * M->m[2][0] * M->m[1][2] + 2477bf215546Sopenharmony_ci M->m[0][2] * M->m[1][0] * M->m[2][1] - 2478bf215546Sopenharmony_ci M->m[0][2] * M->m[2][0] * M->m[1][1]; 2479bf215546Sopenharmony_ci 2480bf215546Sopenharmony_ci det = 2481bf215546Sopenharmony_ci M->m[0][0] * D->m[0][0] + 2482bf215546Sopenharmony_ci M->m[1][0] * D->m[0][1] + 2483bf215546Sopenharmony_ci M->m[2][0] * D->m[0][2] + 2484bf215546Sopenharmony_ci M->m[3][0] * D->m[0][3]; 2485bf215546Sopenharmony_ci 2486bf215546Sopenharmony_ci if (fabsf(det) < 1e-30) {/* non inversible */ 2487bf215546Sopenharmony_ci *D = *M; /* wine tests */ 2488bf215546Sopenharmony_ci return; 2489bf215546Sopenharmony_ci } 2490bf215546Sopenharmony_ci 2491bf215546Sopenharmony_ci det = 1.0 / det; 2492bf215546Sopenharmony_ci 2493bf215546Sopenharmony_ci for (i = 0; i < 4; i++) 2494bf215546Sopenharmony_ci for (k = 0; k < 4; k++) 2495bf215546Sopenharmony_ci D->m[i][k] *= det; 2496bf215546Sopenharmony_ci 2497bf215546Sopenharmony_ci#if defined(DEBUG) || !defined(NDEBUG) 2498bf215546Sopenharmony_ci { 2499bf215546Sopenharmony_ci D3DMATRIX I; 2500bf215546Sopenharmony_ci 2501bf215546Sopenharmony_ci nine_d3d_matrix_matrix_mul(&I, D, M); 2502bf215546Sopenharmony_ci 2503bf215546Sopenharmony_ci for (i = 0; i < 4; ++i) 2504bf215546Sopenharmony_ci for (k = 0; k < 4; ++k) 2505bf215546Sopenharmony_ci if (fabsf(I.m[i][k] - (float)(i == k)) > 1e-3) 2506bf215546Sopenharmony_ci DBG("Matrix inversion check FAILED !\n"); 2507bf215546Sopenharmony_ci } 2508bf215546Sopenharmony_ci#endif 2509bf215546Sopenharmony_ci} 2510