1 /*
2 * Copyright (C) 2009 Nicolai Haehnle.
3 * Copyright 2010 Tom Stellard <tstellar@gmail.com>
4 *
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining
8 * a copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sublicense, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial
17 * portions of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
23 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 */
28
29 #include "util/u_math.h"
30
31 #include "radeon_dataflow.h"
32
33 #include "radeon_compiler.h"
34 #include "radeon_compiler_util.h"
35 #include "radeon_list.h"
36 #include "radeon_swizzle.h"
37 #include "radeon_variable.h"
38
/**
 * State handed to src_clobbered_reads_cb(). It describes one register write
 * (file, index and written channels) together with the reader data that is
 * flagged when a source of the writer instruction overlaps that write.
 */
struct src_clobbered_reads_cb_data {
	rc_register_file File;			/* file of the register being written */
	unsigned int Index;			/* index of the register being written */
	unsigned int Mask;			/* channels being written */
	struct rc_reader_data * ReaderData;	/* gets AbortOnRead set on overlap */
};
45
/**
 * Callback type used by presub_helper() to rewrite a single reader. It is
 * invoked with the ADD instruction, the reader instruction and the index of
 * the reader's source register that reads the ADD result.
 */
typedef void (*rc_presub_replace_fn)(struct rc_instruction *,
					struct rc_instruction *,
					unsigned int);
49
chain_srcregs(struct rc_src_register outer, struct rc_src_register inner)50 static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner)
51 {
52 struct rc_src_register combine;
53 combine.File = inner.File;
54 combine.Index = inner.Index;
55 combine.RelAddr = inner.RelAddr;
56 if (outer.Abs) {
57 combine.Abs = 1;
58 combine.Negate = outer.Negate;
59 } else {
60 combine.Abs = inner.Abs;
61 combine.Negate = swizzle_mask(outer.Swizzle, inner.Negate);
62 combine.Negate ^= outer.Negate;
63 }
64 combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle);
65 return combine;
66 }
67
copy_propagate_scan_read(void * data, struct rc_instruction * inst, struct rc_src_register * src)68 static void copy_propagate_scan_read(void * data, struct rc_instruction * inst,
69 struct rc_src_register * src)
70 {
71 rc_register_file file = src->File;
72 struct rc_reader_data * reader_data = data;
73
74 if(!rc_inst_can_use_presub(inst,
75 reader_data->Writer->U.I.PreSub.Opcode,
76 rc_swizzle_to_writemask(src->Swizzle),
77 src,
78 &reader_data->Writer->U.I.PreSub.SrcReg[0],
79 &reader_data->Writer->U.I.PreSub.SrcReg[1])) {
80 reader_data->Abort = 1;
81 return;
82 }
83
84 /* XXX This could probably be handled better. */
85 if (file == RC_FILE_ADDRESS) {
86 reader_data->Abort = 1;
87 return;
88 }
89
90 /* These instructions cannot read from the constants file.
91 * see radeonTransformTEX()
92 */
93 if(reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_TEMPORARY &&
94 reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_INPUT &&
95 (inst->U.I.Opcode == RC_OPCODE_TEX ||
96 inst->U.I.Opcode == RC_OPCODE_TXB ||
97 inst->U.I.Opcode == RC_OPCODE_TXP ||
98 inst->U.I.Opcode == RC_OPCODE_TXD ||
99 inst->U.I.Opcode == RC_OPCODE_TXL ||
100 inst->U.I.Opcode == RC_OPCODE_KIL)){
101 reader_data->Abort = 1;
102 return;
103 }
104 }
105
src_clobbered_reads_cb( void * data, struct rc_instruction * inst, struct rc_src_register * src)106 static void src_clobbered_reads_cb(
107 void * data,
108 struct rc_instruction * inst,
109 struct rc_src_register * src)
110 {
111 struct src_clobbered_reads_cb_data * sc_data = data;
112
113 if (src->File == sc_data->File
114 && src->Index == sc_data->Index
115 && (rc_swizzle_to_writemask(src->Swizzle) & sc_data->Mask)) {
116
117 sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW;
118 }
119
120 if (src->RelAddr && sc_data->File == RC_FILE_ADDRESS) {
121 sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW;
122 }
123 }
124
/**
 * Write callback passed to rc_get_readers().
 *
 * For a write of \p mask channels to register \p file / \p index, walks all
 * sources of the writer instruction stored in the reader data and lets
 * src_clobbered_reads_cb() flag the reader data when one of them is hit by
 * this write.
 */
static void is_src_clobbered_scan_write(
	void * data,
	struct rc_instruction * inst,
	rc_register_file file,
	unsigned int index,
	unsigned int mask)
{
	struct rc_reader_data * reader_data = data;
	struct src_clobbered_reads_cb_data write_info = {
		.File = file,
		.Index = index,
		.Mask = mask,
		.ReaderData = reader_data,
	};

	rc_for_all_reads_src(reader_data->Writer,
				src_clobbered_reads_cb, &write_info);
}
141
copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov)142 static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov)
143 {
144 struct rc_reader_data reader_data;
145 unsigned int i;
146
147 if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY ||
148 inst_mov->U.I.WriteALUResult)
149 return;
150
151 /* Get a list of all the readers of this MOV instruction. */
152 reader_data.ExitOnAbort = 1;
153 rc_get_readers(c, inst_mov, &reader_data,
154 copy_propagate_scan_read, NULL,
155 is_src_clobbered_scan_write);
156
157 if (reader_data.Abort || reader_data.ReaderCount == 0 || reader_data.ReadersAfterEndloop)
158 return;
159
160 /* We can propagate SaturateMode if all the readers are MOV instructions
161 * without a presubtract operation, source negation and absolute.
162 * In that case, we just move SaturateMode to all readers. */
163 if (inst_mov->U.I.SaturateMode) {
164 for (i = 0; i < reader_data.ReaderCount; i++) {
165 struct rc_instruction * inst = reader_data.Readers[i].Inst;
166
167 if (inst->U.I.Opcode != RC_OPCODE_MOV ||
168 inst->U.I.SrcReg[0].File == RC_FILE_PRESUB ||
169 inst->U.I.SrcReg[0].Abs ||
170 inst->U.I.SrcReg[0].Negate) {
171 return;
172 }
173 }
174 }
175
176 /* Propagate the MOV instruction. */
177 for (i = 0; i < reader_data.ReaderCount; i++) {
178 struct rc_instruction * inst = reader_data.Readers[i].Inst;
179 *reader_data.Readers[i].U.I.Src = chain_srcregs(*reader_data.Readers[i].U.I.Src, inst_mov->U.I.SrcReg[0]);
180
181 if (inst_mov->U.I.SrcReg[0].File == RC_FILE_PRESUB)
182 inst->U.I.PreSub = inst_mov->U.I.PreSub;
183 if (!inst->U.I.SaturateMode)
184 inst->U.I.SaturateMode = inst_mov->U.I.SaturateMode;
185 }
186
187 /* Finally, remove the original MOV instruction */
188 rc_remove_instruction(inst_mov);
189 }
190
191 /**
192 * Check if a source register is actually always the same
193 * swizzle constant.
194 */
is_src_uniform_constant(struct rc_src_register src, rc_swizzle * pswz, unsigned int * pnegate)195 static int is_src_uniform_constant(struct rc_src_register src,
196 rc_swizzle * pswz, unsigned int * pnegate)
197 {
198 int have_used = 0;
199
200 if (src.File != RC_FILE_NONE) {
201 *pswz = 0;
202 return 0;
203 }
204
205 for(unsigned int chan = 0; chan < 4; ++chan) {
206 unsigned int swz = GET_SWZ(src.Swizzle, chan);
207 if (swz < 4) {
208 *pswz = 0;
209 return 0;
210 }
211 if (swz == RC_SWIZZLE_UNUSED)
212 continue;
213
214 if (!have_used) {
215 *pswz = swz;
216 *pnegate = GET_BIT(src.Negate, chan);
217 have_used = 1;
218 } else {
219 if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan)) {
220 *pswz = 0;
221 return 0;
222 }
223 }
224 }
225
226 return 1;
227 }
228
constant_folding_mad(struct rc_instruction * inst)229 static void constant_folding_mad(struct rc_instruction * inst)
230 {
231 rc_swizzle swz = 0;
232 unsigned int negate= 0;
233
234 if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) {
235 if (swz == RC_SWIZZLE_ZERO) {
236 inst->U.I.Opcode = RC_OPCODE_MUL;
237 return;
238 }
239 }
240
241 if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
242 if (swz == RC_SWIZZLE_ONE) {
243 inst->U.I.Opcode = RC_OPCODE_ADD;
244 if (negate)
245 inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
246 inst->U.I.SrcReg[1] = inst->U.I.SrcReg[2];
247 return;
248 } else if (swz == RC_SWIZZLE_ZERO) {
249 inst->U.I.Opcode = RC_OPCODE_MOV;
250 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
251 return;
252 }
253 }
254
255 if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
256 if (swz == RC_SWIZZLE_ONE) {
257 inst->U.I.Opcode = RC_OPCODE_ADD;
258 if (negate)
259 inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
260 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
261 return;
262 } else if (swz == RC_SWIZZLE_ZERO) {
263 inst->U.I.Opcode = RC_OPCODE_MOV;
264 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
265 return;
266 }
267 }
268 }
269
constant_folding_mul(struct rc_instruction * inst)270 static void constant_folding_mul(struct rc_instruction * inst)
271 {
272 rc_swizzle swz = 0;
273 unsigned int negate = 0;
274
275 if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
276 if (swz == RC_SWIZZLE_ONE) {
277 inst->U.I.Opcode = RC_OPCODE_MOV;
278 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
279 if (negate)
280 inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
281 return;
282 } else if (swz == RC_SWIZZLE_ZERO) {
283 inst->U.I.Opcode = RC_OPCODE_MOV;
284 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
285 return;
286 }
287 }
288
289 if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
290 if (swz == RC_SWIZZLE_ONE) {
291 inst->U.I.Opcode = RC_OPCODE_MOV;
292 if (negate)
293 inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
294 return;
295 } else if (swz == RC_SWIZZLE_ZERO) {
296 inst->U.I.Opcode = RC_OPCODE_MOV;
297 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
298 return;
299 }
300 }
301 }
302
constant_folding_add(struct rc_instruction * inst)303 static void constant_folding_add(struct rc_instruction * inst)
304 {
305 rc_swizzle swz = 0;
306 unsigned int negate = 0;
307
308 if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
309 if (swz == RC_SWIZZLE_ZERO) {
310 inst->U.I.Opcode = RC_OPCODE_MOV;
311 inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
312 return;
313 }
314 }
315
316 if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
317 if (swz == RC_SWIZZLE_ZERO) {
318 inst->U.I.Opcode = RC_OPCODE_MOV;
319 return;
320 }
321 }
322 }
323
324 /**
325 * Replace 0.0, 1.0 and 0.5 immediate constants by their
326 * respective swizzles. Simplify instructions like ADD dst, src, 0;
327 */
constant_folding(struct radeon_compiler * c, struct rc_instruction * inst)328 static void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst)
329 {
330 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
331 unsigned int i;
332
333 /* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */
334 for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
335 struct rc_constant * constant;
336 struct rc_src_register newsrc;
337 int have_real_reference;
338 unsigned int chan;
339
340 /* If there are only 0, 0.5, 1, or _ swizzles, mark the source as a constant. */
341 for (chan = 0; chan < 4; ++chan)
342 if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) <= 3)
343 break;
344 if (chan == 4) {
345 inst->U.I.SrcReg[src].File = RC_FILE_NONE;
346 continue;
347 }
348
349 /* Convert immediates to swizzles. */
350 if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT ||
351 inst->U.I.SrcReg[src].RelAddr ||
352 inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count)
353 continue;
354
355 constant =
356 &c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index];
357
358 if (constant->Type != RC_CONSTANT_IMMEDIATE)
359 continue;
360
361 newsrc = inst->U.I.SrcReg[src];
362 have_real_reference = 0;
363 for (chan = 0; chan < 4; ++chan) {
364 unsigned int swz = GET_SWZ(newsrc.Swizzle, chan);
365 unsigned int newswz;
366 float imm;
367 float baseimm;
368
369 if (swz >= 4)
370 continue;
371
372 imm = constant->u.Immediate[swz];
373 baseimm = imm;
374 if (imm < 0.0)
375 baseimm = -baseimm;
376
377 if (baseimm == 0.0) {
378 newswz = RC_SWIZZLE_ZERO;
379 } else if (baseimm == 1.0) {
380 newswz = RC_SWIZZLE_ONE;
381 } else if (baseimm == 0.5 && c->has_half_swizzles) {
382 newswz = RC_SWIZZLE_HALF;
383 } else {
384 have_real_reference = 1;
385 continue;
386 }
387
388 SET_SWZ(newsrc.Swizzle, chan, newswz);
389 if (imm < 0.0 && !newsrc.Abs)
390 newsrc.Negate ^= 1 << chan;
391 }
392
393 if (!have_real_reference) {
394 newsrc.File = RC_FILE_NONE;
395 newsrc.Index = 0;
396 }
397
398 /* don't make the swizzle worse */
399 if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc))
400 continue;
401
402 inst->U.I.SrcReg[src] = newsrc;
403 }
404
405 /* Simplify instructions based on constants */
406 if (inst->U.I.Opcode == RC_OPCODE_MAD)
407 constant_folding_mad(inst);
408
409 /* note: MAD can simplify to MUL or ADD */
410 if (inst->U.I.Opcode == RC_OPCODE_MUL)
411 constant_folding_mul(inst);
412 else if (inst->U.I.Opcode == RC_OPCODE_ADD)
413 constant_folding_add(inst);
414
415 /* In case this instruction has been converted, make sure all of the
416 * registers that are no longer used are empty. */
417 opcode = rc_get_opcode_info(inst->U.I.Opcode);
418 for(i = opcode->NumSrcRegs; i < 3; i++) {
419 memset(&inst->U.I.SrcReg[i], 0, sizeof(struct rc_src_register));
420 }
421 }
422
423 /**
424 * If src and dst use the same register, this function returns a writemask that
425 * indicates which components are read by src. Otherwise zero is returned.
426 */
src_reads_dst_mask(struct rc_src_register src, struct rc_dst_register dst)427 static unsigned int src_reads_dst_mask(struct rc_src_register src,
428 struct rc_dst_register dst)
429 {
430 if (dst.File != src.File || dst.Index != src.Index) {
431 return 0;
432 }
433 return rc_swizzle_to_writemask(src.Swizzle);
434 }
435
436 /* Return 1 if the source registers has a constant swizzle (e.g. 0, 0.5, 1.0)
437 * in any of its channels. Return 0 otherwise. */
src_has_const_swz(struct rc_src_register src)438 static int src_has_const_swz(struct rc_src_register src) {
439 int chan;
440 for(chan = 0; chan < 4; chan++) {
441 unsigned int swz = GET_SWZ(src.Swizzle, chan);
442 if (swz == RC_SWIZZLE_ZERO || swz == RC_SWIZZLE_HALF
443 || swz == RC_SWIZZLE_ONE) {
444 return 1;
445 }
446 }
447 return 0;
448 }
449
presub_scan_read( void * data, struct rc_instruction * inst, struct rc_src_register * src)450 static void presub_scan_read(
451 void * data,
452 struct rc_instruction * inst,
453 struct rc_src_register * src)
454 {
455 struct rc_reader_data * reader_data = data;
456 rc_presubtract_op * presub_opcode = reader_data->CbData;
457
458 if (!rc_inst_can_use_presub(inst, *presub_opcode,
459 reader_data->Writer->U.I.DstReg.WriteMask,
460 src,
461 &reader_data->Writer->U.I.SrcReg[0],
462 &reader_data->Writer->U.I.SrcReg[1])) {
463 reader_data->Abort = 1;
464 return;
465 }
466 }
467
/**
 * Replace every read of the result of \p inst_add by a presubtract source.
 *
 * All readers of \p inst_add are collected; if the scan aborts or there is
 * no reader, nothing is changed. Otherwise \p presub_replace is called for
 * each reader source slot that reads the ADD result.
 *
 * \return 1 if the readers were rewritten, 0 if nothing was changed.
 */
static int presub_helper(
	struct radeon_compiler * c,
	struct rc_instruction * inst_add,
	rc_presubtract_op presub_opcode,
	rc_presub_replace_fn presub_replace)
{
	struct rc_reader_data readers;
	rc_presubtract_op op_for_cb = presub_opcode;
	unsigned int r;

	readers.CbData = &op_for_cb;
	readers.ExitOnAbort = 1;
	rc_get_readers(c, inst_add, &readers, presub_scan_read, NULL,
			is_src_clobbered_scan_write);

	if (readers.Abort)
		return 0;
	if (readers.ReaderCount == 0)
		return 0;

	for (r = 0; r < readers.ReaderCount; r++) {
		struct rc_instruction * user = readers.Readers[r].Inst;
		struct rc_src_register * read_src = readers.Readers[r].U.I.Src;
		const struct rc_opcode_info * info =
			rc_get_opcode_info(user->U.I.Opcode);
		unsigned int slot;

		/* Find which source slot of the reader this read refers to. */
		for (slot = 0; slot < info->NumSrcRegs; slot++) {
			if (read_src == &user->U.I.SrcReg[slot])
				presub_replace(inst_add, user, slot);
		}
	}
	return 1;
}
499
presub_replace_add( struct rc_instruction * inst_add, struct rc_instruction * inst_reader, unsigned int src_index)500 static void presub_replace_add(
501 struct rc_instruction * inst_add,
502 struct rc_instruction * inst_reader,
503 unsigned int src_index)
504 {
505 rc_presubtract_op presub_opcode;
506
507 /* This function assumes that inst_add->U.I.SrcReg[0] and
508 * inst_add->U.I.SrcReg[1] aren't both negative.
509 */
510 assert(!(inst_add->U.I.SrcReg[1].Negate && inst_add->U.I.SrcReg[0].Negate));
511
512 if (inst_add->U.I.SrcReg[1].Negate || inst_add->U.I.SrcReg[0].Negate)
513 presub_opcode = RC_PRESUB_SUB;
514 else
515 presub_opcode = RC_PRESUB_ADD;
516
517 if (inst_add->U.I.SrcReg[1].Negate) {
518 inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
519 inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[0];
520 } else {
521 inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[0];
522 inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[1];
523 }
524 inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
525 inst_reader->U.I.PreSub.SrcReg[1].Negate = 0;
526 inst_reader->U.I.PreSub.Opcode = presub_opcode;
527 inst_reader->U.I.SrcReg[src_index] =
528 chain_srcregs(inst_reader->U.I.SrcReg[src_index],
529 inst_reader->U.I.PreSub.SrcReg[0]);
530 inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
531 inst_reader->U.I.SrcReg[src_index].Index = presub_opcode;
532 }
533
is_presub_candidate( struct radeon_compiler * c, struct rc_instruction * inst)534 static int is_presub_candidate(
535 struct radeon_compiler * c,
536 struct rc_instruction * inst)
537 {
538 const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
539 unsigned int i;
540 unsigned int is_constant[2] = {0, 0};
541
542 assert(inst->U.I.Opcode == RC_OPCODE_ADD);
543
544 if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE
545 || inst->U.I.SaturateMode
546 || inst->U.I.WriteALUResult
547 || inst->U.I.Omod) {
548 return 0;
549 }
550
551 /* If both sources use a constant swizzle, then we can't convert it to
552 * a presubtract operation. In fact for the ADD and SUB presubtract
553 * operations neither source can contain a constant swizzle. This
554 * specific case is checked in peephole_add_presub_add() when
555 * we make sure the swizzles for both sources are equal, so we
556 * don't need to worry about it here. */
557 for (i = 0; i < 2; i++) {
558 int chan;
559 for (chan = 0; chan < 4; chan++) {
560 rc_swizzle swz =
561 get_swz(inst->U.I.SrcReg[i].Swizzle, chan);
562 if (swz == RC_SWIZZLE_ONE
563 || swz == RC_SWIZZLE_ZERO
564 || swz == RC_SWIZZLE_HALF) {
565 is_constant[i] = 1;
566 }
567 }
568 }
569 if (is_constant[0] && is_constant[1])
570 return 0;
571
572 for(i = 0; i < info->NumSrcRegs; i++) {
573 struct rc_src_register src = inst->U.I.SrcReg[i];
574 if (src_reads_dst_mask(src, inst->U.I.DstReg))
575 return 0;
576
577 src.File = RC_FILE_PRESUB;
578 if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, src))
579 return 0;
580 }
581 return 1;
582 }
583
peephole_add_presub_add( struct radeon_compiler * c, struct rc_instruction * inst_add)584 static int peephole_add_presub_add(
585 struct radeon_compiler * c,
586 struct rc_instruction * inst_add)
587 {
588 unsigned dstmask = inst_add->U.I.DstReg.WriteMask;
589 unsigned src0_neg = inst_add->U.I.SrcReg[0].Negate & dstmask;
590 unsigned src1_neg = inst_add->U.I.SrcReg[1].Negate & dstmask;
591
592 if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle)
593 return 0;
594
595 /* src0 and src1 can't have absolute values */
596 if (inst_add->U.I.SrcReg[0].Abs || inst_add->U.I.SrcReg[1].Abs)
597 return 0;
598
599 /* presub_replace_add() assumes only one is negative */
600 if (inst_add->U.I.SrcReg[0].Negate && inst_add->U.I.SrcReg[1].Negate)
601 return 0;
602
603 /* if src0 is negative, at least all bits of dstmask have to be set */
604 if (inst_add->U.I.SrcReg[0].Negate && src0_neg != dstmask)
605 return 0;
606
607 /* if src1 is negative, at least all bits of dstmask have to be set */
608 if (inst_add->U.I.SrcReg[1].Negate && src1_neg != dstmask)
609 return 0;
610
611 if (!is_presub_candidate(c, inst_add))
612 return 0;
613
614 if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) {
615 rc_remove_instruction(inst_add);
616 return 1;
617 }
618 return 0;
619 }
620
presub_replace_inv( struct rc_instruction * inst_add, struct rc_instruction * inst_reader, unsigned int src_index)621 static void presub_replace_inv(
622 struct rc_instruction * inst_add,
623 struct rc_instruction * inst_reader,
624 unsigned int src_index)
625 {
626 /* We must be careful not to modify inst_add, since it
627 * is possible it will remain part of the program.*/
628 inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
629 inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
630 inst_reader->U.I.PreSub.Opcode = RC_PRESUB_INV;
631 inst_reader->U.I.SrcReg[src_index] = chain_srcregs(inst_reader->U.I.SrcReg[src_index],
632 inst_reader->U.I.PreSub.SrcReg[0]);
633
634 inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
635 inst_reader->U.I.SrcReg[src_index].Index = RC_PRESUB_INV;
636 }
637
638 /**
639 * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1]
640 * Use the presubtract 1 - src0 for all readers of TEMP[0]. The first source
641 * of the add instruction must have the constatnt 1 swizzle. This function
642 * does not check const registers to see if their value is 1.0, so it should
643 * be called after the constant_folding optimization.
644 * @return
645 * 0 if the ADD instruction is still part of the program.
646 * 1 if the ADD instruction is no longer part of the program.
647 */
peephole_add_presub_inv( struct radeon_compiler * c, struct rc_instruction * inst_add)648 static int peephole_add_presub_inv(
649 struct radeon_compiler * c,
650 struct rc_instruction * inst_add)
651 {
652 unsigned int i, swz;
653
654 if (!is_presub_candidate(c, inst_add))
655 return 0;
656
657 /* Check if src0 is 1. */
658 /* XXX It would be nice to use is_src_uniform_constant here, but that
659 * function only works if the register's file is RC_FILE_NONE */
660 for(i = 0; i < 4; i++ ) {
661 if (!(inst_add->U.I.DstReg.WriteMask & (1 << i)))
662 continue;
663
664 swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i);
665 if (swz != RC_SWIZZLE_ONE || inst_add->U.I.SrcReg[0].Negate & (1 << i))
666 return 0;
667 }
668
669 /* Check src1. */
670 if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) !=
671 inst_add->U.I.DstReg.WriteMask
672 || inst_add->U.I.SrcReg[1].Abs
673 || (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY
674 && inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT)
675 || src_has_const_swz(inst_add->U.I.SrcReg[1])) {
676
677 return 0;
678 }
679
680 if (presub_helper(c, inst_add, RC_PRESUB_INV, presub_replace_inv)) {
681 rc_remove_instruction(inst_add);
682 return 1;
683 }
684 return 0;
685 }
686
/**
 * State shared by omod_filter_reader_cb() and omod_filter_writer_cb() while
 * peephole_mul_omod() scans the instructions preceding a MUL.
 */
struct peephole_mul_cb_data {
	struct rc_dst_register * Writer;	/* destination register of the MUL */
	unsigned int Clobbered;			/* set to 1 once that register is read or written */
};
691
/**
 * Read callback used by peephole_mul_omod(): marks the callback data as
 * clobbered when the read of \p mask channels of register \p file / \p index
 * touches the destination register stored in the callback data.
 */
static void omod_filter_reader_cb(
	void * userdata,
	struct rc_instruction * inst,
	rc_register_file file,
	unsigned int index,
	unsigned int mask)
{
	struct peephole_mul_cb_data * cb = userdata;
	const struct rc_dst_register * dst = cb->Writer;

	if (!rc_src_reads_dst_mask(file, mask, index,
			dst->File, dst->Index, dst->WriteMask))
		return;

	cb->Clobbered = 1;
}
706
/**
 * Write callback used by peephole_mul_omod(): marks the callback data as
 * clobbered when the write hits at least one channel of the destination
 * register stored in the callback data.
 */
static void omod_filter_writer_cb(
	void * userdata,
	struct rc_instruction * inst,
	rc_register_file file,
	unsigned int index,
	unsigned int mask)
{
	struct peephole_mul_cb_data * cb = userdata;
	const struct rc_dst_register * dst = cb->Writer;

	if (file != dst->File)
		return;
	if (index != dst->Index)
		return;
	if (mask & dst->WriteMask)
		cb->Clobbered = 1;
}
720
/**
 * Try to fold "MUL dst, temp, imm" into the instruction(s) that write temp by
 * using an output modifier (omod) on them.
 *
 * The MUL must have exactly one temporary source and one un-negated constant
 * source that is an immediate; all used channels of the constant source must
 * share one swizzle, and the immediate value must be 2, 4, 8, 1/2, 1/4 or
 * 1/8. The writers of the temporary must not be texture instructions and must
 * not saturate, and the MUL destination must be neither read nor written
 * between each writer and the MUL. On success every writer gets the omod, the
 * MUL's destination and the MUL's saturate mode, and the MUL is removed.
 *
 * @return
 * 0 if inst_mul is still part of the program.
 * 1 if inst_mul is no longer part of the program.
 */
static int peephole_mul_omod(
	struct radeon_compiler * c,
	struct rc_instruction * inst_mul,
	struct rc_list * var_list)
{
	unsigned int chan = 0, swz, i;
	int const_index = -1;
	int temp_index = -1;
	float const_value;
	rc_omod_op omod_op = RC_OMOD_DISABLE;
	struct rc_list * writer_list;
	struct rc_variable * var;
	struct peephole_mul_cb_data cb_data;
	unsigned writemask_sum;

	/* Classify the two sources: exactly one must be a temporary and the
	 * other one a constant. */
	for (i = 0; i < 2; i++) {
		unsigned int j;
		if (inst_mul->U.I.SrcReg[i].File != RC_FILE_CONSTANT
			&& inst_mul->U.I.SrcReg[i].File != RC_FILE_TEMPORARY) {
			return 0;
		}
		if (inst_mul->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
			if (temp_index != -1) {
				/* The instruction has two temp sources */
				return 0;
			} else {
				temp_index = i;
				continue;
			}
		}
		/* If we get this far Src[i] must be a constant src */
		if (inst_mul->U.I.SrcReg[i].Negate) {
			return 0;
		}
		/* The constant src needs to read from the same swizzle */
		swz = RC_SWIZZLE_UNUSED;
		chan = 0;
		for (j = 0; j < 4; j++) {
			unsigned int j_swz =
				GET_SWZ(inst_mul->U.I.SrcReg[i].Swizzle, j);
			if (j_swz == RC_SWIZZLE_UNUSED) {
				continue;
			}
			if (swz == RC_SWIZZLE_UNUSED) {
				/* Remember the first used channel; its value is
				 * looked up below. */
				swz = j_swz;
				chan = j;
			} else if (j_swz != swz) {
				return 0;
			}
		}

		if (const_index != -1) {
			/* The instruction has two constant sources */
			return 0;
		} else {
			const_index = i;
		}
	}

	if (!rc_src_reg_is_immediate(c, inst_mul->U.I.SrcReg[const_index].File,
				inst_mul->U.I.SrcReg[const_index].Index)) {
		return 0;
	}
	const_value = rc_get_constant_value(c,
			inst_mul->U.I.SrcReg[const_index].Index,
			inst_mul->U.I.SrcReg[const_index].Swizzle,
			inst_mul->U.I.SrcReg[const_index].Negate,
			chan);

	/* Map the multiplier onto one of the available output modifiers. */
	if (const_value == 2.0f) {
		omod_op = RC_OMOD_MUL_2;
	} else if (const_value == 4.0f) {
		omod_op = RC_OMOD_MUL_4;
	} else if (const_value == 8.0f) {
		omod_op = RC_OMOD_MUL_8;
	} else if (const_value == (1.0f / 2.0f)) {
		omod_op = RC_OMOD_DIV_2;
	} else if (const_value == (1.0f / 4.0f)) {
		omod_op = RC_OMOD_DIV_4;
	} else if (const_value == (1.0f / 8.0f)) {
		omod_op = RC_OMOD_DIV_8;
	} else {
		return 0;
	}

	/* NOTE(review): presumably this yields the writers of the temporary
	 * only if the MUL is their sole reader - confirm against
	 * rc_variable_list_get_writers_one_reader(). */
	writer_list = rc_variable_list_get_writers_one_reader(var_list,
		RC_INSTRUCTION_NORMAL, &inst_mul->U.I.SrcReg[temp_index]);

	if (!writer_list) {
		return 0;
	}

	/* Check every writer, and make sure the MUL destination is not
	 * touched between the writer and the MUL. */
	cb_data.Clobbered = 0;
	cb_data.Writer = &inst_mul->U.I.DstReg;
	for (var = writer_list->Item; var; var = var->Friend) {
		struct rc_instruction * inst;
		const struct rc_opcode_info * info = rc_get_opcode_info(
				var->Inst->U.I.Opcode);
		if (info->HasTexture) {
			return 0;
		}
		if (var->Inst->U.I.SaturateMode != RC_SATURATE_NONE) {
			return 0;
		}
		for (inst = inst_mul->Prev; inst != var->Inst;
							inst = inst->Prev) {
			rc_for_all_reads_mask(inst, omod_filter_reader_cb,
								&cb_data);
			rc_for_all_writes_mask(inst, omod_filter_writer_cb,
								&cb_data);
			if (cb_data.Clobbered) {
				break;
			}
		}
	}

	if (cb_data.Clobbered) {
		return 0;
	}

	writemask_sum = rc_variable_writemask_sum(writer_list->Item);

	/* rc_normal_rewrite_writemask can't expand a previous writemask to store
	 * more channels replicated.
	 */
	if (util_bitcount(writemask_sum) < util_bitcount(inst_mul->U.I.DstReg.WriteMask))
		return 0;

	/* Rewrite the instructions */
	for (var = writer_list->Item; var; var = var->Friend) {
		struct rc_variable * writer = var;
		unsigned conversion_swizzle = rc_make_conversion_swizzle(
					writemask_sum,
					inst_mul->U.I.DstReg.WriteMask);
		writer->Inst->U.I.Omod = omod_op;
		writer->Inst->U.I.DstReg.File = inst_mul->U.I.DstReg.File;
		writer->Inst->U.I.DstReg.Index = inst_mul->U.I.DstReg.Index;
		rc_normal_rewrite_writemask(writer->Inst, conversion_swizzle);
		writer->Inst->U.I.SaturateMode = inst_mul->U.I.SaturateMode;
	}

	rc_remove_instruction(inst_mul);

	return 1;
}
866
867 /**
868 * @return
869 * 0 if inst is still part of the program.
870 * 1 if inst is no longer part of the program.
871 */
peephole(struct radeon_compiler * c, struct rc_instruction * inst)872 static int peephole(struct radeon_compiler * c, struct rc_instruction * inst)
873 {
874 switch(inst->U.I.Opcode){
875 case RC_OPCODE_ADD:
876 if (c->has_presub) {
877 if(peephole_add_presub_inv(c, inst))
878 return 1;
879 if(peephole_add_presub_add(c, inst))
880 return 1;
881 }
882 break;
883 default:
884 break;
885 }
886 return 0;
887 }
888
merge_swizzles(unsigned int swz1, unsigned int swz2)889 static unsigned int merge_swizzles(unsigned int swz1, unsigned int swz2) {
890 unsigned int new_swz = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0);
891 for (unsigned int chan = 0; chan < 4; chan++) {
892 unsigned int swz = GET_SWZ(swz1, chan);
893 if (swz != RC_SWIZZLE_UNUSED) {
894 SET_SWZ(new_swz, chan, swz);
895 continue;
896 }
897 swz = GET_SWZ(swz2, chan);
898 SET_SWZ(new_swz, chan, swz);
899 }
900 return new_swz;
901 }
902
merge_movs(struct radeon_compiler * c, struct rc_instruction * inst)903 static int merge_movs(struct radeon_compiler * c, struct rc_instruction * inst)
904 {
905 unsigned int orig_dst_reg = inst->U.I.DstReg.Index;
906 unsigned int orig_dst_file = inst->U.I.DstReg.File;
907 unsigned int orig_dst_wmask = inst->U.I.DstReg.WriteMask;
908 unsigned int orig_src_reg = inst->U.I.SrcReg[0].Index;
909 unsigned int orig_src_file = inst->U.I.SrcReg[0].File;
910
911 struct rc_instruction * cur = inst;
912 while (cur!= &c->Program.Instructions) {
913 cur = cur->Next;
914 const struct rc_opcode_info * opcode = rc_get_opcode_info(cur->U.I.Opcode);
915
916 /* Keep it simple for now and stop when encountering any
917 * control flow.
918 */
919 if (opcode->IsFlowControl)
920 return 0;
921
922 /* Stop when the original destination is overwritten */
923 if (orig_dst_reg == cur->U.I.DstReg.Index &&
924 orig_dst_file == cur->U.I.DstReg.File &&
925 (orig_dst_wmask & cur->U.I.DstReg.WriteMask) != 0)
926 return 0;
927
928 /* Stop the search when the original instruction destination
929 * is used as a source for anything.
930 */
931 for (unsigned i = 0; i < opcode->NumSrcRegs; i++) {
932 if (cur->U.I.SrcReg[i].File == orig_dst_file &&
933 cur->U.I.SrcReg[i].Index == orig_dst_reg)
934 return 0;
935 }
936
937 if (cur->U.I.Opcode == RC_OPCODE_MOV &&
938 cur->U.I.DstReg.File == orig_dst_file &&
939 cur->U.I.DstReg.Index == orig_dst_reg &&
940 (cur->U.I.DstReg.WriteMask & orig_dst_wmask) == 0) {
941
942 /* We can merge the movs if one of them is from inline constant */
943 if (cur->U.I.SrcReg[0].File == RC_FILE_NONE ||
944 orig_src_file == RC_FILE_NONE) {
945 cur->U.I.DstReg.WriteMask |= orig_dst_wmask;
946
947 if (cur->U.I.SrcReg[0].File == RC_FILE_NONE) {
948 cur->U.I.SrcReg[0].File = orig_src_file;
949 cur->U.I.SrcReg[0].Index = orig_src_reg;
950 cur->U.I.SrcReg[0].Abs = inst->U.I.SrcReg[0].Abs;
951 cur->U.I.SrcReg[0].RelAddr = inst->U.I.SrcReg[0].RelAddr;
952 }
953 cur->U.I.SrcReg[0].Swizzle =
954 merge_swizzles(cur->U.I.SrcReg[0].Swizzle,
955 inst->U.I.SrcReg[0].Swizzle);
956
957 cur->U.I.SrcReg[0].Negate |= inst->U.I.SrcReg[0].Negate;
958
959 /* finally delete the original mov */
960 rc_remove_instruction(inst);
961
962 return 1;
963 }
964 }
965 }
966 return 0;
967 }
968
rc_optimize(struct radeon_compiler * c, void *user)969 void rc_optimize(struct radeon_compiler * c, void *user)
970 {
971 struct rc_instruction * inst = c->Program.Instructions.Next;
972 while(inst != &c->Program.Instructions) {
973 struct rc_instruction * cur = inst;
974 inst = inst->Next;
975
976 constant_folding(c, cur);
977
978 if(peephole(c, cur))
979 continue;
980
981 if (cur->U.I.Opcode == RC_OPCODE_MOV) {
982 if (c->is_r500) {
983 if (merge_movs(c, cur))
984 continue;
985 }
986 copy_propagate(c, cur);
987 /* cur may no longer be part of the program */
988 }
989 }
990
991 if (!c->has_omod) {
992 return;
993 }
994
995 inst = c->Program.Instructions.Next;
996 struct rc_list * var_list = NULL;
997 while(inst != &c->Program.Instructions) {
998 struct rc_instruction * cur = inst;
999 inst = inst->Next;
1000 if (cur->U.I.Opcode == RC_OPCODE_MUL) {
1001 if (!var_list)
1002 var_list = rc_get_variables(c);
1003 if (peephole_mul_omod(c, cur, var_list))
1004 var_list = NULL;
1005 }
1006 }
1007 }
1008