1 /*
2  * Copyright (C) 2009 Nicolai Haehnle.
3  * Copyright 2010 Tom Stellard <tstellar@gmail.com>
4  *
5  * All Rights Reserved.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining
8  * a copy of this software and associated documentation files (the
9  * "Software"), to deal in the Software without restriction, including
10  * without limitation the rights to use, copy, modify, merge, publish,
11  * distribute, sublicense, and/or sell copies of the Software, and to
12  * permit persons to whom the Software is furnished to do so, subject to
13  * the following conditions:
14  *
15  * The above copyright notice and this permission notice (including the
16  * next paragraph) shall be included in all copies or substantial
17  * portions of the Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22  * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
23  * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24  * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26  *
27  */
28 
29 #include "util/u_math.h"
30 
31 #include "radeon_dataflow.h"
32 
33 #include "radeon_compiler.h"
34 #include "radeon_compiler_util.h"
35 #include "radeon_list.h"
36 #include "radeon_swizzle.h"
37 #include "radeon_variable.h"
38 
39 struct src_clobbered_reads_cb_data {
40 	rc_register_file File;
41 	unsigned int Index;
42 	unsigned int Mask;
43 	struct rc_reader_data * ReaderData;
44 };
45 
/**
 * Callback type used by presub_helper() to rewrite a single reader source.
 * Arguments, in order: the instruction being folded into a presubtract,
 * the reader instruction, and the index of the reader's source to rewrite.
 */
typedef void (*rc_presub_replace_fn)(struct rc_instruction *,
				     struct rc_instruction *,
				     unsigned int);
49 
chain_srcregs(struct rc_src_register outer, struct rc_src_register inner)50 static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner)
51 {
52 	struct rc_src_register combine;
53 	combine.File = inner.File;
54 	combine.Index = inner.Index;
55 	combine.RelAddr = inner.RelAddr;
56 	if (outer.Abs) {
57 		combine.Abs = 1;
58 		combine.Negate = outer.Negate;
59 	} else {
60 		combine.Abs = inner.Abs;
61 		combine.Negate = swizzle_mask(outer.Swizzle, inner.Negate);
62 		combine.Negate ^= outer.Negate;
63 	}
64 	combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle);
65 	return combine;
66 }
67 
copy_propagate_scan_read(void * data, struct rc_instruction * inst, struct rc_src_register * src)68 static void copy_propagate_scan_read(void * data, struct rc_instruction * inst,
69 						struct rc_src_register * src)
70 {
71 	rc_register_file file = src->File;
72 	struct rc_reader_data * reader_data = data;
73 
74 	if(!rc_inst_can_use_presub(inst,
75 				reader_data->Writer->U.I.PreSub.Opcode,
76 				rc_swizzle_to_writemask(src->Swizzle),
77 				src,
78 				&reader_data->Writer->U.I.PreSub.SrcReg[0],
79 				&reader_data->Writer->U.I.PreSub.SrcReg[1])) {
80 		reader_data->Abort = 1;
81 		return;
82 	}
83 
84 	/* XXX This could probably be handled better. */
85 	if (file == RC_FILE_ADDRESS) {
86 		reader_data->Abort = 1;
87 		return;
88 	}
89 
90 	/* These instructions cannot read from the constants file.
91 	 * see radeonTransformTEX()
92 	 */
93 	if(reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_TEMPORARY &&
94 			reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_INPUT &&
95 				(inst->U.I.Opcode == RC_OPCODE_TEX ||
96 				inst->U.I.Opcode == RC_OPCODE_TXB ||
97 				inst->U.I.Opcode == RC_OPCODE_TXP ||
98 				inst->U.I.Opcode == RC_OPCODE_TXD ||
99 				inst->U.I.Opcode == RC_OPCODE_TXL ||
100 				inst->U.I.Opcode == RC_OPCODE_KIL)){
101 		reader_data->Abort = 1;
102 		return;
103 	}
104 }
105 
src_clobbered_reads_cb( void * data, struct rc_instruction * inst, struct rc_src_register * src)106 static void src_clobbered_reads_cb(
107 	void * data,
108 	struct rc_instruction * inst,
109 	struct rc_src_register * src)
110 {
111 	struct src_clobbered_reads_cb_data * sc_data = data;
112 
113 	if (src->File == sc_data->File
114 	    && src->Index == sc_data->Index
115 	    && (rc_swizzle_to_writemask(src->Swizzle) & sc_data->Mask)) {
116 
117 		sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW;
118 	}
119 
120 	if (src->RelAddr && sc_data->File == RC_FILE_ADDRESS) {
121 		sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW;
122 	}
123 }
124 
/**
 * Write callback handed to rc_get_readers().
 *
 * For the write (@p file, @p index, @p mask), runs src_clobbered_reads_cb()
 * over every source read by the tracked writer instruction
 * (reader_data->Writer), so that a write to one of those sources is
 * recorded in the reader data.
 */
static void is_src_clobbered_scan_write(
	void * data,
	struct rc_instruction * inst,
	rc_register_file file,
	unsigned int index,
	unsigned int mask)
{
	struct rc_reader_data * rd = data;
	struct src_clobbered_reads_cb_data write_info = {
		.File = file,
		.Index = index,
		.Mask = mask,
		.ReaderData = rd,
	};

	rc_for_all_reads_src(rd->Writer, src_clobbered_reads_cb, &write_info);
}
141 
copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov)142 static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov)
143 {
144 	struct rc_reader_data reader_data;
145 	unsigned int i;
146 
147 	if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY ||
148 	    inst_mov->U.I.WriteALUResult)
149 		return;
150 
151 	/* Get a list of all the readers of this MOV instruction. */
152 	reader_data.ExitOnAbort = 1;
153 	rc_get_readers(c, inst_mov, &reader_data,
154 		       copy_propagate_scan_read, NULL,
155 		       is_src_clobbered_scan_write);
156 
157 	if (reader_data.Abort || reader_data.ReaderCount == 0 || reader_data.ReadersAfterEndloop)
158 		return;
159 
160 	/* We can propagate SaturateMode if all the readers are MOV instructions
161 	 * without a presubtract operation, source negation and absolute.
162 	 * In that case, we just move SaturateMode to all readers. */
163         if (inst_mov->U.I.SaturateMode) {
164 		for (i = 0; i < reader_data.ReaderCount; i++) {
165 			struct rc_instruction * inst = reader_data.Readers[i].Inst;
166 
167 			if (inst->U.I.Opcode != RC_OPCODE_MOV ||
168 			    inst->U.I.SrcReg[0].File == RC_FILE_PRESUB ||
169 			    inst->U.I.SrcReg[0].Abs ||
170 			    inst->U.I.SrcReg[0].Negate) {
171 				return;
172 			}
173 		}
174 	}
175 
176 	/* Propagate the MOV instruction. */
177 	for (i = 0; i < reader_data.ReaderCount; i++) {
178 		struct rc_instruction * inst = reader_data.Readers[i].Inst;
179 		*reader_data.Readers[i].U.I.Src = chain_srcregs(*reader_data.Readers[i].U.I.Src, inst_mov->U.I.SrcReg[0]);
180 
181 		if (inst_mov->U.I.SrcReg[0].File == RC_FILE_PRESUB)
182 			inst->U.I.PreSub = inst_mov->U.I.PreSub;
183 		if (!inst->U.I.SaturateMode)
184 			inst->U.I.SaturateMode = inst_mov->U.I.SaturateMode;
185 	}
186 
187 	/* Finally, remove the original MOV instruction */
188 	rc_remove_instruction(inst_mov);
189 }
190 
191 /**
192  * Check if a source register is actually always the same
193  * swizzle constant.
194  */
is_src_uniform_constant(struct rc_src_register src, rc_swizzle * pswz, unsigned int * pnegate)195 static int is_src_uniform_constant(struct rc_src_register src,
196 		rc_swizzle * pswz, unsigned int * pnegate)
197 {
198 	int have_used = 0;
199 
200 	if (src.File != RC_FILE_NONE) {
201 		*pswz = 0;
202 		return 0;
203 	}
204 
205 	for(unsigned int chan = 0; chan < 4; ++chan) {
206 		unsigned int swz = GET_SWZ(src.Swizzle, chan);
207 		if (swz < 4) {
208 			*pswz = 0;
209 			return 0;
210 		}
211 		if (swz == RC_SWIZZLE_UNUSED)
212 			continue;
213 
214 		if (!have_used) {
215 			*pswz = swz;
216 			*pnegate = GET_BIT(src.Negate, chan);
217 			have_used = 1;
218 		} else {
219 			if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan)) {
220 				*pswz = 0;
221 				return 0;
222 			}
223 		}
224 	}
225 
226 	return 1;
227 }
228 
constant_folding_mad(struct rc_instruction * inst)229 static void constant_folding_mad(struct rc_instruction * inst)
230 {
231 	rc_swizzle swz = 0;
232 	unsigned int negate= 0;
233 
234 	if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) {
235 		if (swz == RC_SWIZZLE_ZERO) {
236 			inst->U.I.Opcode = RC_OPCODE_MUL;
237 			return;
238 		}
239 	}
240 
241 	if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
242 		if (swz == RC_SWIZZLE_ONE) {
243 			inst->U.I.Opcode = RC_OPCODE_ADD;
244 			if (negate)
245 				inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
246 			inst->U.I.SrcReg[1] = inst->U.I.SrcReg[2];
247 			return;
248 		} else if (swz == RC_SWIZZLE_ZERO) {
249 			inst->U.I.Opcode = RC_OPCODE_MOV;
250 			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
251 			return;
252 		}
253 	}
254 
255 	if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
256 		if (swz == RC_SWIZZLE_ONE) {
257 			inst->U.I.Opcode = RC_OPCODE_ADD;
258 			if (negate)
259 				inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
260 			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
261 			return;
262 		} else if (swz == RC_SWIZZLE_ZERO) {
263 			inst->U.I.Opcode = RC_OPCODE_MOV;
264 			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
265 			return;
266 		}
267 	}
268 }
269 
constant_folding_mul(struct rc_instruction * inst)270 static void constant_folding_mul(struct rc_instruction * inst)
271 {
272 	rc_swizzle swz = 0;
273 	unsigned int negate = 0;
274 
275 	if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
276 		if (swz == RC_SWIZZLE_ONE) {
277 			inst->U.I.Opcode = RC_OPCODE_MOV;
278 			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
279 			if (negate)
280 				inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
281 			return;
282 		} else if (swz == RC_SWIZZLE_ZERO) {
283 			inst->U.I.Opcode = RC_OPCODE_MOV;
284 			inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
285 			return;
286 		}
287 	}
288 
289 	if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
290 		if (swz == RC_SWIZZLE_ONE) {
291 			inst->U.I.Opcode = RC_OPCODE_MOV;
292 			if (negate)
293 				inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
294 			return;
295 		} else if (swz == RC_SWIZZLE_ZERO) {
296 			inst->U.I.Opcode = RC_OPCODE_MOV;
297 			inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
298 			return;
299 		}
300 	}
301 }
302 
constant_folding_add(struct rc_instruction * inst)303 static void constant_folding_add(struct rc_instruction * inst)
304 {
305 	rc_swizzle swz = 0;
306 	unsigned int negate = 0;
307 
308 	if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
309 		if (swz == RC_SWIZZLE_ZERO) {
310 			inst->U.I.Opcode = RC_OPCODE_MOV;
311 			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
312 			return;
313 		}
314 	}
315 
316 	if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
317 		if (swz == RC_SWIZZLE_ZERO) {
318 			inst->U.I.Opcode = RC_OPCODE_MOV;
319 			return;
320 		}
321 	}
322 }
323 
324 /**
325  * Replace 0.0, 1.0 and 0.5 immediate constants by their
326  * respective swizzles. Simplify instructions like ADD dst, src, 0;
327  */
constant_folding(struct radeon_compiler * c, struct rc_instruction * inst)328 static void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst)
329 {
330 	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
331 	unsigned int i;
332 
333 	/* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */
334 	for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
335 		struct rc_constant * constant;
336 		struct rc_src_register newsrc;
337 		int have_real_reference;
338 		unsigned int chan;
339 
340 		/* If there are only 0, 0.5, 1, or _ swizzles, mark the source as a constant. */
341 		for (chan = 0; chan < 4; ++chan)
342 			if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) <= 3)
343 				break;
344 		if (chan == 4) {
345 			inst->U.I.SrcReg[src].File = RC_FILE_NONE;
346 			continue;
347 		}
348 
349 		/* Convert immediates to swizzles. */
350 		if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT ||
351 		    inst->U.I.SrcReg[src].RelAddr ||
352 		    inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count)
353 			continue;
354 
355 		constant =
356 			&c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index];
357 
358 		if (constant->Type != RC_CONSTANT_IMMEDIATE)
359 			continue;
360 
361 		newsrc = inst->U.I.SrcReg[src];
362 		have_real_reference = 0;
363 		for (chan = 0; chan < 4; ++chan) {
364 			unsigned int swz = GET_SWZ(newsrc.Swizzle, chan);
365 			unsigned int newswz;
366 			float imm;
367 			float baseimm;
368 
369 			if (swz >= 4)
370 				continue;
371 
372 			imm = constant->u.Immediate[swz];
373 			baseimm = imm;
374 			if (imm < 0.0)
375 				baseimm = -baseimm;
376 
377 			if (baseimm == 0.0) {
378 				newswz = RC_SWIZZLE_ZERO;
379 			} else if (baseimm == 1.0) {
380 				newswz = RC_SWIZZLE_ONE;
381 			} else if (baseimm == 0.5 && c->has_half_swizzles) {
382 				newswz = RC_SWIZZLE_HALF;
383 			} else {
384 				have_real_reference = 1;
385 				continue;
386 			}
387 
388 			SET_SWZ(newsrc.Swizzle, chan, newswz);
389 			if (imm < 0.0 && !newsrc.Abs)
390 				newsrc.Negate ^= 1 << chan;
391 		}
392 
393 		if (!have_real_reference) {
394 			newsrc.File = RC_FILE_NONE;
395 			newsrc.Index = 0;
396 		}
397 
398 		/* don't make the swizzle worse */
399 		if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc))
400 			continue;
401 
402 		inst->U.I.SrcReg[src] = newsrc;
403 	}
404 
405 	/* Simplify instructions based on constants */
406 	if (inst->U.I.Opcode == RC_OPCODE_MAD)
407 		constant_folding_mad(inst);
408 
409 	/* note: MAD can simplify to MUL or ADD */
410 	if (inst->U.I.Opcode == RC_OPCODE_MUL)
411 		constant_folding_mul(inst);
412 	else if (inst->U.I.Opcode == RC_OPCODE_ADD)
413 		constant_folding_add(inst);
414 
415 	/* In case this instruction has been converted, make sure all of the
416 	 * registers that are no longer used are empty. */
417 	opcode = rc_get_opcode_info(inst->U.I.Opcode);
418 	for(i = opcode->NumSrcRegs; i < 3; i++) {
419 		memset(&inst->U.I.SrcReg[i], 0, sizeof(struct rc_src_register));
420 	}
421 }
422 
423 /**
424  * If src and dst use the same register, this function returns a writemask that
425  * indicates which components are read by src.  Otherwise zero is returned.
426  */
src_reads_dst_mask(struct rc_src_register src, struct rc_dst_register dst)427 static unsigned int src_reads_dst_mask(struct rc_src_register src,
428 						struct rc_dst_register dst)
429 {
430 	if (dst.File != src.File || dst.Index != src.Index) {
431 		return 0;
432 	}
433 	return rc_swizzle_to_writemask(src.Swizzle);
434 }
435 
436 /* Return 1 if the source registers has a constant swizzle (e.g. 0, 0.5, 1.0)
437  * in any of its channels.  Return 0 otherwise. */
src_has_const_swz(struct rc_src_register src)438 static int src_has_const_swz(struct rc_src_register src) {
439 	int chan;
440 	for(chan = 0; chan < 4; chan++) {
441 		unsigned int swz = GET_SWZ(src.Swizzle, chan);
442 		if (swz == RC_SWIZZLE_ZERO || swz == RC_SWIZZLE_HALF
443 						|| swz == RC_SWIZZLE_ONE) {
444 			return 1;
445 		}
446 	}
447 	return 0;
448 }
449 
presub_scan_read( void * data, struct rc_instruction * inst, struct rc_src_register * src)450 static void presub_scan_read(
451 	void * data,
452 	struct rc_instruction * inst,
453 	struct rc_src_register * src)
454 {
455 	struct rc_reader_data * reader_data = data;
456 	rc_presubtract_op * presub_opcode = reader_data->CbData;
457 
458 	if (!rc_inst_can_use_presub(inst, *presub_opcode,
459 			reader_data->Writer->U.I.DstReg.WriteMask,
460 			src,
461 			&reader_data->Writer->U.I.SrcReg[0],
462 			&reader_data->Writer->U.I.SrcReg[1])) {
463 		reader_data->Abort = 1;
464 		return;
465 	}
466 }
467 
/**
 * Convert every reader of inst_add's result to use a presubtract operation.
 *
 * @param c               compiler context
 * @param inst_add        instruction whose result is to be folded
 * @param presub_opcode   presubtract operation the readers must accept
 * @param presub_replace  callback that rewrites one source of one reader
 * @return 1 if all readers were rewritten; 0 if the reader scan aborted or
 *         found no readers, in which case nothing was modified.
 */
static int presub_helper(
	struct radeon_compiler * c,
	struct rc_instruction * inst_add,
	rc_presubtract_op presub_opcode,
	rc_presub_replace_fn presub_replace)
{
	struct rc_reader_data readers;
	rc_presubtract_op cb_op = presub_opcode;
	unsigned int r;

	readers.CbData = &cb_op;
	readers.ExitOnAbort = 1;
	rc_get_readers(c, inst_add, &readers, presub_scan_read, NULL,
						is_src_clobbered_scan_write);

	if (readers.Abort)
		return 0;
	if (readers.ReaderCount == 0)
		return 0;

	for (r = 0; r < readers.ReaderCount; r++) {
		struct rc_reader reader = readers.Readers[r];
		const struct rc_opcode_info * info =
				rc_get_opcode_info(reader.Inst->U.I.Opcode);
		unsigned int s;

		/* Locate the source slot this reader entry refers to. */
		for (s = 0; s < info->NumSrcRegs; s++) {
			if (reader.U.I.Src != &reader.Inst->U.I.SrcReg[s])
				continue;
			presub_replace(inst_add, reader.Inst, s);
		}
	}
	return 1;
}
499 
presub_replace_add( struct rc_instruction * inst_add, struct rc_instruction * inst_reader, unsigned int src_index)500 static void presub_replace_add(
501 	struct rc_instruction * inst_add,
502 	struct rc_instruction * inst_reader,
503 	unsigned int src_index)
504 {
505 	rc_presubtract_op presub_opcode;
506 
507 	/* This function assumes that inst_add->U.I.SrcReg[0] and
508 	 * inst_add->U.I.SrcReg[1] aren't both negative.
509 	 */
510 	assert(!(inst_add->U.I.SrcReg[1].Negate && inst_add->U.I.SrcReg[0].Negate));
511 
512 	if (inst_add->U.I.SrcReg[1].Negate || inst_add->U.I.SrcReg[0].Negate)
513 		presub_opcode = RC_PRESUB_SUB;
514 	else
515 		presub_opcode = RC_PRESUB_ADD;
516 
517 	if (inst_add->U.I.SrcReg[1].Negate) {
518 		inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
519 		inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[0];
520 	} else {
521 		inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[0];
522 		inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[1];
523 	}
524 	inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
525 	inst_reader->U.I.PreSub.SrcReg[1].Negate = 0;
526 	inst_reader->U.I.PreSub.Opcode = presub_opcode;
527 	inst_reader->U.I.SrcReg[src_index] =
528 			chain_srcregs(inst_reader->U.I.SrcReg[src_index],
529 					inst_reader->U.I.PreSub.SrcReg[0]);
530 	inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
531 	inst_reader->U.I.SrcReg[src_index].Index = presub_opcode;
532 }
533 
is_presub_candidate( struct radeon_compiler * c, struct rc_instruction * inst)534 static int is_presub_candidate(
535 	struct radeon_compiler * c,
536 	struct rc_instruction * inst)
537 {
538 	const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
539 	unsigned int i;
540 	unsigned int is_constant[2] = {0, 0};
541 
542 	assert(inst->U.I.Opcode == RC_OPCODE_ADD);
543 
544 	if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE
545 			|| inst->U.I.SaturateMode
546 			|| inst->U.I.WriteALUResult
547 			|| inst->U.I.Omod) {
548 		return 0;
549 	}
550 
551 	/* If both sources use a constant swizzle, then we can't convert it to
552 	 * a presubtract operation.  In fact for the ADD and SUB presubtract
553 	 * operations neither source can contain a constant swizzle.  This
554 	 * specific case is checked in peephole_add_presub_add() when
555 	 * we make sure the swizzles for both sources are equal, so we
556 	 * don't need to worry about it here. */
557 	for (i = 0; i < 2; i++) {
558 		int chan;
559 		for (chan = 0; chan < 4; chan++) {
560 			rc_swizzle swz =
561 				get_swz(inst->U.I.SrcReg[i].Swizzle, chan);
562 			if (swz == RC_SWIZZLE_ONE
563 					|| swz == RC_SWIZZLE_ZERO
564 					|| swz == RC_SWIZZLE_HALF) {
565 				is_constant[i] = 1;
566 			}
567 		}
568 	}
569 	if (is_constant[0] && is_constant[1])
570 		return 0;
571 
572 	for(i = 0; i < info->NumSrcRegs; i++) {
573 		struct rc_src_register src = inst->U.I.SrcReg[i];
574 		if (src_reads_dst_mask(src, inst->U.I.DstReg))
575 			return 0;
576 
577 		src.File = RC_FILE_PRESUB;
578 		if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, src))
579 			return 0;
580 	}
581 	return 1;
582 }
583 
peephole_add_presub_add( struct radeon_compiler * c, struct rc_instruction * inst_add)584 static int peephole_add_presub_add(
585 	struct radeon_compiler * c,
586 	struct rc_instruction * inst_add)
587 {
588 	unsigned dstmask = inst_add->U.I.DstReg.WriteMask;
589         unsigned src0_neg = inst_add->U.I.SrcReg[0].Negate & dstmask;
590         unsigned src1_neg = inst_add->U.I.SrcReg[1].Negate & dstmask;
591 
592 	if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle)
593 		return 0;
594 
595 	/* src0 and src1 can't have absolute values */
596 	if (inst_add->U.I.SrcReg[0].Abs || inst_add->U.I.SrcReg[1].Abs)
597 	        return 0;
598 
599 	/* presub_replace_add() assumes only one is negative */
600 	if (inst_add->U.I.SrcReg[0].Negate && inst_add->U.I.SrcReg[1].Negate)
601 	        return 0;
602 
603         /* if src0 is negative, at least all bits of dstmask have to be set */
604         if (inst_add->U.I.SrcReg[0].Negate && src0_neg != dstmask)
605 	        return 0;
606 
607         /* if src1 is negative, at least all bits of dstmask have to be set */
608         if (inst_add->U.I.SrcReg[1].Negate && src1_neg != dstmask)
609 	        return 0;
610 
611 	if (!is_presub_candidate(c, inst_add))
612 		return 0;
613 
614 	if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) {
615 		rc_remove_instruction(inst_add);
616 		return 1;
617 	}
618 	return 0;
619 }
620 
presub_replace_inv( struct rc_instruction * inst_add, struct rc_instruction * inst_reader, unsigned int src_index)621 static void presub_replace_inv(
622 	struct rc_instruction * inst_add,
623 	struct rc_instruction * inst_reader,
624 	unsigned int src_index)
625 {
626 	/* We must be careful not to modify inst_add, since it
627 	 * is possible it will remain part of the program.*/
628 	inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
629 	inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
630 	inst_reader->U.I.PreSub.Opcode = RC_PRESUB_INV;
631 	inst_reader->U.I.SrcReg[src_index] = chain_srcregs(inst_reader->U.I.SrcReg[src_index],
632 						inst_reader->U.I.PreSub.SrcReg[0]);
633 
634 	inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
635 	inst_reader->U.I.SrcReg[src_index].Index = RC_PRESUB_INV;
636 }
637 
638 /**
639  * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1]
640  * Use the presubtract 1 - src0 for all readers of TEMP[0].  The first source
641  * of the add instruction must have the constatnt 1 swizzle.  This function
642  * does not check const registers to see if their value is 1.0, so it should
643  * be called after the constant_folding optimization.
644  * @return
645  * 	0 if the ADD instruction is still part of the program.
646  * 	1 if the ADD instruction is no longer part of the program.
647  */
peephole_add_presub_inv( struct radeon_compiler * c, struct rc_instruction * inst_add)648 static int peephole_add_presub_inv(
649 	struct radeon_compiler * c,
650 	struct rc_instruction * inst_add)
651 {
652 	unsigned int i, swz;
653 
654 	if (!is_presub_candidate(c, inst_add))
655 		return 0;
656 
657 	/* Check if src0 is 1. */
658 	/* XXX It would be nice to use is_src_uniform_constant here, but that
659 	 * function only works if the register's file is RC_FILE_NONE */
660 	for(i = 0; i < 4; i++ ) {
661 		if (!(inst_add->U.I.DstReg.WriteMask & (1 << i)))
662 			continue;
663 
664 		swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i);
665 		if (swz != RC_SWIZZLE_ONE || inst_add->U.I.SrcReg[0].Negate & (1 << i))
666 			return 0;
667 	}
668 
669 	/* Check src1. */
670 	if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) !=
671 						inst_add->U.I.DstReg.WriteMask
672 		|| inst_add->U.I.SrcReg[1].Abs
673 		|| (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY
674 			&& inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT)
675 		|| src_has_const_swz(inst_add->U.I.SrcReg[1])) {
676 
677 		return 0;
678 	}
679 
680 	if (presub_helper(c, inst_add, RC_PRESUB_INV, presub_replace_inv)) {
681 		rc_remove_instruction(inst_add);
682 		return 1;
683 	}
684 	return 0;
685 }
686 
/**
 * State shared with the omod_filter_*_cb() callbacks while scanning the
 * instructions in front of a MUL.
 */
struct peephole_mul_cb_data {
	/* Destination register whose reads/writes are being watched for. */
	struct rc_dst_register *Writer;
	/* Set to 1 by the callbacks once a conflicting access is seen. */
	unsigned int Clobbered;
};
691 
/**
 * Read callback for peephole_mul_omod(): marks the scan as clobbered when
 * the read (@p file, @p index, @p mask) overlaps the watched destination
 * register, as judged by rc_src_reads_dst_mask().
 */
static void omod_filter_reader_cb(
	void * userdata,
	struct rc_instruction * inst,
	rc_register_file file,
	unsigned int index,
	unsigned int mask)
{
	struct peephole_mul_cb_data * scan = userdata;
	const struct rc_dst_register * watched = scan->Writer;

	if (!rc_src_reads_dst_mask(file, mask, index,
		watched->File, watched->Index, watched->WriteMask)) {
		return;
	}

	scan->Clobbered = 1;
}
706 
/**
 * Write callback for peephole_mul_omod(): marks the scan as clobbered when
 * the write (@p file, @p index, @p mask) hits any channel of the watched
 * destination register.
 */
static void omod_filter_writer_cb(
	void * userdata,
	struct rc_instruction * inst,
	rc_register_file file,
	unsigned int index,
	unsigned int mask)
{
	struct peephole_mul_cb_data * scan = userdata;
	const struct rc_dst_register * watched = scan->Writer;

	if (file != watched->File)
		return;
	if (index != watched->Index)
		return;
	if (!(mask & watched->WriteMask))
		return;

	scan->Clobbered = 1;
}
720 
peephole_mul_omod( struct radeon_compiler * c, struct rc_instruction * inst_mul, struct rc_list * var_list)721 static int peephole_mul_omod(
722 	struct radeon_compiler * c,
723 	struct rc_instruction * inst_mul,
724 	struct rc_list * var_list)
725 {
726 	unsigned int chan = 0, swz, i;
727 	int const_index = -1;
728 	int temp_index = -1;
729 	float const_value;
730 	rc_omod_op omod_op = RC_OMOD_DISABLE;
731 	struct rc_list * writer_list;
732 	struct rc_variable * var;
733 	struct peephole_mul_cb_data cb_data;
734 	unsigned writemask_sum;
735 
736 	for (i = 0; i < 2; i++) {
737 		unsigned int j;
738 		if (inst_mul->U.I.SrcReg[i].File != RC_FILE_CONSTANT
739 			&& inst_mul->U.I.SrcReg[i].File != RC_FILE_TEMPORARY) {
740 			return 0;
741 		}
742 		if (inst_mul->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
743 			if (temp_index != -1) {
744 				/* The instruction has two temp sources */
745 				return 0;
746 			} else {
747 				temp_index = i;
748 				continue;
749 			}
750 		}
751 		/* If we get this far Src[i] must be a constant src */
752 		if (inst_mul->U.I.SrcReg[i].Negate) {
753 			return 0;
754 		}
755 		/* The constant src needs to read from the same swizzle */
756 		swz = RC_SWIZZLE_UNUSED;
757 		chan = 0;
758 		for (j = 0; j < 4; j++) {
759 			unsigned int j_swz =
760 				GET_SWZ(inst_mul->U.I.SrcReg[i].Swizzle, j);
761 			if (j_swz == RC_SWIZZLE_UNUSED) {
762 				continue;
763 			}
764 			if (swz == RC_SWIZZLE_UNUSED) {
765 				swz = j_swz;
766 				chan = j;
767 			} else if (j_swz != swz) {
768 				return 0;
769 			}
770 		}
771 
772 		if (const_index != -1) {
773 			/* The instruction has two constant sources */
774 			return 0;
775 		} else {
776 			const_index = i;
777 		}
778 	}
779 
780 	if (!rc_src_reg_is_immediate(c, inst_mul->U.I.SrcReg[const_index].File,
781 				inst_mul->U.I.SrcReg[const_index].Index)) {
782 		return 0;
783 	}
784 	const_value = rc_get_constant_value(c,
785 			inst_mul->U.I.SrcReg[const_index].Index,
786 			inst_mul->U.I.SrcReg[const_index].Swizzle,
787 			inst_mul->U.I.SrcReg[const_index].Negate,
788 			chan);
789 
790 	if (const_value == 2.0f) {
791 		omod_op = RC_OMOD_MUL_2;
792 	} else if (const_value == 4.0f) {
793 		omod_op = RC_OMOD_MUL_4;
794 	} else if (const_value == 8.0f) {
795 		omod_op = RC_OMOD_MUL_8;
796 	} else if (const_value == (1.0f / 2.0f)) {
797 		omod_op = RC_OMOD_DIV_2;
798 	} else if (const_value == (1.0f / 4.0f)) {
799 		omod_op = RC_OMOD_DIV_4;
800 	} else if (const_value == (1.0f / 8.0f)) {
801 		omod_op = RC_OMOD_DIV_8;
802 	} else {
803 		return 0;
804 	}
805 
806 	writer_list = rc_variable_list_get_writers_one_reader(var_list,
807 		RC_INSTRUCTION_NORMAL, &inst_mul->U.I.SrcReg[temp_index]);
808 
809 	if (!writer_list) {
810 		return 0;
811 	}
812 
813 	cb_data.Clobbered = 0;
814 	cb_data.Writer = &inst_mul->U.I.DstReg;
815 	for (var = writer_list->Item; var; var = var->Friend) {
816 		struct rc_instruction * inst;
817 		const struct rc_opcode_info * info = rc_get_opcode_info(
818 				var->Inst->U.I.Opcode);
819 		if (info->HasTexture) {
820 			return 0;
821 		}
822 		if (var->Inst->U.I.SaturateMode != RC_SATURATE_NONE) {
823 			return 0;
824 		}
825 		for (inst = inst_mul->Prev; inst != var->Inst;
826 							inst = inst->Prev) {
827 			rc_for_all_reads_mask(inst, omod_filter_reader_cb,
828 								&cb_data);
829 			rc_for_all_writes_mask(inst, omod_filter_writer_cb,
830 								&cb_data);
831 			if (cb_data.Clobbered) {
832 				break;
833 			}
834 		}
835 	}
836 
837 	if (cb_data.Clobbered) {
838 		return 0;
839 	}
840 
841 	writemask_sum = rc_variable_writemask_sum(writer_list->Item);
842 
843 	/* rc_normal_rewrite_writemask can't expand a previous writemask to store
844 	 * more channels replicated.
845 	 */
846 	if (util_bitcount(writemask_sum) < util_bitcount(inst_mul->U.I.DstReg.WriteMask))
847 		return 0;
848 
849 	/* Rewrite the instructions */
850 	for (var = writer_list->Item; var; var = var->Friend) {
851 		struct rc_variable * writer = var;
852 		unsigned conversion_swizzle = rc_make_conversion_swizzle(
853 					writemask_sum,
854 					inst_mul->U.I.DstReg.WriteMask);
855 		writer->Inst->U.I.Omod = omod_op;
856 		writer->Inst->U.I.DstReg.File = inst_mul->U.I.DstReg.File;
857 		writer->Inst->U.I.DstReg.Index = inst_mul->U.I.DstReg.Index;
858 		rc_normal_rewrite_writemask(writer->Inst, conversion_swizzle);
859 		writer->Inst->U.I.SaturateMode = inst_mul->U.I.SaturateMode;
860 	}
861 
862 	rc_remove_instruction(inst_mul);
863 
864 	return 1;
865 }
866 
867 /**
868  * @return
869  * 	0 if inst is still part of the program.
870  * 	1 if inst is no longer part of the program.
871  */
peephole(struct radeon_compiler * c, struct rc_instruction * inst)872 static int peephole(struct radeon_compiler * c, struct rc_instruction * inst)
873 {
874 	switch(inst->U.I.Opcode){
875 	case RC_OPCODE_ADD:
876 		if (c->has_presub) {
877 			if(peephole_add_presub_inv(c, inst))
878 				return 1;
879 			if(peephole_add_presub_add(c, inst))
880 				return 1;
881 		}
882 		break;
883 	default:
884 		break;
885 	}
886 	return 0;
887 }
888 
merge_swizzles(unsigned int swz1, unsigned int swz2)889 static unsigned int merge_swizzles(unsigned int swz1, unsigned int swz2) {
890 	unsigned int new_swz = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0);
891 	for (unsigned int chan = 0; chan < 4; chan++) {
892 		unsigned int swz = GET_SWZ(swz1, chan);
893 		if (swz != RC_SWIZZLE_UNUSED) {
894 			SET_SWZ(new_swz, chan, swz);
895 			continue;
896 		}
897 		swz = GET_SWZ(swz2, chan);
898 		SET_SWZ(new_swz, chan, swz);
899 	}
900 	return new_swz;
901 }
902 
merge_movs(struct radeon_compiler * c, struct rc_instruction * inst)903 static int merge_movs(struct radeon_compiler * c, struct rc_instruction * inst)
904 {
905 	unsigned int orig_dst_reg = inst->U.I.DstReg.Index;
906 	unsigned int orig_dst_file = inst->U.I.DstReg.File;
907 	unsigned int orig_dst_wmask = inst->U.I.DstReg.WriteMask;
908 	unsigned int orig_src_reg = inst->U.I.SrcReg[0].Index;
909 	unsigned int orig_src_file = inst->U.I.SrcReg[0].File;
910 
911 	struct rc_instruction * cur = inst;
912 	while (cur!= &c->Program.Instructions) {
913 		cur = cur->Next;
914 		const struct rc_opcode_info * opcode = rc_get_opcode_info(cur->U.I.Opcode);
915 
916 		/* Keep it simple for now and stop when encountering any
917 		 * control flow.
918 		 */
919 		if (opcode->IsFlowControl)
920 			return 0;
921 
922 		/* Stop when the original destination is overwritten */
923 		if (orig_dst_reg == cur->U.I.DstReg.Index &&
924 			orig_dst_file == cur->U.I.DstReg.File &&
925 			(orig_dst_wmask & cur->U.I.DstReg.WriteMask) != 0)
926 			return 0;
927 
928 		/* Stop the search when the original instruction destination
929 		 * is used as a source for anything.
930 		 */
931 		for (unsigned i = 0; i < opcode->NumSrcRegs; i++) {
932 			if (cur->U.I.SrcReg[i].File == orig_dst_file &&
933 				cur->U.I.SrcReg[i].Index == orig_dst_reg)
934 				return 0;
935 		}
936 
937 		if (cur->U.I.Opcode == RC_OPCODE_MOV &&
938 			cur->U.I.DstReg.File == orig_dst_file &&
939 			cur->U.I.DstReg.Index == orig_dst_reg &&
940 			(cur->U.I.DstReg.WriteMask & orig_dst_wmask) == 0) {
941 
942 			/* We can merge the movs if one of them is from inline constant */
943 			if (cur->U.I.SrcReg[0].File == RC_FILE_NONE ||
944 				orig_src_file == RC_FILE_NONE) {
945 				cur->U.I.DstReg.WriteMask |= orig_dst_wmask;
946 
947 				if (cur->U.I.SrcReg[0].File == RC_FILE_NONE) {
948 					cur->U.I.SrcReg[0].File = orig_src_file;
949 					cur->U.I.SrcReg[0].Index = orig_src_reg;
950 					cur->U.I.SrcReg[0].Abs = inst->U.I.SrcReg[0].Abs;
951 					cur->U.I.SrcReg[0].RelAddr = inst->U.I.SrcReg[0].RelAddr;
952 				}
953 				cur->U.I.SrcReg[0].Swizzle =
954 					merge_swizzles(cur->U.I.SrcReg[0].Swizzle,
955 							inst->U.I.SrcReg[0].Swizzle);
956 
957 				cur->U.I.SrcReg[0].Negate |= inst->U.I.SrcReg[0].Negate;
958 
959 				/* finally delete the original mov */
960 				rc_remove_instruction(inst);
961 
962 				return 1;
963 			}
964 		}
965 	}
966 	return 0;
967 }
968 
rc_optimize(struct radeon_compiler * c, void *user)969 void rc_optimize(struct radeon_compiler * c, void *user)
970 {
971 	struct rc_instruction * inst = c->Program.Instructions.Next;
972 	while(inst != &c->Program.Instructions) {
973 		struct rc_instruction * cur = inst;
974 		inst = inst->Next;
975 
976 		constant_folding(c, cur);
977 
978 		if(peephole(c, cur))
979 			continue;
980 
981 		if (cur->U.I.Opcode == RC_OPCODE_MOV) {
982 			if (c->is_r500) {
983 				if (merge_movs(c, cur))
984 					continue;
985 			}
986 			copy_propagate(c, cur);
987 			/* cur may no longer be part of the program */
988 		}
989 	}
990 
991 	if (!c->has_omod) {
992 		return;
993 	}
994 
995 	inst = c->Program.Instructions.Next;
996 	struct rc_list * var_list = NULL;
997 	while(inst != &c->Program.Instructions) {
998 		struct rc_instruction * cur = inst;
999 		inst = inst->Next;
1000 		if (cur->U.I.Opcode == RC_OPCODE_MUL) {
1001 			if (!var_list)
1002 				var_list = rc_get_variables(c);
1003 			if (peephole_mul_omod(c, cur, var_list))
1004 				var_list = NULL;
1005 		}
1006 	}
1007 }
1008