/*
 * Copyright (C) 2009 Nicolai Haehnle.
 * Copyright 2010 Tom Stellard <tstellar@gmail.com>
 *
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 */
28bf215546Sopenharmony_ci
29bf215546Sopenharmony_ci#include "util/u_math.h"
30bf215546Sopenharmony_ci
31bf215546Sopenharmony_ci#include "radeon_dataflow.h"
32bf215546Sopenharmony_ci
33bf215546Sopenharmony_ci#include "radeon_compiler.h"
34bf215546Sopenharmony_ci#include "radeon_compiler_util.h"
35bf215546Sopenharmony_ci#include "radeon_list.h"
36bf215546Sopenharmony_ci#include "radeon_swizzle.h"
37bf215546Sopenharmony_ci#include "radeon_variable.h"
38bf215546Sopenharmony_ci
39bf215546Sopenharmony_cistruct src_clobbered_reads_cb_data {
40bf215546Sopenharmony_ci	rc_register_file File;
41bf215546Sopenharmony_ci	unsigned int Index;
42bf215546Sopenharmony_ci	unsigned int Mask;
43bf215546Sopenharmony_ci	struct rc_reader_data * ReaderData;
44bf215546Sopenharmony_ci};
45bf215546Sopenharmony_ci
/**
 * Callback invoked by presub_helper() for each reader source that has to be
 * rewritten to consume a presubtract result.
 *
 * Arguments, in order: the ADD instruction being folded away, the reader
 * instruction to patch, and the index of the reader's source register that
 * referenced the ADD's result.
 */
typedef void (*rc_presub_replace_fn)(struct rc_instruction * inst_add,
				     struct rc_instruction * inst_reader,
				     unsigned int src_index);
49bf215546Sopenharmony_ci
50bf215546Sopenharmony_cistatic struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner)
51bf215546Sopenharmony_ci{
52bf215546Sopenharmony_ci	struct rc_src_register combine;
53bf215546Sopenharmony_ci	combine.File = inner.File;
54bf215546Sopenharmony_ci	combine.Index = inner.Index;
55bf215546Sopenharmony_ci	combine.RelAddr = inner.RelAddr;
56bf215546Sopenharmony_ci	if (outer.Abs) {
57bf215546Sopenharmony_ci		combine.Abs = 1;
58bf215546Sopenharmony_ci		combine.Negate = outer.Negate;
59bf215546Sopenharmony_ci	} else {
60bf215546Sopenharmony_ci		combine.Abs = inner.Abs;
61bf215546Sopenharmony_ci		combine.Negate = swizzle_mask(outer.Swizzle, inner.Negate);
62bf215546Sopenharmony_ci		combine.Negate ^= outer.Negate;
63bf215546Sopenharmony_ci	}
64bf215546Sopenharmony_ci	combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle);
65bf215546Sopenharmony_ci	return combine;
66bf215546Sopenharmony_ci}
67bf215546Sopenharmony_ci
68bf215546Sopenharmony_cistatic void copy_propagate_scan_read(void * data, struct rc_instruction * inst,
69bf215546Sopenharmony_ci						struct rc_src_register * src)
70bf215546Sopenharmony_ci{
71bf215546Sopenharmony_ci	rc_register_file file = src->File;
72bf215546Sopenharmony_ci	struct rc_reader_data * reader_data = data;
73bf215546Sopenharmony_ci
74bf215546Sopenharmony_ci	if(!rc_inst_can_use_presub(inst,
75bf215546Sopenharmony_ci				reader_data->Writer->U.I.PreSub.Opcode,
76bf215546Sopenharmony_ci				rc_swizzle_to_writemask(src->Swizzle),
77bf215546Sopenharmony_ci				src,
78bf215546Sopenharmony_ci				&reader_data->Writer->U.I.PreSub.SrcReg[0],
79bf215546Sopenharmony_ci				&reader_data->Writer->U.I.PreSub.SrcReg[1])) {
80bf215546Sopenharmony_ci		reader_data->Abort = 1;
81bf215546Sopenharmony_ci		return;
82bf215546Sopenharmony_ci	}
83bf215546Sopenharmony_ci
84bf215546Sopenharmony_ci	/* XXX This could probably be handled better. */
85bf215546Sopenharmony_ci	if (file == RC_FILE_ADDRESS) {
86bf215546Sopenharmony_ci		reader_data->Abort = 1;
87bf215546Sopenharmony_ci		return;
88bf215546Sopenharmony_ci	}
89bf215546Sopenharmony_ci
90bf215546Sopenharmony_ci	/* These instructions cannot read from the constants file.
91bf215546Sopenharmony_ci	 * see radeonTransformTEX()
92bf215546Sopenharmony_ci	 */
93bf215546Sopenharmony_ci	if(reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_TEMPORARY &&
94bf215546Sopenharmony_ci			reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_INPUT &&
95bf215546Sopenharmony_ci				(inst->U.I.Opcode == RC_OPCODE_TEX ||
96bf215546Sopenharmony_ci				inst->U.I.Opcode == RC_OPCODE_TXB ||
97bf215546Sopenharmony_ci				inst->U.I.Opcode == RC_OPCODE_TXP ||
98bf215546Sopenharmony_ci				inst->U.I.Opcode == RC_OPCODE_TXD ||
99bf215546Sopenharmony_ci				inst->U.I.Opcode == RC_OPCODE_TXL ||
100bf215546Sopenharmony_ci				inst->U.I.Opcode == RC_OPCODE_KIL)){
101bf215546Sopenharmony_ci		reader_data->Abort = 1;
102bf215546Sopenharmony_ci		return;
103bf215546Sopenharmony_ci	}
104bf215546Sopenharmony_ci}
105bf215546Sopenharmony_ci
106bf215546Sopenharmony_cistatic void src_clobbered_reads_cb(
107bf215546Sopenharmony_ci	void * data,
108bf215546Sopenharmony_ci	struct rc_instruction * inst,
109bf215546Sopenharmony_ci	struct rc_src_register * src)
110bf215546Sopenharmony_ci{
111bf215546Sopenharmony_ci	struct src_clobbered_reads_cb_data * sc_data = data;
112bf215546Sopenharmony_ci
113bf215546Sopenharmony_ci	if (src->File == sc_data->File
114bf215546Sopenharmony_ci	    && src->Index == sc_data->Index
115bf215546Sopenharmony_ci	    && (rc_swizzle_to_writemask(src->Swizzle) & sc_data->Mask)) {
116bf215546Sopenharmony_ci
117bf215546Sopenharmony_ci		sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW;
118bf215546Sopenharmony_ci	}
119bf215546Sopenharmony_ci
120bf215546Sopenharmony_ci	if (src->RelAddr && sc_data->File == RC_FILE_ADDRESS) {
121bf215546Sopenharmony_ci		sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW;
122bf215546Sopenharmony_ci	}
123bf215546Sopenharmony_ci}
124bf215546Sopenharmony_ci
125bf215546Sopenharmony_cistatic void is_src_clobbered_scan_write(
126bf215546Sopenharmony_ci	void * data,
127bf215546Sopenharmony_ci	struct rc_instruction * inst,
128bf215546Sopenharmony_ci	rc_register_file file,
129bf215546Sopenharmony_ci	unsigned int index,
130bf215546Sopenharmony_ci	unsigned int mask)
131bf215546Sopenharmony_ci{
132bf215546Sopenharmony_ci	struct src_clobbered_reads_cb_data sc_data;
133bf215546Sopenharmony_ci	struct rc_reader_data * reader_data = data;
134bf215546Sopenharmony_ci	sc_data.File = file;
135bf215546Sopenharmony_ci	sc_data.Index = index;
136bf215546Sopenharmony_ci	sc_data.Mask = mask;
137bf215546Sopenharmony_ci	sc_data.ReaderData = reader_data;
138bf215546Sopenharmony_ci	rc_for_all_reads_src(reader_data->Writer,
139bf215546Sopenharmony_ci					src_clobbered_reads_cb, &sc_data);
140bf215546Sopenharmony_ci}
141bf215546Sopenharmony_ci
142bf215546Sopenharmony_cistatic void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov)
143bf215546Sopenharmony_ci{
144bf215546Sopenharmony_ci	struct rc_reader_data reader_data;
145bf215546Sopenharmony_ci	unsigned int i;
146bf215546Sopenharmony_ci
147bf215546Sopenharmony_ci	if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY ||
148bf215546Sopenharmony_ci	    inst_mov->U.I.WriteALUResult)
149bf215546Sopenharmony_ci		return;
150bf215546Sopenharmony_ci
151bf215546Sopenharmony_ci	/* Get a list of all the readers of this MOV instruction. */
152bf215546Sopenharmony_ci	reader_data.ExitOnAbort = 1;
153bf215546Sopenharmony_ci	rc_get_readers(c, inst_mov, &reader_data,
154bf215546Sopenharmony_ci		       copy_propagate_scan_read, NULL,
155bf215546Sopenharmony_ci		       is_src_clobbered_scan_write);
156bf215546Sopenharmony_ci
157bf215546Sopenharmony_ci	if (reader_data.Abort || reader_data.ReaderCount == 0 || reader_data.ReadersAfterEndloop)
158bf215546Sopenharmony_ci		return;
159bf215546Sopenharmony_ci
160bf215546Sopenharmony_ci	/* We can propagate SaturateMode if all the readers are MOV instructions
161bf215546Sopenharmony_ci	 * without a presubtract operation, source negation and absolute.
162bf215546Sopenharmony_ci	 * In that case, we just move SaturateMode to all readers. */
163bf215546Sopenharmony_ci        if (inst_mov->U.I.SaturateMode) {
164bf215546Sopenharmony_ci		for (i = 0; i < reader_data.ReaderCount; i++) {
165bf215546Sopenharmony_ci			struct rc_instruction * inst = reader_data.Readers[i].Inst;
166bf215546Sopenharmony_ci
167bf215546Sopenharmony_ci			if (inst->U.I.Opcode != RC_OPCODE_MOV ||
168bf215546Sopenharmony_ci			    inst->U.I.SrcReg[0].File == RC_FILE_PRESUB ||
169bf215546Sopenharmony_ci			    inst->U.I.SrcReg[0].Abs ||
170bf215546Sopenharmony_ci			    inst->U.I.SrcReg[0].Negate) {
171bf215546Sopenharmony_ci				return;
172bf215546Sopenharmony_ci			}
173bf215546Sopenharmony_ci		}
174bf215546Sopenharmony_ci	}
175bf215546Sopenharmony_ci
176bf215546Sopenharmony_ci	/* Propagate the MOV instruction. */
177bf215546Sopenharmony_ci	for (i = 0; i < reader_data.ReaderCount; i++) {
178bf215546Sopenharmony_ci		struct rc_instruction * inst = reader_data.Readers[i].Inst;
179bf215546Sopenharmony_ci		*reader_data.Readers[i].U.I.Src = chain_srcregs(*reader_data.Readers[i].U.I.Src, inst_mov->U.I.SrcReg[0]);
180bf215546Sopenharmony_ci
181bf215546Sopenharmony_ci		if (inst_mov->U.I.SrcReg[0].File == RC_FILE_PRESUB)
182bf215546Sopenharmony_ci			inst->U.I.PreSub = inst_mov->U.I.PreSub;
183bf215546Sopenharmony_ci		if (!inst->U.I.SaturateMode)
184bf215546Sopenharmony_ci			inst->U.I.SaturateMode = inst_mov->U.I.SaturateMode;
185bf215546Sopenharmony_ci	}
186bf215546Sopenharmony_ci
187bf215546Sopenharmony_ci	/* Finally, remove the original MOV instruction */
188bf215546Sopenharmony_ci	rc_remove_instruction(inst_mov);
189bf215546Sopenharmony_ci}
190bf215546Sopenharmony_ci
191bf215546Sopenharmony_ci/**
192bf215546Sopenharmony_ci * Check if a source register is actually always the same
193bf215546Sopenharmony_ci * swizzle constant.
194bf215546Sopenharmony_ci */
195bf215546Sopenharmony_cistatic int is_src_uniform_constant(struct rc_src_register src,
196bf215546Sopenharmony_ci		rc_swizzle * pswz, unsigned int * pnegate)
197bf215546Sopenharmony_ci{
198bf215546Sopenharmony_ci	int have_used = 0;
199bf215546Sopenharmony_ci
200bf215546Sopenharmony_ci	if (src.File != RC_FILE_NONE) {
201bf215546Sopenharmony_ci		*pswz = 0;
202bf215546Sopenharmony_ci		return 0;
203bf215546Sopenharmony_ci	}
204bf215546Sopenharmony_ci
205bf215546Sopenharmony_ci	for(unsigned int chan = 0; chan < 4; ++chan) {
206bf215546Sopenharmony_ci		unsigned int swz = GET_SWZ(src.Swizzle, chan);
207bf215546Sopenharmony_ci		if (swz < 4) {
208bf215546Sopenharmony_ci			*pswz = 0;
209bf215546Sopenharmony_ci			return 0;
210bf215546Sopenharmony_ci		}
211bf215546Sopenharmony_ci		if (swz == RC_SWIZZLE_UNUSED)
212bf215546Sopenharmony_ci			continue;
213bf215546Sopenharmony_ci
214bf215546Sopenharmony_ci		if (!have_used) {
215bf215546Sopenharmony_ci			*pswz = swz;
216bf215546Sopenharmony_ci			*pnegate = GET_BIT(src.Negate, chan);
217bf215546Sopenharmony_ci			have_used = 1;
218bf215546Sopenharmony_ci		} else {
219bf215546Sopenharmony_ci			if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan)) {
220bf215546Sopenharmony_ci				*pswz = 0;
221bf215546Sopenharmony_ci				return 0;
222bf215546Sopenharmony_ci			}
223bf215546Sopenharmony_ci		}
224bf215546Sopenharmony_ci	}
225bf215546Sopenharmony_ci
226bf215546Sopenharmony_ci	return 1;
227bf215546Sopenharmony_ci}
228bf215546Sopenharmony_ci
229bf215546Sopenharmony_cistatic void constant_folding_mad(struct rc_instruction * inst)
230bf215546Sopenharmony_ci{
231bf215546Sopenharmony_ci	rc_swizzle swz = 0;
232bf215546Sopenharmony_ci	unsigned int negate= 0;
233bf215546Sopenharmony_ci
234bf215546Sopenharmony_ci	if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) {
235bf215546Sopenharmony_ci		if (swz == RC_SWIZZLE_ZERO) {
236bf215546Sopenharmony_ci			inst->U.I.Opcode = RC_OPCODE_MUL;
237bf215546Sopenharmony_ci			return;
238bf215546Sopenharmony_ci		}
239bf215546Sopenharmony_ci	}
240bf215546Sopenharmony_ci
241bf215546Sopenharmony_ci	if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
242bf215546Sopenharmony_ci		if (swz == RC_SWIZZLE_ONE) {
243bf215546Sopenharmony_ci			inst->U.I.Opcode = RC_OPCODE_ADD;
244bf215546Sopenharmony_ci			if (negate)
245bf215546Sopenharmony_ci				inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
246bf215546Sopenharmony_ci			inst->U.I.SrcReg[1] = inst->U.I.SrcReg[2];
247bf215546Sopenharmony_ci			return;
248bf215546Sopenharmony_ci		} else if (swz == RC_SWIZZLE_ZERO) {
249bf215546Sopenharmony_ci			inst->U.I.Opcode = RC_OPCODE_MOV;
250bf215546Sopenharmony_ci			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
251bf215546Sopenharmony_ci			return;
252bf215546Sopenharmony_ci		}
253bf215546Sopenharmony_ci	}
254bf215546Sopenharmony_ci
255bf215546Sopenharmony_ci	if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
256bf215546Sopenharmony_ci		if (swz == RC_SWIZZLE_ONE) {
257bf215546Sopenharmony_ci			inst->U.I.Opcode = RC_OPCODE_ADD;
258bf215546Sopenharmony_ci			if (negate)
259bf215546Sopenharmony_ci				inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
260bf215546Sopenharmony_ci			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
261bf215546Sopenharmony_ci			return;
262bf215546Sopenharmony_ci		} else if (swz == RC_SWIZZLE_ZERO) {
263bf215546Sopenharmony_ci			inst->U.I.Opcode = RC_OPCODE_MOV;
264bf215546Sopenharmony_ci			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
265bf215546Sopenharmony_ci			return;
266bf215546Sopenharmony_ci		}
267bf215546Sopenharmony_ci	}
268bf215546Sopenharmony_ci}
269bf215546Sopenharmony_ci
270bf215546Sopenharmony_cistatic void constant_folding_mul(struct rc_instruction * inst)
271bf215546Sopenharmony_ci{
272bf215546Sopenharmony_ci	rc_swizzle swz = 0;
273bf215546Sopenharmony_ci	unsigned int negate = 0;
274bf215546Sopenharmony_ci
275bf215546Sopenharmony_ci	if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
276bf215546Sopenharmony_ci		if (swz == RC_SWIZZLE_ONE) {
277bf215546Sopenharmony_ci			inst->U.I.Opcode = RC_OPCODE_MOV;
278bf215546Sopenharmony_ci			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
279bf215546Sopenharmony_ci			if (negate)
280bf215546Sopenharmony_ci				inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
281bf215546Sopenharmony_ci			return;
282bf215546Sopenharmony_ci		} else if (swz == RC_SWIZZLE_ZERO) {
283bf215546Sopenharmony_ci			inst->U.I.Opcode = RC_OPCODE_MOV;
284bf215546Sopenharmony_ci			inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
285bf215546Sopenharmony_ci			return;
286bf215546Sopenharmony_ci		}
287bf215546Sopenharmony_ci	}
288bf215546Sopenharmony_ci
289bf215546Sopenharmony_ci	if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
290bf215546Sopenharmony_ci		if (swz == RC_SWIZZLE_ONE) {
291bf215546Sopenharmony_ci			inst->U.I.Opcode = RC_OPCODE_MOV;
292bf215546Sopenharmony_ci			if (negate)
293bf215546Sopenharmony_ci				inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
294bf215546Sopenharmony_ci			return;
295bf215546Sopenharmony_ci		} else if (swz == RC_SWIZZLE_ZERO) {
296bf215546Sopenharmony_ci			inst->U.I.Opcode = RC_OPCODE_MOV;
297bf215546Sopenharmony_ci			inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
298bf215546Sopenharmony_ci			return;
299bf215546Sopenharmony_ci		}
300bf215546Sopenharmony_ci	}
301bf215546Sopenharmony_ci}
302bf215546Sopenharmony_ci
303bf215546Sopenharmony_cistatic void constant_folding_add(struct rc_instruction * inst)
304bf215546Sopenharmony_ci{
305bf215546Sopenharmony_ci	rc_swizzle swz = 0;
306bf215546Sopenharmony_ci	unsigned int negate = 0;
307bf215546Sopenharmony_ci
308bf215546Sopenharmony_ci	if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
309bf215546Sopenharmony_ci		if (swz == RC_SWIZZLE_ZERO) {
310bf215546Sopenharmony_ci			inst->U.I.Opcode = RC_OPCODE_MOV;
311bf215546Sopenharmony_ci			inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
312bf215546Sopenharmony_ci			return;
313bf215546Sopenharmony_ci		}
314bf215546Sopenharmony_ci	}
315bf215546Sopenharmony_ci
316bf215546Sopenharmony_ci	if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
317bf215546Sopenharmony_ci		if (swz == RC_SWIZZLE_ZERO) {
318bf215546Sopenharmony_ci			inst->U.I.Opcode = RC_OPCODE_MOV;
319bf215546Sopenharmony_ci			return;
320bf215546Sopenharmony_ci		}
321bf215546Sopenharmony_ci	}
322bf215546Sopenharmony_ci}
323bf215546Sopenharmony_ci
324bf215546Sopenharmony_ci/**
325bf215546Sopenharmony_ci * Replace 0.0, 1.0 and 0.5 immediate constants by their
326bf215546Sopenharmony_ci * respective swizzles. Simplify instructions like ADD dst, src, 0;
327bf215546Sopenharmony_ci */
328bf215546Sopenharmony_cistatic void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst)
329bf215546Sopenharmony_ci{
330bf215546Sopenharmony_ci	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
331bf215546Sopenharmony_ci	unsigned int i;
332bf215546Sopenharmony_ci
333bf215546Sopenharmony_ci	/* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */
334bf215546Sopenharmony_ci	for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
335bf215546Sopenharmony_ci		struct rc_constant * constant;
336bf215546Sopenharmony_ci		struct rc_src_register newsrc;
337bf215546Sopenharmony_ci		int have_real_reference;
338bf215546Sopenharmony_ci		unsigned int chan;
339bf215546Sopenharmony_ci
340bf215546Sopenharmony_ci		/* If there are only 0, 0.5, 1, or _ swizzles, mark the source as a constant. */
341bf215546Sopenharmony_ci		for (chan = 0; chan < 4; ++chan)
342bf215546Sopenharmony_ci			if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) <= 3)
343bf215546Sopenharmony_ci				break;
344bf215546Sopenharmony_ci		if (chan == 4) {
345bf215546Sopenharmony_ci			inst->U.I.SrcReg[src].File = RC_FILE_NONE;
346bf215546Sopenharmony_ci			continue;
347bf215546Sopenharmony_ci		}
348bf215546Sopenharmony_ci
349bf215546Sopenharmony_ci		/* Convert immediates to swizzles. */
350bf215546Sopenharmony_ci		if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT ||
351bf215546Sopenharmony_ci		    inst->U.I.SrcReg[src].RelAddr ||
352bf215546Sopenharmony_ci		    inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count)
353bf215546Sopenharmony_ci			continue;
354bf215546Sopenharmony_ci
355bf215546Sopenharmony_ci		constant =
356bf215546Sopenharmony_ci			&c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index];
357bf215546Sopenharmony_ci
358bf215546Sopenharmony_ci		if (constant->Type != RC_CONSTANT_IMMEDIATE)
359bf215546Sopenharmony_ci			continue;
360bf215546Sopenharmony_ci
361bf215546Sopenharmony_ci		newsrc = inst->U.I.SrcReg[src];
362bf215546Sopenharmony_ci		have_real_reference = 0;
363bf215546Sopenharmony_ci		for (chan = 0; chan < 4; ++chan) {
364bf215546Sopenharmony_ci			unsigned int swz = GET_SWZ(newsrc.Swizzle, chan);
365bf215546Sopenharmony_ci			unsigned int newswz;
366bf215546Sopenharmony_ci			float imm;
367bf215546Sopenharmony_ci			float baseimm;
368bf215546Sopenharmony_ci
369bf215546Sopenharmony_ci			if (swz >= 4)
370bf215546Sopenharmony_ci				continue;
371bf215546Sopenharmony_ci
372bf215546Sopenharmony_ci			imm = constant->u.Immediate[swz];
373bf215546Sopenharmony_ci			baseimm = imm;
374bf215546Sopenharmony_ci			if (imm < 0.0)
375bf215546Sopenharmony_ci				baseimm = -baseimm;
376bf215546Sopenharmony_ci
377bf215546Sopenharmony_ci			if (baseimm == 0.0) {
378bf215546Sopenharmony_ci				newswz = RC_SWIZZLE_ZERO;
379bf215546Sopenharmony_ci			} else if (baseimm == 1.0) {
380bf215546Sopenharmony_ci				newswz = RC_SWIZZLE_ONE;
381bf215546Sopenharmony_ci			} else if (baseimm == 0.5 && c->has_half_swizzles) {
382bf215546Sopenharmony_ci				newswz = RC_SWIZZLE_HALF;
383bf215546Sopenharmony_ci			} else {
384bf215546Sopenharmony_ci				have_real_reference = 1;
385bf215546Sopenharmony_ci				continue;
386bf215546Sopenharmony_ci			}
387bf215546Sopenharmony_ci
388bf215546Sopenharmony_ci			SET_SWZ(newsrc.Swizzle, chan, newswz);
389bf215546Sopenharmony_ci			if (imm < 0.0 && !newsrc.Abs)
390bf215546Sopenharmony_ci				newsrc.Negate ^= 1 << chan;
391bf215546Sopenharmony_ci		}
392bf215546Sopenharmony_ci
393bf215546Sopenharmony_ci		if (!have_real_reference) {
394bf215546Sopenharmony_ci			newsrc.File = RC_FILE_NONE;
395bf215546Sopenharmony_ci			newsrc.Index = 0;
396bf215546Sopenharmony_ci		}
397bf215546Sopenharmony_ci
398bf215546Sopenharmony_ci		/* don't make the swizzle worse */
399bf215546Sopenharmony_ci		if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc))
400bf215546Sopenharmony_ci			continue;
401bf215546Sopenharmony_ci
402bf215546Sopenharmony_ci		inst->U.I.SrcReg[src] = newsrc;
403bf215546Sopenharmony_ci	}
404bf215546Sopenharmony_ci
405bf215546Sopenharmony_ci	/* Simplify instructions based on constants */
406bf215546Sopenharmony_ci	if (inst->U.I.Opcode == RC_OPCODE_MAD)
407bf215546Sopenharmony_ci		constant_folding_mad(inst);
408bf215546Sopenharmony_ci
409bf215546Sopenharmony_ci	/* note: MAD can simplify to MUL or ADD */
410bf215546Sopenharmony_ci	if (inst->U.I.Opcode == RC_OPCODE_MUL)
411bf215546Sopenharmony_ci		constant_folding_mul(inst);
412bf215546Sopenharmony_ci	else if (inst->U.I.Opcode == RC_OPCODE_ADD)
413bf215546Sopenharmony_ci		constant_folding_add(inst);
414bf215546Sopenharmony_ci
415bf215546Sopenharmony_ci	/* In case this instruction has been converted, make sure all of the
416bf215546Sopenharmony_ci	 * registers that are no longer used are empty. */
417bf215546Sopenharmony_ci	opcode = rc_get_opcode_info(inst->U.I.Opcode);
418bf215546Sopenharmony_ci	for(i = opcode->NumSrcRegs; i < 3; i++) {
419bf215546Sopenharmony_ci		memset(&inst->U.I.SrcReg[i], 0, sizeof(struct rc_src_register));
420bf215546Sopenharmony_ci	}
421bf215546Sopenharmony_ci}
422bf215546Sopenharmony_ci
423bf215546Sopenharmony_ci/**
424bf215546Sopenharmony_ci * If src and dst use the same register, this function returns a writemask that
425bf215546Sopenharmony_ci * indicates which components are read by src.  Otherwise zero is returned.
426bf215546Sopenharmony_ci */
427bf215546Sopenharmony_cistatic unsigned int src_reads_dst_mask(struct rc_src_register src,
428bf215546Sopenharmony_ci						struct rc_dst_register dst)
429bf215546Sopenharmony_ci{
430bf215546Sopenharmony_ci	if (dst.File != src.File || dst.Index != src.Index) {
431bf215546Sopenharmony_ci		return 0;
432bf215546Sopenharmony_ci	}
433bf215546Sopenharmony_ci	return rc_swizzle_to_writemask(src.Swizzle);
434bf215546Sopenharmony_ci}
435bf215546Sopenharmony_ci
436bf215546Sopenharmony_ci/* Return 1 if the source registers has a constant swizzle (e.g. 0, 0.5, 1.0)
437bf215546Sopenharmony_ci * in any of its channels.  Return 0 otherwise. */
438bf215546Sopenharmony_cistatic int src_has_const_swz(struct rc_src_register src) {
439bf215546Sopenharmony_ci	int chan;
440bf215546Sopenharmony_ci	for(chan = 0; chan < 4; chan++) {
441bf215546Sopenharmony_ci		unsigned int swz = GET_SWZ(src.Swizzle, chan);
442bf215546Sopenharmony_ci		if (swz == RC_SWIZZLE_ZERO || swz == RC_SWIZZLE_HALF
443bf215546Sopenharmony_ci						|| swz == RC_SWIZZLE_ONE) {
444bf215546Sopenharmony_ci			return 1;
445bf215546Sopenharmony_ci		}
446bf215546Sopenharmony_ci	}
447bf215546Sopenharmony_ci	return 0;
448bf215546Sopenharmony_ci}
449bf215546Sopenharmony_ci
450bf215546Sopenharmony_cistatic void presub_scan_read(
451bf215546Sopenharmony_ci	void * data,
452bf215546Sopenharmony_ci	struct rc_instruction * inst,
453bf215546Sopenharmony_ci	struct rc_src_register * src)
454bf215546Sopenharmony_ci{
455bf215546Sopenharmony_ci	struct rc_reader_data * reader_data = data;
456bf215546Sopenharmony_ci	rc_presubtract_op * presub_opcode = reader_data->CbData;
457bf215546Sopenharmony_ci
458bf215546Sopenharmony_ci	if (!rc_inst_can_use_presub(inst, *presub_opcode,
459bf215546Sopenharmony_ci			reader_data->Writer->U.I.DstReg.WriteMask,
460bf215546Sopenharmony_ci			src,
461bf215546Sopenharmony_ci			&reader_data->Writer->U.I.SrcReg[0],
462bf215546Sopenharmony_ci			&reader_data->Writer->U.I.SrcReg[1])) {
463bf215546Sopenharmony_ci		reader_data->Abort = 1;
464bf215546Sopenharmony_ci		return;
465bf215546Sopenharmony_ci	}
466bf215546Sopenharmony_ci}
467bf215546Sopenharmony_ci
468bf215546Sopenharmony_cistatic int presub_helper(
469bf215546Sopenharmony_ci	struct radeon_compiler * c,
470bf215546Sopenharmony_ci	struct rc_instruction * inst_add,
471bf215546Sopenharmony_ci	rc_presubtract_op presub_opcode,
472bf215546Sopenharmony_ci	rc_presub_replace_fn presub_replace)
473bf215546Sopenharmony_ci{
474bf215546Sopenharmony_ci	struct rc_reader_data reader_data;
475bf215546Sopenharmony_ci	unsigned int i;
476bf215546Sopenharmony_ci	rc_presubtract_op cb_op = presub_opcode;
477bf215546Sopenharmony_ci
478bf215546Sopenharmony_ci	reader_data.CbData = &cb_op;
479bf215546Sopenharmony_ci	reader_data.ExitOnAbort = 1;
480bf215546Sopenharmony_ci	rc_get_readers(c, inst_add, &reader_data, presub_scan_read, NULL,
481bf215546Sopenharmony_ci						is_src_clobbered_scan_write);
482bf215546Sopenharmony_ci
483bf215546Sopenharmony_ci	if (reader_data.Abort || reader_data.ReaderCount == 0)
484bf215546Sopenharmony_ci		return 0;
485bf215546Sopenharmony_ci
486bf215546Sopenharmony_ci	for(i = 0; i < reader_data.ReaderCount; i++) {
487bf215546Sopenharmony_ci		unsigned int src_index;
488bf215546Sopenharmony_ci		struct rc_reader reader = reader_data.Readers[i];
489bf215546Sopenharmony_ci		const struct rc_opcode_info * info =
490bf215546Sopenharmony_ci				rc_get_opcode_info(reader.Inst->U.I.Opcode);
491bf215546Sopenharmony_ci
492bf215546Sopenharmony_ci		for (src_index = 0; src_index < info->NumSrcRegs; src_index++) {
493bf215546Sopenharmony_ci			if (&reader.Inst->U.I.SrcReg[src_index] == reader.U.I.Src)
494bf215546Sopenharmony_ci				presub_replace(inst_add, reader.Inst, src_index);
495bf215546Sopenharmony_ci		}
496bf215546Sopenharmony_ci	}
497bf215546Sopenharmony_ci	return 1;
498bf215546Sopenharmony_ci}
499bf215546Sopenharmony_ci
500bf215546Sopenharmony_cistatic void presub_replace_add(
501bf215546Sopenharmony_ci	struct rc_instruction * inst_add,
502bf215546Sopenharmony_ci	struct rc_instruction * inst_reader,
503bf215546Sopenharmony_ci	unsigned int src_index)
504bf215546Sopenharmony_ci{
505bf215546Sopenharmony_ci	rc_presubtract_op presub_opcode;
506bf215546Sopenharmony_ci
507bf215546Sopenharmony_ci	/* This function assumes that inst_add->U.I.SrcReg[0] and
508bf215546Sopenharmony_ci	 * inst_add->U.I.SrcReg[1] aren't both negative.
509bf215546Sopenharmony_ci	 */
510bf215546Sopenharmony_ci	assert(!(inst_add->U.I.SrcReg[1].Negate && inst_add->U.I.SrcReg[0].Negate));
511bf215546Sopenharmony_ci
512bf215546Sopenharmony_ci	if (inst_add->U.I.SrcReg[1].Negate || inst_add->U.I.SrcReg[0].Negate)
513bf215546Sopenharmony_ci		presub_opcode = RC_PRESUB_SUB;
514bf215546Sopenharmony_ci	else
515bf215546Sopenharmony_ci		presub_opcode = RC_PRESUB_ADD;
516bf215546Sopenharmony_ci
517bf215546Sopenharmony_ci	if (inst_add->U.I.SrcReg[1].Negate) {
518bf215546Sopenharmony_ci		inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
519bf215546Sopenharmony_ci		inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[0];
520bf215546Sopenharmony_ci	} else {
521bf215546Sopenharmony_ci		inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[0];
522bf215546Sopenharmony_ci		inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[1];
523bf215546Sopenharmony_ci	}
524bf215546Sopenharmony_ci	inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
525bf215546Sopenharmony_ci	inst_reader->U.I.PreSub.SrcReg[1].Negate = 0;
526bf215546Sopenharmony_ci	inst_reader->U.I.PreSub.Opcode = presub_opcode;
527bf215546Sopenharmony_ci	inst_reader->U.I.SrcReg[src_index] =
528bf215546Sopenharmony_ci			chain_srcregs(inst_reader->U.I.SrcReg[src_index],
529bf215546Sopenharmony_ci					inst_reader->U.I.PreSub.SrcReg[0]);
530bf215546Sopenharmony_ci	inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
531bf215546Sopenharmony_ci	inst_reader->U.I.SrcReg[src_index].Index = presub_opcode;
532bf215546Sopenharmony_ci}
533bf215546Sopenharmony_ci
534bf215546Sopenharmony_cistatic int is_presub_candidate(
535bf215546Sopenharmony_ci	struct radeon_compiler * c,
536bf215546Sopenharmony_ci	struct rc_instruction * inst)
537bf215546Sopenharmony_ci{
538bf215546Sopenharmony_ci	const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
539bf215546Sopenharmony_ci	unsigned int i;
540bf215546Sopenharmony_ci	unsigned int is_constant[2] = {0, 0};
541bf215546Sopenharmony_ci
542bf215546Sopenharmony_ci	assert(inst->U.I.Opcode == RC_OPCODE_ADD);
543bf215546Sopenharmony_ci
544bf215546Sopenharmony_ci	if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE
545bf215546Sopenharmony_ci			|| inst->U.I.SaturateMode
546bf215546Sopenharmony_ci			|| inst->U.I.WriteALUResult
547bf215546Sopenharmony_ci			|| inst->U.I.Omod) {
548bf215546Sopenharmony_ci		return 0;
549bf215546Sopenharmony_ci	}
550bf215546Sopenharmony_ci
551bf215546Sopenharmony_ci	/* If both sources use a constant swizzle, then we can't convert it to
552bf215546Sopenharmony_ci	 * a presubtract operation.  In fact for the ADD and SUB presubtract
553bf215546Sopenharmony_ci	 * operations neither source can contain a constant swizzle.  This
554bf215546Sopenharmony_ci	 * specific case is checked in peephole_add_presub_add() when
555bf215546Sopenharmony_ci	 * we make sure the swizzles for both sources are equal, so we
556bf215546Sopenharmony_ci	 * don't need to worry about it here. */
557bf215546Sopenharmony_ci	for (i = 0; i < 2; i++) {
558bf215546Sopenharmony_ci		int chan;
559bf215546Sopenharmony_ci		for (chan = 0; chan < 4; chan++) {
560bf215546Sopenharmony_ci			rc_swizzle swz =
561bf215546Sopenharmony_ci				get_swz(inst->U.I.SrcReg[i].Swizzle, chan);
562bf215546Sopenharmony_ci			if (swz == RC_SWIZZLE_ONE
563bf215546Sopenharmony_ci					|| swz == RC_SWIZZLE_ZERO
564bf215546Sopenharmony_ci					|| swz == RC_SWIZZLE_HALF) {
565bf215546Sopenharmony_ci				is_constant[i] = 1;
566bf215546Sopenharmony_ci			}
567bf215546Sopenharmony_ci		}
568bf215546Sopenharmony_ci	}
569bf215546Sopenharmony_ci	if (is_constant[0] && is_constant[1])
570bf215546Sopenharmony_ci		return 0;
571bf215546Sopenharmony_ci
572bf215546Sopenharmony_ci	for(i = 0; i < info->NumSrcRegs; i++) {
573bf215546Sopenharmony_ci		struct rc_src_register src = inst->U.I.SrcReg[i];
574bf215546Sopenharmony_ci		if (src_reads_dst_mask(src, inst->U.I.DstReg))
575bf215546Sopenharmony_ci			return 0;
576bf215546Sopenharmony_ci
577bf215546Sopenharmony_ci		src.File = RC_FILE_PRESUB;
578bf215546Sopenharmony_ci		if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, src))
579bf215546Sopenharmony_ci			return 0;
580bf215546Sopenharmony_ci	}
581bf215546Sopenharmony_ci	return 1;
582bf215546Sopenharmony_ci}
583bf215546Sopenharmony_ci
584bf215546Sopenharmony_cistatic int peephole_add_presub_add(
585bf215546Sopenharmony_ci	struct radeon_compiler * c,
586bf215546Sopenharmony_ci	struct rc_instruction * inst_add)
587bf215546Sopenharmony_ci{
588bf215546Sopenharmony_ci	unsigned dstmask = inst_add->U.I.DstReg.WriteMask;
589bf215546Sopenharmony_ci        unsigned src0_neg = inst_add->U.I.SrcReg[0].Negate & dstmask;
590bf215546Sopenharmony_ci        unsigned src1_neg = inst_add->U.I.SrcReg[1].Negate & dstmask;
591bf215546Sopenharmony_ci
592bf215546Sopenharmony_ci	if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle)
593bf215546Sopenharmony_ci		return 0;
594bf215546Sopenharmony_ci
595bf215546Sopenharmony_ci	/* src0 and src1 can't have absolute values */
596bf215546Sopenharmony_ci	if (inst_add->U.I.SrcReg[0].Abs || inst_add->U.I.SrcReg[1].Abs)
597bf215546Sopenharmony_ci	        return 0;
598bf215546Sopenharmony_ci
599bf215546Sopenharmony_ci	/* presub_replace_add() assumes only one is negative */
600bf215546Sopenharmony_ci	if (inst_add->U.I.SrcReg[0].Negate && inst_add->U.I.SrcReg[1].Negate)
601bf215546Sopenharmony_ci	        return 0;
602bf215546Sopenharmony_ci
603bf215546Sopenharmony_ci        /* if src0 is negative, at least all bits of dstmask have to be set */
604bf215546Sopenharmony_ci        if (inst_add->U.I.SrcReg[0].Negate && src0_neg != dstmask)
605bf215546Sopenharmony_ci	        return 0;
606bf215546Sopenharmony_ci
607bf215546Sopenharmony_ci        /* if src1 is negative, at least all bits of dstmask have to be set */
608bf215546Sopenharmony_ci        if (inst_add->U.I.SrcReg[1].Negate && src1_neg != dstmask)
609bf215546Sopenharmony_ci	        return 0;
610bf215546Sopenharmony_ci
611bf215546Sopenharmony_ci	if (!is_presub_candidate(c, inst_add))
612bf215546Sopenharmony_ci		return 0;
613bf215546Sopenharmony_ci
614bf215546Sopenharmony_ci	if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) {
615bf215546Sopenharmony_ci		rc_remove_instruction(inst_add);
616bf215546Sopenharmony_ci		return 1;
617bf215546Sopenharmony_ci	}
618bf215546Sopenharmony_ci	return 0;
619bf215546Sopenharmony_ci}
620bf215546Sopenharmony_ci
621bf215546Sopenharmony_cistatic void presub_replace_inv(
622bf215546Sopenharmony_ci	struct rc_instruction * inst_add,
623bf215546Sopenharmony_ci	struct rc_instruction * inst_reader,
624bf215546Sopenharmony_ci	unsigned int src_index)
625bf215546Sopenharmony_ci{
626bf215546Sopenharmony_ci	/* We must be careful not to modify inst_add, since it
627bf215546Sopenharmony_ci	 * is possible it will remain part of the program.*/
628bf215546Sopenharmony_ci	inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
629bf215546Sopenharmony_ci	inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
630bf215546Sopenharmony_ci	inst_reader->U.I.PreSub.Opcode = RC_PRESUB_INV;
631bf215546Sopenharmony_ci	inst_reader->U.I.SrcReg[src_index] = chain_srcregs(inst_reader->U.I.SrcReg[src_index],
632bf215546Sopenharmony_ci						inst_reader->U.I.PreSub.SrcReg[0]);
633bf215546Sopenharmony_ci
634bf215546Sopenharmony_ci	inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
635bf215546Sopenharmony_ci	inst_reader->U.I.SrcReg[src_index].Index = RC_PRESUB_INV;
636bf215546Sopenharmony_ci}
637bf215546Sopenharmony_ci
638bf215546Sopenharmony_ci/**
639bf215546Sopenharmony_ci * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1]
640bf215546Sopenharmony_ci * Use the presubtract 1 - src0 for all readers of TEMP[0].  The first source
641bf215546Sopenharmony_ci * of the add instruction must have the constatnt 1 swizzle.  This function
642bf215546Sopenharmony_ci * does not check const registers to see if their value is 1.0, so it should
643bf215546Sopenharmony_ci * be called after the constant_folding optimization.
644bf215546Sopenharmony_ci * @return
645bf215546Sopenharmony_ci * 	0 if the ADD instruction is still part of the program.
646bf215546Sopenharmony_ci * 	1 if the ADD instruction is no longer part of the program.
647bf215546Sopenharmony_ci */
648bf215546Sopenharmony_cistatic int peephole_add_presub_inv(
649bf215546Sopenharmony_ci	struct radeon_compiler * c,
650bf215546Sopenharmony_ci	struct rc_instruction * inst_add)
651bf215546Sopenharmony_ci{
652bf215546Sopenharmony_ci	unsigned int i, swz;
653bf215546Sopenharmony_ci
654bf215546Sopenharmony_ci	if (!is_presub_candidate(c, inst_add))
655bf215546Sopenharmony_ci		return 0;
656bf215546Sopenharmony_ci
657bf215546Sopenharmony_ci	/* Check if src0 is 1. */
658bf215546Sopenharmony_ci	/* XXX It would be nice to use is_src_uniform_constant here, but that
659bf215546Sopenharmony_ci	 * function only works if the register's file is RC_FILE_NONE */
660bf215546Sopenharmony_ci	for(i = 0; i < 4; i++ ) {
661bf215546Sopenharmony_ci		if (!(inst_add->U.I.DstReg.WriteMask & (1 << i)))
662bf215546Sopenharmony_ci			continue;
663bf215546Sopenharmony_ci
664bf215546Sopenharmony_ci		swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i);
665bf215546Sopenharmony_ci		if (swz != RC_SWIZZLE_ONE || inst_add->U.I.SrcReg[0].Negate & (1 << i))
666bf215546Sopenharmony_ci			return 0;
667bf215546Sopenharmony_ci	}
668bf215546Sopenharmony_ci
669bf215546Sopenharmony_ci	/* Check src1. */
670bf215546Sopenharmony_ci	if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) !=
671bf215546Sopenharmony_ci						inst_add->U.I.DstReg.WriteMask
672bf215546Sopenharmony_ci		|| inst_add->U.I.SrcReg[1].Abs
673bf215546Sopenharmony_ci		|| (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY
674bf215546Sopenharmony_ci			&& inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT)
675bf215546Sopenharmony_ci		|| src_has_const_swz(inst_add->U.I.SrcReg[1])) {
676bf215546Sopenharmony_ci
677bf215546Sopenharmony_ci		return 0;
678bf215546Sopenharmony_ci	}
679bf215546Sopenharmony_ci
680bf215546Sopenharmony_ci	if (presub_helper(c, inst_add, RC_PRESUB_INV, presub_replace_inv)) {
681bf215546Sopenharmony_ci		rc_remove_instruction(inst_add);
682bf215546Sopenharmony_ci		return 1;
683bf215546Sopenharmony_ci	}
684bf215546Sopenharmony_ci	return 0;
685bf215546Sopenharmony_ci}
686bf215546Sopenharmony_ci
/**
 * State shared between peephole_mul_omod() and the omod_filter_*_cb
 * callbacks.
 */
struct peephole_mul_cb_data {
	/* Destination register that the callbacks compare each visited
	 * read or write against. */
	struct rc_dst_register *Writer;

	/* Set to 1 by a callback when a conflicting access is found; the
	 * callbacks never clear it. */
	unsigned int Clobbered;
};
691bf215546Sopenharmony_ci
692bf215546Sopenharmony_cistatic void omod_filter_reader_cb(
693bf215546Sopenharmony_ci	void * userdata,
694bf215546Sopenharmony_ci	struct rc_instruction * inst,
695bf215546Sopenharmony_ci	rc_register_file file,
696bf215546Sopenharmony_ci	unsigned int index,
697bf215546Sopenharmony_ci	unsigned int mask)
698bf215546Sopenharmony_ci{
699bf215546Sopenharmony_ci	struct peephole_mul_cb_data * d = userdata;
700bf215546Sopenharmony_ci	if (rc_src_reads_dst_mask(file, mask, index,
701bf215546Sopenharmony_ci		d->Writer->File, d->Writer->Index, d->Writer->WriteMask)) {
702bf215546Sopenharmony_ci
703bf215546Sopenharmony_ci		d->Clobbered = 1;
704bf215546Sopenharmony_ci	}
705bf215546Sopenharmony_ci}
706bf215546Sopenharmony_ci
707bf215546Sopenharmony_cistatic void omod_filter_writer_cb(
708bf215546Sopenharmony_ci	void * userdata,
709bf215546Sopenharmony_ci	struct rc_instruction * inst,
710bf215546Sopenharmony_ci	rc_register_file file,
711bf215546Sopenharmony_ci	unsigned int index,
712bf215546Sopenharmony_ci	unsigned int mask)
713bf215546Sopenharmony_ci{
714bf215546Sopenharmony_ci	struct peephole_mul_cb_data * d = userdata;
715bf215546Sopenharmony_ci	if (file == d->Writer->File && index == d->Writer->Index &&
716bf215546Sopenharmony_ci					(mask & d->Writer->WriteMask)) {
717bf215546Sopenharmony_ci		d->Clobbered = 1;
718bf215546Sopenharmony_ci	}
719bf215546Sopenharmony_ci}
720bf215546Sopenharmony_ci
721bf215546Sopenharmony_cistatic int peephole_mul_omod(
722bf215546Sopenharmony_ci	struct radeon_compiler * c,
723bf215546Sopenharmony_ci	struct rc_instruction * inst_mul,
724bf215546Sopenharmony_ci	struct rc_list * var_list)
725bf215546Sopenharmony_ci{
726bf215546Sopenharmony_ci	unsigned int chan = 0, swz, i;
727bf215546Sopenharmony_ci	int const_index = -1;
728bf215546Sopenharmony_ci	int temp_index = -1;
729bf215546Sopenharmony_ci	float const_value;
730bf215546Sopenharmony_ci	rc_omod_op omod_op = RC_OMOD_DISABLE;
731bf215546Sopenharmony_ci	struct rc_list * writer_list;
732bf215546Sopenharmony_ci	struct rc_variable * var;
733bf215546Sopenharmony_ci	struct peephole_mul_cb_data cb_data;
734bf215546Sopenharmony_ci	unsigned writemask_sum;
735bf215546Sopenharmony_ci
736bf215546Sopenharmony_ci	for (i = 0; i < 2; i++) {
737bf215546Sopenharmony_ci		unsigned int j;
738bf215546Sopenharmony_ci		if (inst_mul->U.I.SrcReg[i].File != RC_FILE_CONSTANT
739bf215546Sopenharmony_ci			&& inst_mul->U.I.SrcReg[i].File != RC_FILE_TEMPORARY) {
740bf215546Sopenharmony_ci			return 0;
741bf215546Sopenharmony_ci		}
742bf215546Sopenharmony_ci		if (inst_mul->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
743bf215546Sopenharmony_ci			if (temp_index != -1) {
744bf215546Sopenharmony_ci				/* The instruction has two temp sources */
745bf215546Sopenharmony_ci				return 0;
746bf215546Sopenharmony_ci			} else {
747bf215546Sopenharmony_ci				temp_index = i;
748bf215546Sopenharmony_ci				continue;
749bf215546Sopenharmony_ci			}
750bf215546Sopenharmony_ci		}
751bf215546Sopenharmony_ci		/* If we get this far Src[i] must be a constant src */
752bf215546Sopenharmony_ci		if (inst_mul->U.I.SrcReg[i].Negate) {
753bf215546Sopenharmony_ci			return 0;
754bf215546Sopenharmony_ci		}
755bf215546Sopenharmony_ci		/* The constant src needs to read from the same swizzle */
756bf215546Sopenharmony_ci		swz = RC_SWIZZLE_UNUSED;
757bf215546Sopenharmony_ci		chan = 0;
758bf215546Sopenharmony_ci		for (j = 0; j < 4; j++) {
759bf215546Sopenharmony_ci			unsigned int j_swz =
760bf215546Sopenharmony_ci				GET_SWZ(inst_mul->U.I.SrcReg[i].Swizzle, j);
761bf215546Sopenharmony_ci			if (j_swz == RC_SWIZZLE_UNUSED) {
762bf215546Sopenharmony_ci				continue;
763bf215546Sopenharmony_ci			}
764bf215546Sopenharmony_ci			if (swz == RC_SWIZZLE_UNUSED) {
765bf215546Sopenharmony_ci				swz = j_swz;
766bf215546Sopenharmony_ci				chan = j;
767bf215546Sopenharmony_ci			} else if (j_swz != swz) {
768bf215546Sopenharmony_ci				return 0;
769bf215546Sopenharmony_ci			}
770bf215546Sopenharmony_ci		}
771bf215546Sopenharmony_ci
772bf215546Sopenharmony_ci		if (const_index != -1) {
773bf215546Sopenharmony_ci			/* The instruction has two constant sources */
774bf215546Sopenharmony_ci			return 0;
775bf215546Sopenharmony_ci		} else {
776bf215546Sopenharmony_ci			const_index = i;
777bf215546Sopenharmony_ci		}
778bf215546Sopenharmony_ci	}
779bf215546Sopenharmony_ci
780bf215546Sopenharmony_ci	if (!rc_src_reg_is_immediate(c, inst_mul->U.I.SrcReg[const_index].File,
781bf215546Sopenharmony_ci				inst_mul->U.I.SrcReg[const_index].Index)) {
782bf215546Sopenharmony_ci		return 0;
783bf215546Sopenharmony_ci	}
784bf215546Sopenharmony_ci	const_value = rc_get_constant_value(c,
785bf215546Sopenharmony_ci			inst_mul->U.I.SrcReg[const_index].Index,
786bf215546Sopenharmony_ci			inst_mul->U.I.SrcReg[const_index].Swizzle,
787bf215546Sopenharmony_ci			inst_mul->U.I.SrcReg[const_index].Negate,
788bf215546Sopenharmony_ci			chan);
789bf215546Sopenharmony_ci
790bf215546Sopenharmony_ci	if (const_value == 2.0f) {
791bf215546Sopenharmony_ci		omod_op = RC_OMOD_MUL_2;
792bf215546Sopenharmony_ci	} else if (const_value == 4.0f) {
793bf215546Sopenharmony_ci		omod_op = RC_OMOD_MUL_4;
794bf215546Sopenharmony_ci	} else if (const_value == 8.0f) {
795bf215546Sopenharmony_ci		omod_op = RC_OMOD_MUL_8;
796bf215546Sopenharmony_ci	} else if (const_value == (1.0f / 2.0f)) {
797bf215546Sopenharmony_ci		omod_op = RC_OMOD_DIV_2;
798bf215546Sopenharmony_ci	} else if (const_value == (1.0f / 4.0f)) {
799bf215546Sopenharmony_ci		omod_op = RC_OMOD_DIV_4;
800bf215546Sopenharmony_ci	} else if (const_value == (1.0f / 8.0f)) {
801bf215546Sopenharmony_ci		omod_op = RC_OMOD_DIV_8;
802bf215546Sopenharmony_ci	} else {
803bf215546Sopenharmony_ci		return 0;
804bf215546Sopenharmony_ci	}
805bf215546Sopenharmony_ci
806bf215546Sopenharmony_ci	writer_list = rc_variable_list_get_writers_one_reader(var_list,
807bf215546Sopenharmony_ci		RC_INSTRUCTION_NORMAL, &inst_mul->U.I.SrcReg[temp_index]);
808bf215546Sopenharmony_ci
809bf215546Sopenharmony_ci	if (!writer_list) {
810bf215546Sopenharmony_ci		return 0;
811bf215546Sopenharmony_ci	}
812bf215546Sopenharmony_ci
813bf215546Sopenharmony_ci	cb_data.Clobbered = 0;
814bf215546Sopenharmony_ci	cb_data.Writer = &inst_mul->U.I.DstReg;
815bf215546Sopenharmony_ci	for (var = writer_list->Item; var; var = var->Friend) {
816bf215546Sopenharmony_ci		struct rc_instruction * inst;
817bf215546Sopenharmony_ci		const struct rc_opcode_info * info = rc_get_opcode_info(
818bf215546Sopenharmony_ci				var->Inst->U.I.Opcode);
819bf215546Sopenharmony_ci		if (info->HasTexture) {
820bf215546Sopenharmony_ci			return 0;
821bf215546Sopenharmony_ci		}
822bf215546Sopenharmony_ci		if (var->Inst->U.I.SaturateMode != RC_SATURATE_NONE) {
823bf215546Sopenharmony_ci			return 0;
824bf215546Sopenharmony_ci		}
825bf215546Sopenharmony_ci		for (inst = inst_mul->Prev; inst != var->Inst;
826bf215546Sopenharmony_ci							inst = inst->Prev) {
827bf215546Sopenharmony_ci			rc_for_all_reads_mask(inst, omod_filter_reader_cb,
828bf215546Sopenharmony_ci								&cb_data);
829bf215546Sopenharmony_ci			rc_for_all_writes_mask(inst, omod_filter_writer_cb,
830bf215546Sopenharmony_ci								&cb_data);
831bf215546Sopenharmony_ci			if (cb_data.Clobbered) {
832bf215546Sopenharmony_ci				break;
833bf215546Sopenharmony_ci			}
834bf215546Sopenharmony_ci		}
835bf215546Sopenharmony_ci	}
836bf215546Sopenharmony_ci
837bf215546Sopenharmony_ci	if (cb_data.Clobbered) {
838bf215546Sopenharmony_ci		return 0;
839bf215546Sopenharmony_ci	}
840bf215546Sopenharmony_ci
841bf215546Sopenharmony_ci	writemask_sum = rc_variable_writemask_sum(writer_list->Item);
842bf215546Sopenharmony_ci
843bf215546Sopenharmony_ci	/* rc_normal_rewrite_writemask can't expand a previous writemask to store
844bf215546Sopenharmony_ci	 * more channels replicated.
845bf215546Sopenharmony_ci	 */
846bf215546Sopenharmony_ci	if (util_bitcount(writemask_sum) < util_bitcount(inst_mul->U.I.DstReg.WriteMask))
847bf215546Sopenharmony_ci		return 0;
848bf215546Sopenharmony_ci
849bf215546Sopenharmony_ci	/* Rewrite the instructions */
850bf215546Sopenharmony_ci	for (var = writer_list->Item; var; var = var->Friend) {
851bf215546Sopenharmony_ci		struct rc_variable * writer = var;
852bf215546Sopenharmony_ci		unsigned conversion_swizzle = rc_make_conversion_swizzle(
853bf215546Sopenharmony_ci					writemask_sum,
854bf215546Sopenharmony_ci					inst_mul->U.I.DstReg.WriteMask);
855bf215546Sopenharmony_ci		writer->Inst->U.I.Omod = omod_op;
856bf215546Sopenharmony_ci		writer->Inst->U.I.DstReg.File = inst_mul->U.I.DstReg.File;
857bf215546Sopenharmony_ci		writer->Inst->U.I.DstReg.Index = inst_mul->U.I.DstReg.Index;
858bf215546Sopenharmony_ci		rc_normal_rewrite_writemask(writer->Inst, conversion_swizzle);
859bf215546Sopenharmony_ci		writer->Inst->U.I.SaturateMode = inst_mul->U.I.SaturateMode;
860bf215546Sopenharmony_ci	}
861bf215546Sopenharmony_ci
862bf215546Sopenharmony_ci	rc_remove_instruction(inst_mul);
863bf215546Sopenharmony_ci
864bf215546Sopenharmony_ci	return 1;
865bf215546Sopenharmony_ci}
866bf215546Sopenharmony_ci
867bf215546Sopenharmony_ci/**
868bf215546Sopenharmony_ci * @return
869bf215546Sopenharmony_ci * 	0 if inst is still part of the program.
870bf215546Sopenharmony_ci * 	1 if inst is no longer part of the program.
871bf215546Sopenharmony_ci */
872bf215546Sopenharmony_cistatic int peephole(struct radeon_compiler * c, struct rc_instruction * inst)
873bf215546Sopenharmony_ci{
874bf215546Sopenharmony_ci	switch(inst->U.I.Opcode){
875bf215546Sopenharmony_ci	case RC_OPCODE_ADD:
876bf215546Sopenharmony_ci		if (c->has_presub) {
877bf215546Sopenharmony_ci			if(peephole_add_presub_inv(c, inst))
878bf215546Sopenharmony_ci				return 1;
879bf215546Sopenharmony_ci			if(peephole_add_presub_add(c, inst))
880bf215546Sopenharmony_ci				return 1;
881bf215546Sopenharmony_ci		}
882bf215546Sopenharmony_ci		break;
883bf215546Sopenharmony_ci	default:
884bf215546Sopenharmony_ci		break;
885bf215546Sopenharmony_ci	}
886bf215546Sopenharmony_ci	return 0;
887bf215546Sopenharmony_ci}
888bf215546Sopenharmony_ci
889bf215546Sopenharmony_cistatic unsigned int merge_swizzles(unsigned int swz1, unsigned int swz2) {
890bf215546Sopenharmony_ci	unsigned int new_swz = rc_init_swizzle(RC_SWIZZLE_UNUSED, 0);
891bf215546Sopenharmony_ci	for (unsigned int chan = 0; chan < 4; chan++) {
892bf215546Sopenharmony_ci		unsigned int swz = GET_SWZ(swz1, chan);
893bf215546Sopenharmony_ci		if (swz != RC_SWIZZLE_UNUSED) {
894bf215546Sopenharmony_ci			SET_SWZ(new_swz, chan, swz);
895bf215546Sopenharmony_ci			continue;
896bf215546Sopenharmony_ci		}
897bf215546Sopenharmony_ci		swz = GET_SWZ(swz2, chan);
898bf215546Sopenharmony_ci		SET_SWZ(new_swz, chan, swz);
899bf215546Sopenharmony_ci	}
900bf215546Sopenharmony_ci	return new_swz;
901bf215546Sopenharmony_ci}
902bf215546Sopenharmony_ci
903bf215546Sopenharmony_cistatic int merge_movs(struct radeon_compiler * c, struct rc_instruction * inst)
904bf215546Sopenharmony_ci{
905bf215546Sopenharmony_ci	unsigned int orig_dst_reg = inst->U.I.DstReg.Index;
906bf215546Sopenharmony_ci	unsigned int orig_dst_file = inst->U.I.DstReg.File;
907bf215546Sopenharmony_ci	unsigned int orig_dst_wmask = inst->U.I.DstReg.WriteMask;
908bf215546Sopenharmony_ci	unsigned int orig_src_reg = inst->U.I.SrcReg[0].Index;
909bf215546Sopenharmony_ci	unsigned int orig_src_file = inst->U.I.SrcReg[0].File;
910bf215546Sopenharmony_ci
911bf215546Sopenharmony_ci	struct rc_instruction * cur = inst;
912bf215546Sopenharmony_ci	while (cur!= &c->Program.Instructions) {
913bf215546Sopenharmony_ci		cur = cur->Next;
914bf215546Sopenharmony_ci		const struct rc_opcode_info * opcode = rc_get_opcode_info(cur->U.I.Opcode);
915bf215546Sopenharmony_ci
916bf215546Sopenharmony_ci		/* Keep it simple for now and stop when encountering any
917bf215546Sopenharmony_ci		 * control flow.
918bf215546Sopenharmony_ci		 */
919bf215546Sopenharmony_ci		if (opcode->IsFlowControl)
920bf215546Sopenharmony_ci			return 0;
921bf215546Sopenharmony_ci
922bf215546Sopenharmony_ci		/* Stop when the original destination is overwritten */
923bf215546Sopenharmony_ci		if (orig_dst_reg == cur->U.I.DstReg.Index &&
924bf215546Sopenharmony_ci			orig_dst_file == cur->U.I.DstReg.File &&
925bf215546Sopenharmony_ci			(orig_dst_wmask & cur->U.I.DstReg.WriteMask) != 0)
926bf215546Sopenharmony_ci			return 0;
927bf215546Sopenharmony_ci
928bf215546Sopenharmony_ci		/* Stop the search when the original instruction destination
929bf215546Sopenharmony_ci		 * is used as a source for anything.
930bf215546Sopenharmony_ci		 */
931bf215546Sopenharmony_ci		for (unsigned i = 0; i < opcode->NumSrcRegs; i++) {
932bf215546Sopenharmony_ci			if (cur->U.I.SrcReg[i].File == orig_dst_file &&
933bf215546Sopenharmony_ci				cur->U.I.SrcReg[i].Index == orig_dst_reg)
934bf215546Sopenharmony_ci				return 0;
935bf215546Sopenharmony_ci		}
936bf215546Sopenharmony_ci
937bf215546Sopenharmony_ci		if (cur->U.I.Opcode == RC_OPCODE_MOV &&
938bf215546Sopenharmony_ci			cur->U.I.DstReg.File == orig_dst_file &&
939bf215546Sopenharmony_ci			cur->U.I.DstReg.Index == orig_dst_reg &&
940bf215546Sopenharmony_ci			(cur->U.I.DstReg.WriteMask & orig_dst_wmask) == 0) {
941bf215546Sopenharmony_ci
942bf215546Sopenharmony_ci			/* We can merge the movs if one of them is from inline constant */
943bf215546Sopenharmony_ci			if (cur->U.I.SrcReg[0].File == RC_FILE_NONE ||
944bf215546Sopenharmony_ci				orig_src_file == RC_FILE_NONE) {
945bf215546Sopenharmony_ci				cur->U.I.DstReg.WriteMask |= orig_dst_wmask;
946bf215546Sopenharmony_ci
947bf215546Sopenharmony_ci				if (cur->U.I.SrcReg[0].File == RC_FILE_NONE) {
948bf215546Sopenharmony_ci					cur->U.I.SrcReg[0].File = orig_src_file;
949bf215546Sopenharmony_ci					cur->U.I.SrcReg[0].Index = orig_src_reg;
950bf215546Sopenharmony_ci					cur->U.I.SrcReg[0].Abs = inst->U.I.SrcReg[0].Abs;
951bf215546Sopenharmony_ci					cur->U.I.SrcReg[0].RelAddr = inst->U.I.SrcReg[0].RelAddr;
952bf215546Sopenharmony_ci				}
953bf215546Sopenharmony_ci				cur->U.I.SrcReg[0].Swizzle =
954bf215546Sopenharmony_ci					merge_swizzles(cur->U.I.SrcReg[0].Swizzle,
955bf215546Sopenharmony_ci							inst->U.I.SrcReg[0].Swizzle);
956bf215546Sopenharmony_ci
957bf215546Sopenharmony_ci				cur->U.I.SrcReg[0].Negate |= inst->U.I.SrcReg[0].Negate;
958bf215546Sopenharmony_ci
959bf215546Sopenharmony_ci				/* finally delete the original mov */
960bf215546Sopenharmony_ci				rc_remove_instruction(inst);
961bf215546Sopenharmony_ci
962bf215546Sopenharmony_ci				return 1;
963bf215546Sopenharmony_ci			}
964bf215546Sopenharmony_ci		}
965bf215546Sopenharmony_ci	}
966bf215546Sopenharmony_ci	return 0;
967bf215546Sopenharmony_ci}
968bf215546Sopenharmony_ci
969bf215546Sopenharmony_civoid rc_optimize(struct radeon_compiler * c, void *user)
970bf215546Sopenharmony_ci{
971bf215546Sopenharmony_ci	struct rc_instruction * inst = c->Program.Instructions.Next;
972bf215546Sopenharmony_ci	while(inst != &c->Program.Instructions) {
973bf215546Sopenharmony_ci		struct rc_instruction * cur = inst;
974bf215546Sopenharmony_ci		inst = inst->Next;
975bf215546Sopenharmony_ci
976bf215546Sopenharmony_ci		constant_folding(c, cur);
977bf215546Sopenharmony_ci
978bf215546Sopenharmony_ci		if(peephole(c, cur))
979bf215546Sopenharmony_ci			continue;
980bf215546Sopenharmony_ci
981bf215546Sopenharmony_ci		if (cur->U.I.Opcode == RC_OPCODE_MOV) {
982bf215546Sopenharmony_ci			if (c->is_r500) {
983bf215546Sopenharmony_ci				if (merge_movs(c, cur))
984bf215546Sopenharmony_ci					continue;
985bf215546Sopenharmony_ci			}
986bf215546Sopenharmony_ci			copy_propagate(c, cur);
987bf215546Sopenharmony_ci			/* cur may no longer be part of the program */
988bf215546Sopenharmony_ci		}
989bf215546Sopenharmony_ci	}
990bf215546Sopenharmony_ci
991bf215546Sopenharmony_ci	if (!c->has_omod) {
992bf215546Sopenharmony_ci		return;
993bf215546Sopenharmony_ci	}
994bf215546Sopenharmony_ci
995bf215546Sopenharmony_ci	inst = c->Program.Instructions.Next;
996bf215546Sopenharmony_ci	struct rc_list * var_list = NULL;
997bf215546Sopenharmony_ci	while(inst != &c->Program.Instructions) {
998bf215546Sopenharmony_ci		struct rc_instruction * cur = inst;
999bf215546Sopenharmony_ci		inst = inst->Next;
1000bf215546Sopenharmony_ci		if (cur->U.I.Opcode == RC_OPCODE_MUL) {
1001bf215546Sopenharmony_ci			if (!var_list)
1002bf215546Sopenharmony_ci				var_list = rc_get_variables(c);
1003bf215546Sopenharmony_ci			if (peephole_mul_omod(c, cur, var_list))
1004bf215546Sopenharmony_ci				var_list = NULL;
1005bf215546Sopenharmony_ci		}
1006bf215546Sopenharmony_ci	}
1007bf215546Sopenharmony_ci}
1008