/*
 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Vadim Girlin
 */
26bf215546Sopenharmony_ci
27bf215546Sopenharmony_ci#include <cmath>
28bf215546Sopenharmony_ci
29bf215546Sopenharmony_ci#include "sb_shader.h"
30bf215546Sopenharmony_ci
31bf215546Sopenharmony_cinamespace r600_sb {
32bf215546Sopenharmony_ci
33bf215546Sopenharmony_civalue* get_select_value_for_em(shader& sh, value* em) {
34bf215546Sopenharmony_ci	if (!em->def)
35bf215546Sopenharmony_ci		return NULL;
36bf215546Sopenharmony_ci
37bf215546Sopenharmony_ci	node *predset = em->def;
38bf215546Sopenharmony_ci	if (!predset->is_pred_set())
39bf215546Sopenharmony_ci		return NULL;
40bf215546Sopenharmony_ci
41bf215546Sopenharmony_ci	alu_node *s = sh.clone(static_cast<alu_node*>(predset));
42bf215546Sopenharmony_ci	convert_predset_to_set(sh, s);
43bf215546Sopenharmony_ci
44bf215546Sopenharmony_ci	predset->insert_after(s);
45bf215546Sopenharmony_ci
46bf215546Sopenharmony_ci	value* &d0 = s->dst[0];
47bf215546Sopenharmony_ci	d0 = sh.create_temp_value();
48bf215546Sopenharmony_ci	d0->def = s;
49bf215546Sopenharmony_ci	return d0;
50bf215546Sopenharmony_ci}
51bf215546Sopenharmony_ci
52bf215546Sopenharmony_civoid convert_to_mov(alu_node &n, value *src, bool neg, bool abs) {
53bf215546Sopenharmony_ci	n.src.resize(1);
54bf215546Sopenharmony_ci	n.src[0] = src;
55bf215546Sopenharmony_ci	n.bc.src[0].abs = abs;
56bf215546Sopenharmony_ci	n.bc.src[0].neg = neg;
57bf215546Sopenharmony_ci	n.bc.set_op(ALU_OP1_MOV);
58bf215546Sopenharmony_ci}
59bf215546Sopenharmony_ci
60bf215546Sopenharmony_ciexpr_handler::expr_handler(shader& sh) : sh(sh), vt(sh.vt) {}
61bf215546Sopenharmony_ci
62bf215546Sopenharmony_civalue * expr_handler::get_const(const literal &l) {
63bf215546Sopenharmony_ci	value *v = sh.get_const_value(l);
64bf215546Sopenharmony_ci	if (!v->gvn_source)
65bf215546Sopenharmony_ci		vt.add_value(v);
66bf215546Sopenharmony_ci	return v;
67bf215546Sopenharmony_ci}
68bf215546Sopenharmony_ci
69bf215546Sopenharmony_civoid expr_handler::assign_source(value *dst, value *src) {
70bf215546Sopenharmony_ci	dst->gvn_source = src->gvn_source;
71bf215546Sopenharmony_ci}
72bf215546Sopenharmony_ci
73bf215546Sopenharmony_cibool expr_handler::equal(value *l, value *r) {
74bf215546Sopenharmony_ci
75bf215546Sopenharmony_ci	assert(l != r);
76bf215546Sopenharmony_ci
77bf215546Sopenharmony_ci	if (l->is_lds_access() || r->is_lds_access())
78bf215546Sopenharmony_ci		return false;
79bf215546Sopenharmony_ci	if (l->gvalue() == r->gvalue())
80bf215546Sopenharmony_ci		return true;
81bf215546Sopenharmony_ci
82bf215546Sopenharmony_ci	if (l->def && r->def)
83bf215546Sopenharmony_ci		return defs_equal(l, r);
84bf215546Sopenharmony_ci
85bf215546Sopenharmony_ci	if (l->is_rel() && r->is_rel())
86bf215546Sopenharmony_ci		return ivars_equal(l, r);
87bf215546Sopenharmony_ci
88bf215546Sopenharmony_ci	return false;
89bf215546Sopenharmony_ci}
90bf215546Sopenharmony_ci
91bf215546Sopenharmony_cibool expr_handler::ivars_equal(value* l, value* r) {
92bf215546Sopenharmony_ci	if (l->rel->gvalue() == r->rel->gvalue()
93bf215546Sopenharmony_ci			&& l->select == r->select) {
94bf215546Sopenharmony_ci
95bf215546Sopenharmony_ci		vvec &lv = l->mdef.empty() ? l->muse : l->mdef;
96bf215546Sopenharmony_ci		vvec &rv = r->mdef.empty() ? r->muse : r->mdef;
97bf215546Sopenharmony_ci
98bf215546Sopenharmony_ci		// FIXME: replace this with more precise aliasing test
99bf215546Sopenharmony_ci		return lv == rv;
100bf215546Sopenharmony_ci	}
101bf215546Sopenharmony_ci	return false;
102bf215546Sopenharmony_ci}
103bf215546Sopenharmony_ci
104bf215546Sopenharmony_cibool expr_handler::defs_equal(value* l, value* r) {
105bf215546Sopenharmony_ci
106bf215546Sopenharmony_ci	node *d1 = l->def;
107bf215546Sopenharmony_ci	node *d2 = r->def;
108bf215546Sopenharmony_ci
109bf215546Sopenharmony_ci	if (d1->type != d2->type || d1->subtype != d2->subtype)
110bf215546Sopenharmony_ci		return false;
111bf215546Sopenharmony_ci
112bf215546Sopenharmony_ci	if (d1->is_pred_set() || d2->is_pred_set())
113bf215546Sopenharmony_ci		return false;
114bf215546Sopenharmony_ci
115bf215546Sopenharmony_ci	if (d1->type == NT_OP) {
116bf215546Sopenharmony_ci		switch (d1->subtype) {
117bf215546Sopenharmony_ci		case NST_ALU_INST:
118bf215546Sopenharmony_ci			return ops_equal(
119bf215546Sopenharmony_ci					static_cast<alu_node*>(d1),
120bf215546Sopenharmony_ci					static_cast<alu_node*>(d2));
121bf215546Sopenharmony_ci//		case NST_FETCH_INST: return ops_equal(static_cast<fetch_node*>(d1),
122bf215546Sopenharmony_ci//			static_cast<fetch_node*>(d2);
123bf215546Sopenharmony_ci//		case NST_CF_INST: return ops_equal(static_cast<cf_node*>(d1),
124bf215546Sopenharmony_ci//			static_cast<cf_node*>(d2);
125bf215546Sopenharmony_ci		default:
126bf215546Sopenharmony_ci			break;
127bf215546Sopenharmony_ci		}
128bf215546Sopenharmony_ci	}
129bf215546Sopenharmony_ci	return false;
130bf215546Sopenharmony_ci}
131bf215546Sopenharmony_ci
132bf215546Sopenharmony_cibool expr_handler::try_fold(value* v) {
133bf215546Sopenharmony_ci	assert(!v->gvn_source);
134bf215546Sopenharmony_ci
135bf215546Sopenharmony_ci	if (v->def)
136bf215546Sopenharmony_ci		try_fold(v->def);
137bf215546Sopenharmony_ci
138bf215546Sopenharmony_ci	if (v->gvn_source)
139bf215546Sopenharmony_ci		return true;
140bf215546Sopenharmony_ci
141bf215546Sopenharmony_ci	return false;
142bf215546Sopenharmony_ci}
143bf215546Sopenharmony_ci
144bf215546Sopenharmony_cibool expr_handler::try_fold(node* n) {
145bf215546Sopenharmony_ci	return n->fold_dispatch(this);
146bf215546Sopenharmony_ci}
147bf215546Sopenharmony_ci
148bf215546Sopenharmony_cibool expr_handler::fold(node& n) {
149bf215546Sopenharmony_ci	if (n.subtype == NST_PHI) {
150bf215546Sopenharmony_ci
151bf215546Sopenharmony_ci		value *s = n.src[0];
152bf215546Sopenharmony_ci
153bf215546Sopenharmony_ci		// FIXME disabling phi folding for registers for now, otherwise we lose
154bf215546Sopenharmony_ci		// control flow information in some cases
155bf215546Sopenharmony_ci		// (GCM fails on tests/shaders/glsl-fs-if-nested-loop.shader_test)
156bf215546Sopenharmony_ci		// probably control flow transformation is required to enable it
157bf215546Sopenharmony_ci		if (s->is_sgpr())
158bf215546Sopenharmony_ci			return false;
159bf215546Sopenharmony_ci
160bf215546Sopenharmony_ci		for(vvec::iterator I = n.src.begin() + 1, E = n.src.end(); I != E; ++I) {
161bf215546Sopenharmony_ci			value *v = *I;
162bf215546Sopenharmony_ci			if (!s->v_equal(v))
163bf215546Sopenharmony_ci				return false;
164bf215546Sopenharmony_ci		}
165bf215546Sopenharmony_ci
166bf215546Sopenharmony_ci		assign_source(n.dst[0], s);
167bf215546Sopenharmony_ci	} else {
168bf215546Sopenharmony_ci		assert(n.subtype == NST_PSI);
169bf215546Sopenharmony_ci		assert(n.src.size() >= 6);
170bf215546Sopenharmony_ci
171bf215546Sopenharmony_ci		value *s = n.src[2];
172bf215546Sopenharmony_ci		assert(s->gvn_source);
173bf215546Sopenharmony_ci
174bf215546Sopenharmony_ci		for(vvec::iterator I = n.src.begin() + 3, E = n.src.end(); I != E; I += 3) {
175bf215546Sopenharmony_ci			value *v = *(I+2);
176bf215546Sopenharmony_ci			if (!s->v_equal(v))
177bf215546Sopenharmony_ci				return false;
178bf215546Sopenharmony_ci		}
179bf215546Sopenharmony_ci		assign_source(n.dst[0], s);
180bf215546Sopenharmony_ci	}
181bf215546Sopenharmony_ci	return true;
182bf215546Sopenharmony_ci}
183bf215546Sopenharmony_ci
184bf215546Sopenharmony_cibool expr_handler::fold(container_node& n) {
185bf215546Sopenharmony_ci	return false;
186bf215546Sopenharmony_ci}
187bf215546Sopenharmony_ci
188bf215546Sopenharmony_cibool expr_handler::fold_setcc(alu_node &n) {
189bf215546Sopenharmony_ci
190bf215546Sopenharmony_ci	value* v0 = n.src[0]->gvalue();
191bf215546Sopenharmony_ci	value* v1 = n.src[1]->gvalue();
192bf215546Sopenharmony_ci
193bf215546Sopenharmony_ci	assert(v0 && v1 && n.dst[0]);
194bf215546Sopenharmony_ci
195bf215546Sopenharmony_ci	unsigned flags = n.bc.op_ptr->flags;
196bf215546Sopenharmony_ci	unsigned cc = flags & AF_CC_MASK;
197bf215546Sopenharmony_ci	unsigned cmp_type = flags & AF_CMP_TYPE_MASK;
198bf215546Sopenharmony_ci	unsigned dst_type = flags & AF_DST_TYPE_MASK;
199bf215546Sopenharmony_ci
200bf215546Sopenharmony_ci	bool cond_result;
201bf215546Sopenharmony_ci	bool have_result = false;
202bf215546Sopenharmony_ci
203bf215546Sopenharmony_ci	bool isc0 = v0->is_const();
204bf215546Sopenharmony_ci	bool isc1 = v1->is_const();
205bf215546Sopenharmony_ci
206bf215546Sopenharmony_ci	literal dv, cv0, cv1;
207bf215546Sopenharmony_ci
208bf215546Sopenharmony_ci	if (isc0) {
209bf215546Sopenharmony_ci		cv0 = v0->get_const_value();
210bf215546Sopenharmony_ci		apply_alu_src_mod(n.bc, 0, cv0);
211bf215546Sopenharmony_ci	}
212bf215546Sopenharmony_ci
213bf215546Sopenharmony_ci	if (isc1) {
214bf215546Sopenharmony_ci		cv1 = v1->get_const_value();
215bf215546Sopenharmony_ci		apply_alu_src_mod(n.bc, 1, cv1);
216bf215546Sopenharmony_ci	}
217bf215546Sopenharmony_ci
218bf215546Sopenharmony_ci	if (isc0 && isc1) {
219bf215546Sopenharmony_ci		cond_result = evaluate_condition(flags, cv0, cv1);
220bf215546Sopenharmony_ci		have_result = true;
221bf215546Sopenharmony_ci	} else if (isc1) {
222bf215546Sopenharmony_ci		if (cmp_type == AF_FLOAT_CMP) {
223bf215546Sopenharmony_ci			if (n.bc.src[0].abs && !n.bc.src[0].neg) {
224bf215546Sopenharmony_ci				if (cv1.f < 0.0f && cc == AF_CC_NE) {
225bf215546Sopenharmony_ci					cond_result = true;
226bf215546Sopenharmony_ci					have_result = true;
227bf215546Sopenharmony_ci				}
228bf215546Sopenharmony_ci			} else if (n.bc.src[0].abs && n.bc.src[0].neg) {
229bf215546Sopenharmony_ci				if (cv1.f > 0.0f && cc == AF_CC_E) {
230bf215546Sopenharmony_ci					cond_result = false;
231bf215546Sopenharmony_ci					have_result = true;
232bf215546Sopenharmony_ci				}
233bf215546Sopenharmony_ci			}
234bf215546Sopenharmony_ci		} else if (cmp_type == AF_UINT_CMP && cv1.u == 0 && cc == AF_CC_GE) {
235bf215546Sopenharmony_ci			cond_result = true;
236bf215546Sopenharmony_ci			have_result = true;
237bf215546Sopenharmony_ci		}
238bf215546Sopenharmony_ci	} else if (isc0) {
239bf215546Sopenharmony_ci		if (cmp_type == AF_FLOAT_CMP) {
240bf215546Sopenharmony_ci			if (n.bc.src[1].abs && !n.bc.src[1].neg) {
241bf215546Sopenharmony_ci				if (cv0.f < 0.0f && (cc == AF_CC_E)) {
242bf215546Sopenharmony_ci					cond_result = false;
243bf215546Sopenharmony_ci					have_result = true;
244bf215546Sopenharmony_ci				}
245bf215546Sopenharmony_ci			} else if (n.bc.src[1].abs && n.bc.src[1].neg) {
246bf215546Sopenharmony_ci				if (cv0.f > 0.0f && cc == AF_CC_NE) {
247bf215546Sopenharmony_ci					cond_result = true;
248bf215546Sopenharmony_ci					have_result = true;
249bf215546Sopenharmony_ci				}
250bf215546Sopenharmony_ci			}
251bf215546Sopenharmony_ci		} else if (cmp_type == AF_UINT_CMP && cv0.u == 0 && cc == AF_CC_GT) {
252bf215546Sopenharmony_ci			cond_result = false;
253bf215546Sopenharmony_ci			have_result = true;
254bf215546Sopenharmony_ci		}
255bf215546Sopenharmony_ci	} else if (v0 == v1) {
256bf215546Sopenharmony_ci		bc_alu_src &s0 = n.bc.src[0], &s1 = n.bc.src[1];
257bf215546Sopenharmony_ci		if (s0.abs == s1.abs && s0.neg == s1.neg && cmp_type != AF_FLOAT_CMP) {
258bf215546Sopenharmony_ci			// NOTE can't handle float comparisons here because of NaNs
259bf215546Sopenharmony_ci			cond_result = (cc == AF_CC_E || cc == AF_CC_GE);
260bf215546Sopenharmony_ci			have_result = true;
261bf215546Sopenharmony_ci		}
262bf215546Sopenharmony_ci	}
263bf215546Sopenharmony_ci
264bf215546Sopenharmony_ci	if (have_result) {
265bf215546Sopenharmony_ci		literal result;
266bf215546Sopenharmony_ci
267bf215546Sopenharmony_ci		if (cond_result)
268bf215546Sopenharmony_ci			result = dst_type != AF_FLOAT_DST ?
269bf215546Sopenharmony_ci					literal(0xFFFFFFFFu) : literal(1.0f);
270bf215546Sopenharmony_ci		else
271bf215546Sopenharmony_ci			result = literal(0);
272bf215546Sopenharmony_ci
273bf215546Sopenharmony_ci		convert_to_mov(n, sh.get_const_value(result));
274bf215546Sopenharmony_ci		return fold_alu_op1(n);
275bf215546Sopenharmony_ci	}
276bf215546Sopenharmony_ci
277bf215546Sopenharmony_ci	return false;
278bf215546Sopenharmony_ci}
279bf215546Sopenharmony_ci
280bf215546Sopenharmony_cibool expr_handler::fold(alu_node& n) {
281bf215546Sopenharmony_ci
282bf215546Sopenharmony_ci	switch (n.bc.op_ptr->src_count) {
283bf215546Sopenharmony_ci	case 1: return fold_alu_op1(n);
284bf215546Sopenharmony_ci	case 2: return fold_alu_op2(n);
285bf215546Sopenharmony_ci	case 3: return fold_alu_op3(n);
286bf215546Sopenharmony_ci	default:
287bf215546Sopenharmony_ci		assert(0);
288bf215546Sopenharmony_ci	}
289bf215546Sopenharmony_ci	return false;
290bf215546Sopenharmony_ci}
291bf215546Sopenharmony_ci
292bf215546Sopenharmony_cibool expr_handler::fold(fetch_node& n) {
293bf215546Sopenharmony_ci
294bf215546Sopenharmony_ci	unsigned chan = 0;
295bf215546Sopenharmony_ci	for (vvec::iterator I = n.dst.begin(), E = n.dst.end(); I != E; ++I) {
296bf215546Sopenharmony_ci		value* &v = *I;
297bf215546Sopenharmony_ci		if (v) {
298bf215546Sopenharmony_ci			if (n.bc.dst_sel[chan] == SEL_0)
299bf215546Sopenharmony_ci				assign_source(*I, get_const(0.0f));
300bf215546Sopenharmony_ci			else if (n.bc.dst_sel[chan] == SEL_1)
301bf215546Sopenharmony_ci				assign_source(*I, get_const(1.0f));
302bf215546Sopenharmony_ci		}
303bf215546Sopenharmony_ci		++chan;
304bf215546Sopenharmony_ci	}
305bf215546Sopenharmony_ci	return false;
306bf215546Sopenharmony_ci}
307bf215546Sopenharmony_ci
308bf215546Sopenharmony_cibool expr_handler::fold(cf_node& n) {
309bf215546Sopenharmony_ci	return false;
310bf215546Sopenharmony_ci}
311bf215546Sopenharmony_ci
312bf215546Sopenharmony_civoid expr_handler::apply_alu_src_mod(const bc_alu &bc, unsigned src,
313bf215546Sopenharmony_ci                                     literal &v) {
314bf215546Sopenharmony_ci	const bc_alu_src &s = bc.src[src];
315bf215546Sopenharmony_ci
316bf215546Sopenharmony_ci	if (s.abs)
317bf215546Sopenharmony_ci		v = fabsf(v.f);
318bf215546Sopenharmony_ci	if (s.neg)
319bf215546Sopenharmony_ci		v = -v.f;
320bf215546Sopenharmony_ci}
321bf215546Sopenharmony_ci
322bf215546Sopenharmony_civoid expr_handler::apply_alu_dst_mod(const bc_alu &bc, literal &v) {
323bf215546Sopenharmony_ci	const float omod_coeff[] = {2.0f, 4.0, 0.5f};
324bf215546Sopenharmony_ci
325bf215546Sopenharmony_ci	if (bc.omod)
326bf215546Sopenharmony_ci		v = v.f * omod_coeff[bc.omod - 1];
327bf215546Sopenharmony_ci	if (bc.clamp)
328bf215546Sopenharmony_ci		v = float_clamp(v.f);
329bf215546Sopenharmony_ci}
330bf215546Sopenharmony_ci
331bf215546Sopenharmony_cibool expr_handler::args_equal(const vvec &l, const vvec &r) {
332bf215546Sopenharmony_ci
333bf215546Sopenharmony_ci	assert(l.size() == r.size());
334bf215546Sopenharmony_ci
335bf215546Sopenharmony_ci	int s = l.size();
336bf215546Sopenharmony_ci
337bf215546Sopenharmony_ci	for (int k = 0; k < s; ++k) {
338bf215546Sopenharmony_ci		if (!l[k]->v_equal(r[k]))
339bf215546Sopenharmony_ci			return false;
340bf215546Sopenharmony_ci	}
341bf215546Sopenharmony_ci
342bf215546Sopenharmony_ci	return true;
343bf215546Sopenharmony_ci}
344bf215546Sopenharmony_ci
345bf215546Sopenharmony_cibool expr_handler::ops_equal(const alu_node *l, const alu_node* r) {
346bf215546Sopenharmony_ci	const bc_alu &b0 = l->bc;
347bf215546Sopenharmony_ci	const bc_alu &b1 = r->bc;
348bf215546Sopenharmony_ci
349bf215546Sopenharmony_ci	if (b0.op != b1.op)
350bf215546Sopenharmony_ci		return false;
351bf215546Sopenharmony_ci
352bf215546Sopenharmony_ci	unsigned src_count = b0.op_ptr->src_count;
353bf215546Sopenharmony_ci
354bf215546Sopenharmony_ci	if (b0.index_mode != b1.index_mode)
355bf215546Sopenharmony_ci		return false;
356bf215546Sopenharmony_ci
357bf215546Sopenharmony_ci	if (b0.clamp != b1.clamp || b0.omod != b1.omod)
358bf215546Sopenharmony_ci			return false;
359bf215546Sopenharmony_ci
360bf215546Sopenharmony_ci	for (unsigned s = 0; s < src_count; ++s) {
361bf215546Sopenharmony_ci		const bc_alu_src &s0 = b0.src[s];
362bf215546Sopenharmony_ci		const bc_alu_src &s1 = b1.src[s];
363bf215546Sopenharmony_ci
364bf215546Sopenharmony_ci		if (s0.abs != s1.abs || s0.neg != s1.neg)
365bf215546Sopenharmony_ci			return false;
366bf215546Sopenharmony_ci	}
367bf215546Sopenharmony_ci	return args_equal(l->src, r->src);
368bf215546Sopenharmony_ci}
369bf215546Sopenharmony_ci
370bf215546Sopenharmony_cibool expr_handler::fold_alu_op1(alu_node& n) {
371bf215546Sopenharmony_ci
372bf215546Sopenharmony_ci	assert(!n.src.empty());
373bf215546Sopenharmony_ci	if (n.src.empty())
374bf215546Sopenharmony_ci		return false;
375bf215546Sopenharmony_ci
376bf215546Sopenharmony_ci	/* don't fold LDS instructions */
377bf215546Sopenharmony_ci	if (n.bc.op_ptr->flags & AF_LDS)
378bf215546Sopenharmony_ci		return false;
379bf215546Sopenharmony_ci
380bf215546Sopenharmony_ci	value* v0 = n.src[0]->gvalue();
381bf215546Sopenharmony_ci
382bf215546Sopenharmony_ci	if (v0->is_lds_oq() || v0->is_lds_access())
383bf215546Sopenharmony_ci		return false;
384bf215546Sopenharmony_ci	assert(v0 && n.dst[0]);
385bf215546Sopenharmony_ci
386bf215546Sopenharmony_ci	if (!v0->is_const()) {
387bf215546Sopenharmony_ci		// handle (MOV -(MOV -x)) => (MOV x)
388bf215546Sopenharmony_ci		if (n.bc.op == ALU_OP1_MOV && n.bc.src[0].neg && !n.bc.src[1].abs
389bf215546Sopenharmony_ci				&& v0->def && v0->def->is_alu_op(ALU_OP1_MOV)) {
390bf215546Sopenharmony_ci			alu_node *sd = static_cast<alu_node*>(v0->def);
391bf215546Sopenharmony_ci			if (!sd->bc.clamp && !sd->bc.omod && !sd->bc.src[0].abs &&
392bf215546Sopenharmony_ci					sd->bc.src[0].neg) {
393bf215546Sopenharmony_ci				n.src[0] = sd->src[0];
394bf215546Sopenharmony_ci				n.bc.src[0].neg = 0;
395bf215546Sopenharmony_ci				v0 = n.src[0]->gvalue();
396bf215546Sopenharmony_ci			}
397bf215546Sopenharmony_ci		}
398bf215546Sopenharmony_ci
399bf215546Sopenharmony_ci		if ((n.bc.op == ALU_OP1_MOV || n.bc.op == ALU_OP1_MOVA_INT ||
400bf215546Sopenharmony_ci				n.bc.op == ALU_OP1_MOVA_GPR_INT)
401bf215546Sopenharmony_ci				&& n.bc.clamp == 0 && n.bc.omod == 0
402bf215546Sopenharmony_ci				&& n.bc.src[0].abs == 0 && n.bc.src[0].neg == 0 &&
403bf215546Sopenharmony_ci				n.src.size() == 1 /* RIM/SIM can be appended as additional values */
404bf215546Sopenharmony_ci				&& n.dst[0]->no_reladdr_conflict_with(v0)) {
405bf215546Sopenharmony_ci			assign_source(n.dst[0], v0);
406bf215546Sopenharmony_ci			return true;
407bf215546Sopenharmony_ci		}
408bf215546Sopenharmony_ci		return false;
409bf215546Sopenharmony_ci	}
410bf215546Sopenharmony_ci
411bf215546Sopenharmony_ci	literal dv, cv = v0->get_const_value();
412bf215546Sopenharmony_ci	apply_alu_src_mod(n.bc, 0, cv);
413bf215546Sopenharmony_ci
414bf215546Sopenharmony_ci	switch (n.bc.op) {
415bf215546Sopenharmony_ci	case ALU_OP1_CEIL: dv = ceilf(cv.f); break;
416bf215546Sopenharmony_ci	case ALU_OP1_COS: dv = cos(cv.f * 2.0f * M_PI); break;
417bf215546Sopenharmony_ci	case ALU_OP1_EXP_IEEE: dv = exp2f(cv.f); break;
418bf215546Sopenharmony_ci	case ALU_OP1_FLOOR: dv = floorf(cv.f); break;
419bf215546Sopenharmony_ci	case ALU_OP1_FLT_TO_INT: dv = (int)cv.f; break; // FIXME: round modes ????
420bf215546Sopenharmony_ci	case ALU_OP1_FLT_TO_INT_FLOOR: dv = (int32_t)floorf(cv.f); break;
421bf215546Sopenharmony_ci	case ALU_OP1_FLT_TO_INT_RPI: dv = (int32_t)floorf(cv.f + 0.5f); break;
422bf215546Sopenharmony_ci	case ALU_OP1_FLT_TO_INT_TRUNC: dv = (int32_t)truncf(cv.f); break;
423bf215546Sopenharmony_ci	case ALU_OP1_FLT_TO_UINT: dv = (uint32_t)cv.f; break;
424bf215546Sopenharmony_ci	case ALU_OP1_FRACT: dv = cv.f - floorf(cv.f); break;
425bf215546Sopenharmony_ci	case ALU_OP1_INT_TO_FLT: dv = (float)cv.i; break;
426bf215546Sopenharmony_ci	case ALU_OP1_LOG_CLAMPED:
427bf215546Sopenharmony_ci	case ALU_OP1_LOG_IEEE:
428bf215546Sopenharmony_ci		if (cv.f != 0.0f)
429bf215546Sopenharmony_ci			dv = log2f(cv.f);
430bf215546Sopenharmony_ci		else
431bf215546Sopenharmony_ci			// don't fold to NAN, let the GPU handle it for now
432bf215546Sopenharmony_ci			// (prevents degenerate LIT tests from failing)
433bf215546Sopenharmony_ci			return false;
434bf215546Sopenharmony_ci		break;
435bf215546Sopenharmony_ci	case ALU_OP1_MOV: dv = cv; break;
436bf215546Sopenharmony_ci	case ALU_OP1_MOVA_INT: dv = cv; break; // FIXME ???
437bf215546Sopenharmony_ci//	case ALU_OP1_MOVA_FLOOR: dv = (int32_t)floor(cv.f); break;
438bf215546Sopenharmony_ci//	case ALU_OP1_MOVA_GPR_INT:
439bf215546Sopenharmony_ci	case ALU_OP1_NOT_INT: dv = ~cv.i; break;
440bf215546Sopenharmony_ci	case ALU_OP1_PRED_SET_INV:
441bf215546Sopenharmony_ci		dv = cv.f == 0.0f ? 1.0f : (cv.f == 1.0f ? 0.0f : cv.f); break;
442bf215546Sopenharmony_ci	case ALU_OP1_PRED_SET_RESTORE: dv = cv; break;
443bf215546Sopenharmony_ci	case ALU_OP1_RECIPSQRT_CLAMPED:
444bf215546Sopenharmony_ci	case ALU_OP1_RECIPSQRT_FF:
445bf215546Sopenharmony_ci	case ALU_OP1_RECIPSQRT_IEEE: dv = 1.0f / sqrtf(cv.f); break;
446bf215546Sopenharmony_ci	case ALU_OP1_RECIP_CLAMPED:
447bf215546Sopenharmony_ci	case ALU_OP1_RECIP_FF:
448bf215546Sopenharmony_ci	case ALU_OP1_RECIP_IEEE: dv = 1.0f / cv.f; break;
449bf215546Sopenharmony_ci//	case ALU_OP1_RECIP_INT:
450bf215546Sopenharmony_ci	case ALU_OP1_RECIP_UINT: {
451bf215546Sopenharmony_ci		if (!cv.u)
452bf215546Sopenharmony_ci			return false;
453bf215546Sopenharmony_ci		dv.u = (1ull << 32) / cv.u;
454bf215546Sopenharmony_ci		break;
455bf215546Sopenharmony_ci	}
456bf215546Sopenharmony_ci	//	case ALU_OP1_RNDNE: dv = floor(cv.f + 0.5f); break;
457bf215546Sopenharmony_ci	case ALU_OP1_SIN: dv = sin(cv.f * 2.0f * M_PI); break;
458bf215546Sopenharmony_ci	case ALU_OP1_SQRT_IEEE: dv = sqrtf(cv.f); break;
459bf215546Sopenharmony_ci	case ALU_OP1_TRUNC: dv = truncf(cv.f); break;
460bf215546Sopenharmony_ci
461bf215546Sopenharmony_ci	default:
462bf215546Sopenharmony_ci		return false;
463bf215546Sopenharmony_ci	}
464bf215546Sopenharmony_ci
465bf215546Sopenharmony_ci	apply_alu_dst_mod(n.bc, dv);
466bf215546Sopenharmony_ci	assign_source(n.dst[0], get_const(dv));
467bf215546Sopenharmony_ci	return true;
468bf215546Sopenharmony_ci}
469bf215546Sopenharmony_ci
470bf215546Sopenharmony_cibool expr_handler::fold_mul_add(alu_node *n) {
471bf215546Sopenharmony_ci
472bf215546Sopenharmony_ci	bool ieee;
473bf215546Sopenharmony_ci	value* v0 = n->src[0]->gvalue();
474bf215546Sopenharmony_ci
475bf215546Sopenharmony_ci	alu_node *d0 = (v0->def && v0->def->is_alu_inst()) ?
476bf215546Sopenharmony_ci			static_cast<alu_node*>(v0->def) : NULL;
477bf215546Sopenharmony_ci
478bf215546Sopenharmony_ci	if (d0) {
479bf215546Sopenharmony_ci		if (d0->is_alu_op(ALU_OP2_MUL_IEEE))
480bf215546Sopenharmony_ci			ieee = true;
481bf215546Sopenharmony_ci		else if (d0->is_alu_op(ALU_OP2_MUL))
482bf215546Sopenharmony_ci			ieee = false;
483bf215546Sopenharmony_ci		else
484bf215546Sopenharmony_ci			return false;
485bf215546Sopenharmony_ci
486bf215546Sopenharmony_ci		if (!d0->bc.src[0].abs && !d0->bc.src[1].abs &&
487bf215546Sopenharmony_ci				!n->bc.src[1].abs && !n->bc.src[0].abs && !d0->bc.omod &&
488bf215546Sopenharmony_ci				!d0->bc.clamp && !n->bc.omod &&
489bf215546Sopenharmony_ci				(!d0->src[0]->is_kcache() || !d0->src[1]->is_kcache() ||
490bf215546Sopenharmony_ci						!n->src[1]->is_kcache())) {
491bf215546Sopenharmony_ci
492bf215546Sopenharmony_ci			bool mul_neg = n->bc.src[0].neg;
493bf215546Sopenharmony_ci
494bf215546Sopenharmony_ci			n->src.resize(3);
495bf215546Sopenharmony_ci			n->bc.set_op(ieee ? ALU_OP3_MULADD_IEEE : ALU_OP3_MULADD);
496bf215546Sopenharmony_ci			n->src[2] = n->src[1];
497bf215546Sopenharmony_ci			n->bc.src[2] = n->bc.src[1];
498bf215546Sopenharmony_ci			n->src[0] = d0->src[0];
499bf215546Sopenharmony_ci			n->bc.src[0] = d0->bc.src[0];
500bf215546Sopenharmony_ci			n->src[1] = d0->src[1];
501bf215546Sopenharmony_ci			n->bc.src[1] = d0->bc.src[1];
502bf215546Sopenharmony_ci
503bf215546Sopenharmony_ci			n->bc.src[0].neg ^= mul_neg;
504bf215546Sopenharmony_ci
505bf215546Sopenharmony_ci			fold_alu_op3(*n);
506bf215546Sopenharmony_ci			return true;
507bf215546Sopenharmony_ci		}
508bf215546Sopenharmony_ci	}
509bf215546Sopenharmony_ci
510bf215546Sopenharmony_ci	value* v1 = n->src[1]->gvalue();
511bf215546Sopenharmony_ci
512bf215546Sopenharmony_ci	alu_node *d1 = (v1->def && v1->def->is_alu_inst()) ?
513bf215546Sopenharmony_ci			static_cast<alu_node*>(v1->def) : NULL;
514bf215546Sopenharmony_ci
515bf215546Sopenharmony_ci	if (d1) {
516bf215546Sopenharmony_ci		if (d1->is_alu_op(ALU_OP2_MUL_IEEE))
517bf215546Sopenharmony_ci			ieee = true;
518bf215546Sopenharmony_ci		else if (d1->is_alu_op(ALU_OP2_MUL))
519bf215546Sopenharmony_ci			ieee = false;
520bf215546Sopenharmony_ci		else
521bf215546Sopenharmony_ci			return false;
522bf215546Sopenharmony_ci
523bf215546Sopenharmony_ci		if (!d1->bc.src[1].abs && !d1->bc.src[0].abs &&
524bf215546Sopenharmony_ci				!n->bc.src[0].abs && !n->bc.src[1].abs && !d1->bc.omod &&
525bf215546Sopenharmony_ci				!d1->bc.clamp && !n->bc.omod &&
526bf215546Sopenharmony_ci				(!d1->src[0]->is_kcache() || !d1->src[1]->is_kcache() ||
527bf215546Sopenharmony_ci						!n->src[0]->is_kcache())) {
528bf215546Sopenharmony_ci
529bf215546Sopenharmony_ci			bool mul_neg = n->bc.src[1].neg;
530bf215546Sopenharmony_ci
531bf215546Sopenharmony_ci			n->src.resize(3);
532bf215546Sopenharmony_ci			n->bc.set_op(ieee ? ALU_OP3_MULADD_IEEE : ALU_OP3_MULADD);
533bf215546Sopenharmony_ci			n->src[2] = n->src[0];
534bf215546Sopenharmony_ci			n->bc.src[2] = n->bc.src[0];
535bf215546Sopenharmony_ci			n->src[1] = d1->src[1];
536bf215546Sopenharmony_ci			n->bc.src[1] = d1->bc.src[1];
537bf215546Sopenharmony_ci			n->src[0] = d1->src[0];
538bf215546Sopenharmony_ci			n->bc.src[0] = d1->bc.src[0];
539bf215546Sopenharmony_ci
540bf215546Sopenharmony_ci			n->bc.src[1].neg ^= mul_neg;
541bf215546Sopenharmony_ci
542bf215546Sopenharmony_ci			fold_alu_op3(*n);
543bf215546Sopenharmony_ci			return true;
544bf215546Sopenharmony_ci		}
545bf215546Sopenharmony_ci	}
546bf215546Sopenharmony_ci
547bf215546Sopenharmony_ci	return false;
548bf215546Sopenharmony_ci}
549bf215546Sopenharmony_ci
550bf215546Sopenharmony_cibool expr_handler::eval_const_op(unsigned op, literal &r,
551bf215546Sopenharmony_ci                                 literal cv0, literal cv1) {
552bf215546Sopenharmony_ci
553bf215546Sopenharmony_ci	switch (op) {
554bf215546Sopenharmony_ci	case ALU_OP2_ADD: r = cv0.f + cv1.f; break;
555bf215546Sopenharmony_ci	case ALU_OP2_ADDC_UINT:
556bf215546Sopenharmony_ci		r = (uint32_t)(((uint64_t)cv0.u + cv1.u)>>32); break;
557bf215546Sopenharmony_ci	case ALU_OP2_ADD_INT: r = cv0.i + cv1.i; break;
558bf215546Sopenharmony_ci	case ALU_OP2_AND_INT: r = cv0.i & cv1.i; break;
559bf215546Sopenharmony_ci	case ALU_OP2_ASHR_INT: r = cv0.i >> (cv1.i & 0x1F); break;
560bf215546Sopenharmony_ci	case ALU_OP2_BFM_INT:
561bf215546Sopenharmony_ci		r = (((1 << (cv0.i & 0x1F)) - 1) << (cv1.i & 0x1F)); break;
562bf215546Sopenharmony_ci	case ALU_OP2_LSHL_INT: r = cv0.i << cv1.i; break;
563bf215546Sopenharmony_ci	case ALU_OP2_LSHR_INT: r = cv0.u >> cv1.u; break;
564bf215546Sopenharmony_ci	case ALU_OP2_MAX:
565bf215546Sopenharmony_ci	case ALU_OP2_MAX_DX10: r = cv0.f > cv1.f ? cv0.f : cv1.f; break;
566bf215546Sopenharmony_ci	case ALU_OP2_MAX_INT: r = cv0.i > cv1.i ? cv0.i : cv1.i; break;
567bf215546Sopenharmony_ci	case ALU_OP2_MAX_UINT: r = cv0.u > cv1.u ? cv0.u : cv1.u; break;
568bf215546Sopenharmony_ci	case ALU_OP2_MIN:
569bf215546Sopenharmony_ci	case ALU_OP2_MIN_DX10: r = cv0.f < cv1.f ? cv0.f : cv1.f; break;
570bf215546Sopenharmony_ci	case ALU_OP2_MIN_INT: r = cv0.i < cv1.i ? cv0.i : cv1.i; break;
571bf215546Sopenharmony_ci	case ALU_OP2_MIN_UINT: r = cv0.u < cv1.u ? cv0.u : cv1.u; break;
572bf215546Sopenharmony_ci	case ALU_OP2_MUL:
573bf215546Sopenharmony_ci	case ALU_OP2_MUL_IEEE: r = cv0.f * cv1.f; break;
574bf215546Sopenharmony_ci	case ALU_OP2_MULHI_INT:
575bf215546Sopenharmony_ci		r = (int32_t)(((int64_t)cv0.u * cv1.u)>>32); break;
576bf215546Sopenharmony_ci	case ALU_OP2_MULHI_UINT:
577bf215546Sopenharmony_ci		r = (uint32_t)(((uint64_t)cv0.u * cv1.u)>>32); break;
578bf215546Sopenharmony_ci	case ALU_OP2_MULLO_INT:
579bf215546Sopenharmony_ci		r = (int32_t)(((int64_t)cv0.u * cv1.u) & 0xFFFFFFFF); break;
580bf215546Sopenharmony_ci	case ALU_OP2_MULLO_UINT:
581bf215546Sopenharmony_ci		r = (uint32_t)(((uint64_t)cv0.u * cv1.u) & 0xFFFFFFFF); break;
582bf215546Sopenharmony_ci	case ALU_OP2_OR_INT: r = cv0.i | cv1.i; break;
583bf215546Sopenharmony_ci	case ALU_OP2_SUB_INT: r = cv0.i - cv1.i; break;
584bf215546Sopenharmony_ci	case ALU_OP2_XOR_INT: r = cv0.i ^ cv1.i; break;
585bf215546Sopenharmony_ci
586bf215546Sopenharmony_ci	default:
587bf215546Sopenharmony_ci		return false;
588bf215546Sopenharmony_ci	}
589bf215546Sopenharmony_ci
590bf215546Sopenharmony_ci	return true;
591bf215546Sopenharmony_ci}
592bf215546Sopenharmony_ci
593bf215546Sopenharmony_ci// fold the chain of associative ops, e.g. (ADD 2, (ADD x, 3)) => (ADD x, 5)
594bf215546Sopenharmony_cibool expr_handler::fold_assoc(alu_node *n) {
595bf215546Sopenharmony_ci
596bf215546Sopenharmony_ci	alu_node *a = n;
597bf215546Sopenharmony_ci	literal cr;
598bf215546Sopenharmony_ci
599bf215546Sopenharmony_ci	int last_arg = -3;
600bf215546Sopenharmony_ci
601bf215546Sopenharmony_ci	unsigned op = n->bc.op;
602bf215546Sopenharmony_ci	bool allow_neg = false, cur_neg = false;
603bf215546Sopenharmony_ci	bool distribute_neg = false;
604bf215546Sopenharmony_ci
605bf215546Sopenharmony_ci	switch(op) {
606bf215546Sopenharmony_ci	case ALU_OP2_ADD:
607bf215546Sopenharmony_ci		distribute_neg = true;
608bf215546Sopenharmony_ci		allow_neg = true;
609bf215546Sopenharmony_ci		break;
610bf215546Sopenharmony_ci	case ALU_OP2_MUL:
611bf215546Sopenharmony_ci	case ALU_OP2_MUL_IEEE:
612bf215546Sopenharmony_ci		allow_neg = true;
613bf215546Sopenharmony_ci		break;
614bf215546Sopenharmony_ci	case ALU_OP3_MULADD:
615bf215546Sopenharmony_ci		allow_neg = true;
616bf215546Sopenharmony_ci		op = ALU_OP2_MUL;
617bf215546Sopenharmony_ci		break;
618bf215546Sopenharmony_ci	case ALU_OP3_MULADD_IEEE:
619bf215546Sopenharmony_ci		allow_neg = true;
620bf215546Sopenharmony_ci		op = ALU_OP2_MUL_IEEE;
621bf215546Sopenharmony_ci		break;
622bf215546Sopenharmony_ci	default:
623bf215546Sopenharmony_ci		if (n->bc.op_ptr->src_count != 2)
624bf215546Sopenharmony_ci			return false;
625bf215546Sopenharmony_ci	}
626bf215546Sopenharmony_ci
627bf215546Sopenharmony_ci	// check if we can evaluate the op
628bf215546Sopenharmony_ci	if (!eval_const_op(op, cr, literal(0), literal(0)))
629bf215546Sopenharmony_ci		return false;
630bf215546Sopenharmony_ci
631bf215546Sopenharmony_ci	while (true) {
632bf215546Sopenharmony_ci
633bf215546Sopenharmony_ci		value *v0 = a->src[0]->gvalue();
634bf215546Sopenharmony_ci		value *v1 = a->src[1]->gvalue();
635bf215546Sopenharmony_ci
636bf215546Sopenharmony_ci		last_arg = -2;
637bf215546Sopenharmony_ci
638bf215546Sopenharmony_ci		if (v1->is_const()) {
639bf215546Sopenharmony_ci			literal arg = v1->get_const_value();
640bf215546Sopenharmony_ci			apply_alu_src_mod(a->bc, 1, arg);
641bf215546Sopenharmony_ci			if (cur_neg && distribute_neg)
642bf215546Sopenharmony_ci				arg.f = -arg.f;
643bf215546Sopenharmony_ci
644bf215546Sopenharmony_ci			if (a == n)
645bf215546Sopenharmony_ci				cr = arg;
646bf215546Sopenharmony_ci			else
647bf215546Sopenharmony_ci				eval_const_op(op, cr, cr, arg);
648bf215546Sopenharmony_ci
649bf215546Sopenharmony_ci			if (v0->def) {
650bf215546Sopenharmony_ci				alu_node *d0 = static_cast<alu_node*>(v0->def);
651bf215546Sopenharmony_ci				if ((d0->is_alu_op(op) ||
652bf215546Sopenharmony_ci						(op == ALU_OP2_MUL_IEEE &&
653bf215546Sopenharmony_ci								d0->is_alu_op(ALU_OP2_MUL))) &&
654bf215546Sopenharmony_ci						!d0->bc.omod && !d0->bc.clamp &&
655bf215546Sopenharmony_ci						!a->bc.src[0].abs &&
656bf215546Sopenharmony_ci						(!a->bc.src[0].neg || allow_neg)) {
657bf215546Sopenharmony_ci					cur_neg ^= a->bc.src[0].neg;
658bf215546Sopenharmony_ci					a = d0;
659bf215546Sopenharmony_ci					continue;
660bf215546Sopenharmony_ci				}
661bf215546Sopenharmony_ci			}
662bf215546Sopenharmony_ci			last_arg = 0;
663bf215546Sopenharmony_ci
664bf215546Sopenharmony_ci		}
665bf215546Sopenharmony_ci
666bf215546Sopenharmony_ci		if (v0->is_const()) {
667bf215546Sopenharmony_ci			literal arg = v0->get_const_value();
668bf215546Sopenharmony_ci			apply_alu_src_mod(a->bc, 0, arg);
669bf215546Sopenharmony_ci			if (cur_neg && distribute_neg)
670bf215546Sopenharmony_ci				arg.f = -arg.f;
671bf215546Sopenharmony_ci
672bf215546Sopenharmony_ci			if (last_arg == 0) {
673bf215546Sopenharmony_ci				eval_const_op(op, cr, cr, arg);
674bf215546Sopenharmony_ci				last_arg = -1;
675bf215546Sopenharmony_ci				break;
676bf215546Sopenharmony_ci			}
677bf215546Sopenharmony_ci
678bf215546Sopenharmony_ci			if (a == n)
679bf215546Sopenharmony_ci				cr = arg;
680bf215546Sopenharmony_ci			else
681bf215546Sopenharmony_ci				eval_const_op(op, cr, cr, arg);
682bf215546Sopenharmony_ci
683bf215546Sopenharmony_ci			if (v1->def) {
684bf215546Sopenharmony_ci				alu_node *d1 = static_cast<alu_node*>(v1->def);
685bf215546Sopenharmony_ci				if ((d1->is_alu_op(op) ||
686bf215546Sopenharmony_ci						(op == ALU_OP2_MUL_IEEE &&
687bf215546Sopenharmony_ci								d1->is_alu_op(ALU_OP2_MUL))) &&
688bf215546Sopenharmony_ci						!d1->bc.omod && !d1->bc.clamp &&
689bf215546Sopenharmony_ci						!a->bc.src[1].abs &&
690bf215546Sopenharmony_ci						(!a->bc.src[1].neg || allow_neg)) {
691bf215546Sopenharmony_ci					cur_neg ^= a->bc.src[1].neg;
692bf215546Sopenharmony_ci					a = d1;
693bf215546Sopenharmony_ci					continue;
694bf215546Sopenharmony_ci				}
695bf215546Sopenharmony_ci			}
696bf215546Sopenharmony_ci
697bf215546Sopenharmony_ci			last_arg = 1;
698bf215546Sopenharmony_ci		}
699bf215546Sopenharmony_ci
700bf215546Sopenharmony_ci		break;
701bf215546Sopenharmony_ci	};
702bf215546Sopenharmony_ci
703bf215546Sopenharmony_ci	if (last_arg == -1) {
704bf215546Sopenharmony_ci		// result is const
705bf215546Sopenharmony_ci		apply_alu_dst_mod(n->bc, cr);
706bf215546Sopenharmony_ci
707bf215546Sopenharmony_ci		if (n->bc.op == op) {
708bf215546Sopenharmony_ci			convert_to_mov(*n, sh.get_const_value(cr));
709bf215546Sopenharmony_ci			fold_alu_op1(*n);
710bf215546Sopenharmony_ci			return true;
711bf215546Sopenharmony_ci		} else { // MULADD => ADD
712bf215546Sopenharmony_ci			n->src[0] = n->src[2];
713bf215546Sopenharmony_ci			n->bc.src[0] = n->bc.src[2];
714bf215546Sopenharmony_ci			n->src[1] = sh.get_const_value(cr);
715bf215546Sopenharmony_ci			n->bc.src[1].clear();
716bf215546Sopenharmony_ci
717bf215546Sopenharmony_ci			n->src.resize(2);
718bf215546Sopenharmony_ci			n->bc.set_op(ALU_OP2_ADD);
719bf215546Sopenharmony_ci		}
720bf215546Sopenharmony_ci	} else if (last_arg >= 0) {
721bf215546Sopenharmony_ci		n->src[0] = a->src[last_arg];
722bf215546Sopenharmony_ci		n->bc.src[0] = a->bc.src[last_arg];
723bf215546Sopenharmony_ci		n->bc.src[0].neg ^= cur_neg;
724bf215546Sopenharmony_ci		n->src[1] = sh.get_const_value(cr);
725bf215546Sopenharmony_ci		n->bc.src[1].clear();
726bf215546Sopenharmony_ci	}
727bf215546Sopenharmony_ci
728bf215546Sopenharmony_ci	return false;
729bf215546Sopenharmony_ci}
730bf215546Sopenharmony_ci
731bf215546Sopenharmony_cibool expr_handler::fold_alu_op2(alu_node& n) {
732bf215546Sopenharmony_ci
733bf215546Sopenharmony_ci	if (n.src.size() < 2)
734bf215546Sopenharmony_ci		return false;
735bf215546Sopenharmony_ci
736bf215546Sopenharmony_ci	unsigned flags = n.bc.op_ptr->flags;
737bf215546Sopenharmony_ci
738bf215546Sopenharmony_ci	if (flags & AF_SET) {
739bf215546Sopenharmony_ci		return fold_setcc(n);
740bf215546Sopenharmony_ci	}
741bf215546Sopenharmony_ci
742bf215546Sopenharmony_ci	if (!sh.safe_math && (flags & AF_M_ASSOC)) {
743bf215546Sopenharmony_ci		if (fold_assoc(&n))
744bf215546Sopenharmony_ci			return true;
745bf215546Sopenharmony_ci	}
746bf215546Sopenharmony_ci
747bf215546Sopenharmony_ci	value* v0 = n.src[0]->gvalue();
748bf215546Sopenharmony_ci	value* v1 = n.src[1]->gvalue();
749bf215546Sopenharmony_ci
750bf215546Sopenharmony_ci	assert(v0 && v1);
751bf215546Sopenharmony_ci
752bf215546Sopenharmony_ci	// handle some operations with equal args, e.g. x + x => x * 2
753bf215546Sopenharmony_ci	if (v0 == v1) {
754bf215546Sopenharmony_ci		if (n.bc.src[0].neg == n.bc.src[1].neg &&
755bf215546Sopenharmony_ci				n.bc.src[0].abs == n.bc.src[1].abs) {
756bf215546Sopenharmony_ci			switch (n.bc.op) {
757bf215546Sopenharmony_ci			case ALU_OP2_MIN: // (MIN x, x) => (MOV x)
758bf215546Sopenharmony_ci			case ALU_OP2_MIN_DX10:
759bf215546Sopenharmony_ci			case ALU_OP2_MAX:
760bf215546Sopenharmony_ci			case ALU_OP2_MAX_DX10:
761bf215546Sopenharmony_ci				convert_to_mov(n, v0, n.bc.src[0].neg, n.bc.src[0].abs);
762bf215546Sopenharmony_ci				return fold_alu_op1(n);
763bf215546Sopenharmony_ci			case ALU_OP2_ADD:  // (ADD x, x) => (MUL x, 2)
764bf215546Sopenharmony_ci				if (!sh.safe_math) {
765bf215546Sopenharmony_ci					n.src[1] = sh.get_const_value(2.0f);
766bf215546Sopenharmony_ci					n.bc.src[1].clear();
767bf215546Sopenharmony_ci					n.bc.set_op(ALU_OP2_MUL);
768bf215546Sopenharmony_ci					return fold_alu_op2(n);
769bf215546Sopenharmony_ci				}
770bf215546Sopenharmony_ci				break;
771bf215546Sopenharmony_ci			}
772bf215546Sopenharmony_ci		}
773bf215546Sopenharmony_ci		if (n.bc.src[0].neg != n.bc.src[1].neg &&
774bf215546Sopenharmony_ci				n.bc.src[0].abs == n.bc.src[1].abs) {
775bf215546Sopenharmony_ci			switch (n.bc.op) {
776bf215546Sopenharmony_ci			case ALU_OP2_ADD:  // (ADD x, -x) => (MOV 0)
777bf215546Sopenharmony_ci				if (!sh.safe_math) {
778bf215546Sopenharmony_ci					convert_to_mov(n, sh.get_const_value(literal(0)));
779bf215546Sopenharmony_ci					return fold_alu_op1(n);
780bf215546Sopenharmony_ci				}
781bf215546Sopenharmony_ci				break;
782bf215546Sopenharmony_ci			}
783bf215546Sopenharmony_ci		}
784bf215546Sopenharmony_ci	}
785bf215546Sopenharmony_ci
786bf215546Sopenharmony_ci	if (n.bc.op == ALU_OP2_ADD) {
787bf215546Sopenharmony_ci		if (fold_mul_add(&n))
788bf215546Sopenharmony_ci			return true;
789bf215546Sopenharmony_ci	}
790bf215546Sopenharmony_ci
791bf215546Sopenharmony_ci	bool isc0 = v0->is_const();
792bf215546Sopenharmony_ci	bool isc1 = v1->is_const();
793bf215546Sopenharmony_ci
794bf215546Sopenharmony_ci	if (!isc0 && !isc1)
795bf215546Sopenharmony_ci		return false;
796bf215546Sopenharmony_ci
797bf215546Sopenharmony_ci	literal dv, cv0, cv1;
798bf215546Sopenharmony_ci
799bf215546Sopenharmony_ci	if (isc0) {
800bf215546Sopenharmony_ci		cv0 = v0->get_const_value();
801bf215546Sopenharmony_ci		apply_alu_src_mod(n.bc, 0, cv0);
802bf215546Sopenharmony_ci	}
803bf215546Sopenharmony_ci
804bf215546Sopenharmony_ci	if (isc1) {
805bf215546Sopenharmony_ci		cv1 = v1->get_const_value();
806bf215546Sopenharmony_ci		apply_alu_src_mod(n.bc, 1, cv1);
807bf215546Sopenharmony_ci	}
808bf215546Sopenharmony_ci
809bf215546Sopenharmony_ci	if (isc0 && isc1) {
810bf215546Sopenharmony_ci
811bf215546Sopenharmony_ci		if (!eval_const_op(n.bc.op, dv, cv0, cv1))
812bf215546Sopenharmony_ci			return false;
813bf215546Sopenharmony_ci
814bf215546Sopenharmony_ci	} else { // one source is const
815bf215546Sopenharmony_ci
816bf215546Sopenharmony_ci		if (isc0 && cv0 == literal(0)) {
817bf215546Sopenharmony_ci			switch (n.bc.op) {
818bf215546Sopenharmony_ci			case ALU_OP2_ADD:
819bf215546Sopenharmony_ci			case ALU_OP2_ADD_INT:
820bf215546Sopenharmony_ci			case ALU_OP2_MAX_UINT:
821bf215546Sopenharmony_ci			case ALU_OP2_OR_INT:
822bf215546Sopenharmony_ci			case ALU_OP2_XOR_INT:
823bf215546Sopenharmony_ci				convert_to_mov(n, n.src[1], n.bc.src[1].neg,  n.bc.src[1].abs);
824bf215546Sopenharmony_ci				return fold_alu_op1(n);
825bf215546Sopenharmony_ci			case ALU_OP2_AND_INT:
826bf215546Sopenharmony_ci			case ALU_OP2_ASHR_INT:
827bf215546Sopenharmony_ci			case ALU_OP2_LSHL_INT:
828bf215546Sopenharmony_ci			case ALU_OP2_LSHR_INT:
829bf215546Sopenharmony_ci			case ALU_OP2_MIN_UINT:
830bf215546Sopenharmony_ci			case ALU_OP2_MUL:
831bf215546Sopenharmony_ci			case ALU_OP2_MULHI_UINT:
832bf215546Sopenharmony_ci			case ALU_OP2_MULLO_UINT:
833bf215546Sopenharmony_ci				convert_to_mov(n, sh.get_const_value(literal(0)));
834bf215546Sopenharmony_ci				return fold_alu_op1(n);
835bf215546Sopenharmony_ci			}
836bf215546Sopenharmony_ci		} else if (isc1 && cv1 == literal(0)) {
837bf215546Sopenharmony_ci			switch (n.bc.op) {
838bf215546Sopenharmony_ci			case ALU_OP2_ADD:
839bf215546Sopenharmony_ci			case ALU_OP2_ADD_INT:
840bf215546Sopenharmony_ci			case ALU_OP2_ASHR_INT:
841bf215546Sopenharmony_ci			case ALU_OP2_LSHL_INT:
842bf215546Sopenharmony_ci			case ALU_OP2_LSHR_INT:
843bf215546Sopenharmony_ci			case ALU_OP2_MAX_UINT:
844bf215546Sopenharmony_ci			case ALU_OP2_OR_INT:
845bf215546Sopenharmony_ci			case ALU_OP2_SUB_INT:
846bf215546Sopenharmony_ci			case ALU_OP2_XOR_INT:
847bf215546Sopenharmony_ci				convert_to_mov(n, n.src[0], n.bc.src[0].neg,  n.bc.src[0].abs);
848bf215546Sopenharmony_ci				return fold_alu_op1(n);
849bf215546Sopenharmony_ci			case ALU_OP2_AND_INT:
850bf215546Sopenharmony_ci			case ALU_OP2_MIN_UINT:
851bf215546Sopenharmony_ci			case ALU_OP2_MUL:
852bf215546Sopenharmony_ci			case ALU_OP2_MULHI_UINT:
853bf215546Sopenharmony_ci			case ALU_OP2_MULLO_UINT:
854bf215546Sopenharmony_ci				convert_to_mov(n, sh.get_const_value(literal(0)));
855bf215546Sopenharmony_ci				return fold_alu_op1(n);
856bf215546Sopenharmony_ci			}
857bf215546Sopenharmony_ci		} else if (isc0 && cv0 == literal(1.0f)) {
858bf215546Sopenharmony_ci			switch (n.bc.op) {
859bf215546Sopenharmony_ci			case ALU_OP2_MUL:
860bf215546Sopenharmony_ci			case ALU_OP2_MUL_IEEE:
861bf215546Sopenharmony_ci				convert_to_mov(n, n.src[1], n.bc.src[1].neg,  n.bc.src[1].abs);
862bf215546Sopenharmony_ci				return fold_alu_op1(n);
863bf215546Sopenharmony_ci			}
864bf215546Sopenharmony_ci		} else if (isc1 && cv1 == literal(1.0f)) {
865bf215546Sopenharmony_ci			switch (n.bc.op) {
866bf215546Sopenharmony_ci			case ALU_OP2_MUL:
867bf215546Sopenharmony_ci			case ALU_OP2_MUL_IEEE:
868bf215546Sopenharmony_ci				convert_to_mov(n, n.src[0], n.bc.src[0].neg,  n.bc.src[0].abs);
869bf215546Sopenharmony_ci				return fold_alu_op1(n);
870bf215546Sopenharmony_ci			}
871bf215546Sopenharmony_ci		}
872bf215546Sopenharmony_ci
873bf215546Sopenharmony_ci		return false;
874bf215546Sopenharmony_ci	}
875bf215546Sopenharmony_ci
876bf215546Sopenharmony_ci	apply_alu_dst_mod(n.bc, dv);
877bf215546Sopenharmony_ci	assign_source(n.dst[0], get_const(dv));
878bf215546Sopenharmony_ci	return true;
879bf215546Sopenharmony_ci}
880bf215546Sopenharmony_ci
881bf215546Sopenharmony_cibool expr_handler::evaluate_condition(unsigned alu_cnd_flags,
882bf215546Sopenharmony_ci                                      literal s1, literal s2) {
883bf215546Sopenharmony_ci
884bf215546Sopenharmony_ci	unsigned cmp_type = alu_cnd_flags & AF_CMP_TYPE_MASK;
885bf215546Sopenharmony_ci	unsigned cc = alu_cnd_flags & AF_CC_MASK;
886bf215546Sopenharmony_ci
887bf215546Sopenharmony_ci	switch (cmp_type) {
888bf215546Sopenharmony_ci	case AF_FLOAT_CMP: {
889bf215546Sopenharmony_ci		switch (cc) {
890bf215546Sopenharmony_ci		case AF_CC_E : return s1.f == s2.f;
891bf215546Sopenharmony_ci		case AF_CC_GT: return s1.f >  s2.f;
892bf215546Sopenharmony_ci		case AF_CC_GE: return s1.f >= s2.f;
893bf215546Sopenharmony_ci		case AF_CC_NE: return s1.f != s2.f;
894bf215546Sopenharmony_ci		case AF_CC_LT: return s1.f <  s2.f;
895bf215546Sopenharmony_ci		case AF_CC_LE: return s1.f <= s2.f;
896bf215546Sopenharmony_ci		default:
897bf215546Sopenharmony_ci			assert(!"invalid condition code");
898bf215546Sopenharmony_ci			return false;
899bf215546Sopenharmony_ci		}
900bf215546Sopenharmony_ci	}
901bf215546Sopenharmony_ci	case AF_INT_CMP: {
902bf215546Sopenharmony_ci		switch (cc) {
903bf215546Sopenharmony_ci		case AF_CC_E : return s1.i == s2.i;
904bf215546Sopenharmony_ci		case AF_CC_GT: return s1.i >  s2.i;
905bf215546Sopenharmony_ci		case AF_CC_GE: return s1.i >= s2.i;
906bf215546Sopenharmony_ci		case AF_CC_NE: return s1.i != s2.i;
907bf215546Sopenharmony_ci		case AF_CC_LT: return s1.i <  s2.i;
908bf215546Sopenharmony_ci		case AF_CC_LE: return s1.i <= s2.i;
909bf215546Sopenharmony_ci		default:
910bf215546Sopenharmony_ci			assert(!"invalid condition code");
911bf215546Sopenharmony_ci			return false;
912bf215546Sopenharmony_ci		}
913bf215546Sopenharmony_ci	}
914bf215546Sopenharmony_ci	case AF_UINT_CMP: {
915bf215546Sopenharmony_ci		switch (cc) {
916bf215546Sopenharmony_ci		case AF_CC_E : return s1.u == s2.u;
917bf215546Sopenharmony_ci		case AF_CC_GT: return s1.u >  s2.u;
918bf215546Sopenharmony_ci		case AF_CC_GE: return s1.u >= s2.u;
919bf215546Sopenharmony_ci		case AF_CC_NE: return s1.u != s2.u;
920bf215546Sopenharmony_ci		case AF_CC_LT: return s1.u <  s2.u;
921bf215546Sopenharmony_ci		case AF_CC_LE: return s1.u <= s2.u;
922bf215546Sopenharmony_ci		default:
923bf215546Sopenharmony_ci			assert(!"invalid condition code");
924bf215546Sopenharmony_ci			return false;
925bf215546Sopenharmony_ci		}
926bf215546Sopenharmony_ci	}
927bf215546Sopenharmony_ci	default:
928bf215546Sopenharmony_ci		assert(!"invalid cmp_type");
929bf215546Sopenharmony_ci		return false;
930bf215546Sopenharmony_ci	}
931bf215546Sopenharmony_ci}
932bf215546Sopenharmony_ci
933bf215546Sopenharmony_cibool expr_handler::fold_alu_op3(alu_node& n) {
934bf215546Sopenharmony_ci
935bf215546Sopenharmony_ci	if (n.src.size() < 3)
936bf215546Sopenharmony_ci		return false;
937bf215546Sopenharmony_ci
938bf215546Sopenharmony_ci	if (!sh.safe_math && (n.bc.op_ptr->flags & AF_M_ASSOC)) {
939bf215546Sopenharmony_ci		if (fold_assoc(&n))
940bf215546Sopenharmony_ci			return true;
941bf215546Sopenharmony_ci		if (n.src.size() < 3)
942bf215546Sopenharmony_ci			return fold_alu_op2(n);
943bf215546Sopenharmony_ci	}
944bf215546Sopenharmony_ci
945bf215546Sopenharmony_ci	value* v0 = n.src[0]->gvalue();
946bf215546Sopenharmony_ci	value* v1 = n.src[1]->gvalue();
947bf215546Sopenharmony_ci	value* v2 = n.src[2]->gvalue();
948bf215546Sopenharmony_ci
949bf215546Sopenharmony_ci	/* LDS instructions look like op3 with no dst - don't fold. */
950bf215546Sopenharmony_ci	if (!n.dst[0])
951bf215546Sopenharmony_ci		return false;
952bf215546Sopenharmony_ci	assert(v0 && v1 && v2 && n.dst[0]);
953bf215546Sopenharmony_ci
954bf215546Sopenharmony_ci	bool isc0 = v0->is_const();
955bf215546Sopenharmony_ci	bool isc1 = v1->is_const();
956bf215546Sopenharmony_ci	bool isc2 = v2->is_const();
957bf215546Sopenharmony_ci
958bf215546Sopenharmony_ci	literal dv, cv0, cv1, cv2;
959bf215546Sopenharmony_ci
960bf215546Sopenharmony_ci	if (isc0) {
961bf215546Sopenharmony_ci		cv0 = v0->get_const_value();
962bf215546Sopenharmony_ci		apply_alu_src_mod(n.bc, 0, cv0);
963bf215546Sopenharmony_ci	}
964bf215546Sopenharmony_ci
965bf215546Sopenharmony_ci	if (isc1) {
966bf215546Sopenharmony_ci		cv1 = v1->get_const_value();
967bf215546Sopenharmony_ci		apply_alu_src_mod(n.bc, 1, cv1);
968bf215546Sopenharmony_ci	}
969bf215546Sopenharmony_ci
970bf215546Sopenharmony_ci	if (isc2) {
971bf215546Sopenharmony_ci		cv2 = v2->get_const_value();
972bf215546Sopenharmony_ci		apply_alu_src_mod(n.bc, 2, cv2);
973bf215546Sopenharmony_ci	}
974bf215546Sopenharmony_ci
975bf215546Sopenharmony_ci	unsigned flags = n.bc.op_ptr->flags;
976bf215546Sopenharmony_ci
977bf215546Sopenharmony_ci	if (flags & AF_CMOV) {
978bf215546Sopenharmony_ci		int src = 0;
979bf215546Sopenharmony_ci
980bf215546Sopenharmony_ci		if (v1 == v2 && n.bc.src[1].neg == n.bc.src[2].neg) {
981bf215546Sopenharmony_ci			// result doesn't depend on condition, convert to MOV
982bf215546Sopenharmony_ci			src = 1;
983bf215546Sopenharmony_ci		} else if (isc0) {
984bf215546Sopenharmony_ci			// src0 is const, condition can be evaluated, convert to MOV
985bf215546Sopenharmony_ci			bool cond = evaluate_condition(n.bc.op_ptr->flags & (AF_CC_MASK |
986bf215546Sopenharmony_ci					AF_CMP_TYPE_MASK), cv0, literal(0));
987bf215546Sopenharmony_ci			src = cond ? 1 : 2;
988bf215546Sopenharmony_ci		}
989bf215546Sopenharmony_ci
990bf215546Sopenharmony_ci		if (src) {
991bf215546Sopenharmony_ci			// if src is selected, convert to MOV
992bf215546Sopenharmony_ci			convert_to_mov(n, n.src[src], n.bc.src[src].neg);
993bf215546Sopenharmony_ci			return fold_alu_op1(n);
994bf215546Sopenharmony_ci		}
995bf215546Sopenharmony_ci	}
996bf215546Sopenharmony_ci
997bf215546Sopenharmony_ci	// handle (MULADD a, x, MUL (x, b)) => (MUL x, ADD (a, b))
998bf215546Sopenharmony_ci	if (!sh.safe_math && (n.bc.op == ALU_OP3_MULADD ||
999bf215546Sopenharmony_ci			n.bc.op == ALU_OP3_MULADD_IEEE)) {
1000bf215546Sopenharmony_ci
1001bf215546Sopenharmony_ci		unsigned op = n.bc.op == ALU_OP3_MULADD_IEEE ?
1002bf215546Sopenharmony_ci				ALU_OP2_MUL_IEEE : ALU_OP2_MUL;
1003bf215546Sopenharmony_ci
1004bf215546Sopenharmony_ci		if (!isc2 && v2->def && v2->def->is_alu_op(op)) {
1005bf215546Sopenharmony_ci
1006bf215546Sopenharmony_ci			alu_node *md = static_cast<alu_node*>(v2->def);
1007bf215546Sopenharmony_ci			value *mv0 = md->src[0]->gvalue();
1008bf215546Sopenharmony_ci			value *mv1 = md->src[1]->gvalue();
1009bf215546Sopenharmony_ci
1010bf215546Sopenharmony_ci			int es0 = -1, es1 = -1;
1011bf215546Sopenharmony_ci
1012bf215546Sopenharmony_ci			if (v0 == mv0) {
1013bf215546Sopenharmony_ci				es0 = 0;
1014bf215546Sopenharmony_ci				es1 = 0;
1015bf215546Sopenharmony_ci			} else if (v0 == mv1) {
1016bf215546Sopenharmony_ci				es0 = 0;
1017bf215546Sopenharmony_ci				es1 = 1;
1018bf215546Sopenharmony_ci			} else if (v1 == mv0) {
1019bf215546Sopenharmony_ci				es0 = 1;
1020bf215546Sopenharmony_ci				es1 = 0;
1021bf215546Sopenharmony_ci			} else if (v1 == mv1) {
1022bf215546Sopenharmony_ci				es0 = 1;
1023bf215546Sopenharmony_ci				es1 = 1;
1024bf215546Sopenharmony_ci			}
1025bf215546Sopenharmony_ci
1026bf215546Sopenharmony_ci			value *va0 = es0 == 0 ? v1 : v0;
1027bf215546Sopenharmony_ci			value *va1 = es1 == 0 ? mv1 : mv0;
1028bf215546Sopenharmony_ci
1029bf215546Sopenharmony_ci			/* Don't fold if no equal multipliers were found.
1030bf215546Sopenharmony_ci			 * Also don#t fold if the operands of the to be created ADD are both
1031bf215546Sopenharmony_ci			 * relatively accessed with different AR values because that would
1032bf215546Sopenharmony_ci			 * create impossible code.
1033bf215546Sopenharmony_ci			 */
1034bf215546Sopenharmony_ci			if (es0 != -1 &&
1035bf215546Sopenharmony_ci			    (!va0->is_rel() || !va1->is_rel() ||
1036bf215546Sopenharmony_ci			     (va0->rel == va1->rel))) {
1037bf215546Sopenharmony_ci
1038bf215546Sopenharmony_ci				alu_node *add = sh.create_alu();
1039bf215546Sopenharmony_ci				add->bc.set_op(ALU_OP2_ADD);
1040bf215546Sopenharmony_ci
1041bf215546Sopenharmony_ci				add->dst.resize(1);
1042bf215546Sopenharmony_ci				add->src.resize(2);
1043bf215546Sopenharmony_ci
1044bf215546Sopenharmony_ci				value *t = sh.create_temp_value();
1045bf215546Sopenharmony_ci				t->def = add;
1046bf215546Sopenharmony_ci				add->dst[0] = t;
1047bf215546Sopenharmony_ci				add->src[0] = va0;
1048bf215546Sopenharmony_ci				add->src[1] = va1;
1049bf215546Sopenharmony_ci				add->bc.src[0] = n.bc.src[!es0];
1050bf215546Sopenharmony_ci				add->bc.src[1] = md->bc.src[!es1];
1051bf215546Sopenharmony_ci
1052bf215546Sopenharmony_ci				add->bc.src[1].neg ^= n.bc.src[2].neg ^
1053bf215546Sopenharmony_ci						(n.bc.src[es0].neg != md->bc.src[es1].neg);
1054bf215546Sopenharmony_ci
1055bf215546Sopenharmony_ci				n.insert_before(add);
1056bf215546Sopenharmony_ci				vt.add_value(t);
1057bf215546Sopenharmony_ci
1058bf215546Sopenharmony_ci				t = t->gvalue();
1059bf215546Sopenharmony_ci
1060bf215546Sopenharmony_ci				if (es0 == 1) {
1061bf215546Sopenharmony_ci					n.src[0] = n.src[1];
1062bf215546Sopenharmony_ci					n.bc.src[0] = n.bc.src[1];
1063bf215546Sopenharmony_ci				}
1064bf215546Sopenharmony_ci
1065bf215546Sopenharmony_ci				n.src[1] = t;
1066bf215546Sopenharmony_ci				n.bc.src[1].clear();
1067bf215546Sopenharmony_ci
1068bf215546Sopenharmony_ci				n.src.resize(2);
1069bf215546Sopenharmony_ci
1070bf215546Sopenharmony_ci				n.bc.set_op(op);
1071bf215546Sopenharmony_ci				return fold_alu_op2(n);
1072bf215546Sopenharmony_ci			}
1073bf215546Sopenharmony_ci		}
1074bf215546Sopenharmony_ci	}
1075bf215546Sopenharmony_ci
1076bf215546Sopenharmony_ci	if (!isc0 && !isc1 && !isc2)
1077bf215546Sopenharmony_ci		return false;
1078bf215546Sopenharmony_ci
1079bf215546Sopenharmony_ci	if (isc0 && isc1 && isc2) {
1080bf215546Sopenharmony_ci		switch (n.bc.op) {
1081bf215546Sopenharmony_ci		case ALU_OP3_MULADD_IEEE:
1082bf215546Sopenharmony_ci		case ALU_OP3_MULADD: dv = cv0.f * cv1.f + cv2.f; break;
1083bf215546Sopenharmony_ci
1084bf215546Sopenharmony_ci		// TODO
1085bf215546Sopenharmony_ci
1086bf215546Sopenharmony_ci		default:
1087bf215546Sopenharmony_ci			return false;
1088bf215546Sopenharmony_ci		}
1089bf215546Sopenharmony_ci	} else {
1090bf215546Sopenharmony_ci		if (isc0 && isc1) {
1091bf215546Sopenharmony_ci			switch (n.bc.op) {
1092bf215546Sopenharmony_ci			case ALU_OP3_MULADD:
1093bf215546Sopenharmony_ci			case ALU_OP3_MULADD_IEEE:
1094bf215546Sopenharmony_ci				dv = cv0.f * cv1.f;
1095bf215546Sopenharmony_ci				n.bc.set_op(ALU_OP2_ADD);
1096bf215546Sopenharmony_ci				n.src[0] = sh.get_const_value(dv);
1097bf215546Sopenharmony_ci				n.bc.src[0].clear();
1098bf215546Sopenharmony_ci				n.src[1] = n.src[2];
1099bf215546Sopenharmony_ci				n.bc.src[1] = n.bc.src[2];
1100bf215546Sopenharmony_ci				n.src.resize(2);
1101bf215546Sopenharmony_ci				return fold_alu_op2(n);
1102bf215546Sopenharmony_ci			}
1103bf215546Sopenharmony_ci		}
1104bf215546Sopenharmony_ci
1105bf215546Sopenharmony_ci		if (n.bc.op == ALU_OP3_MULADD) {
1106bf215546Sopenharmony_ci			if ((isc0 && cv0 == literal(0)) || (isc1 && cv1 == literal(0))) {
1107bf215546Sopenharmony_ci				convert_to_mov(n, n.src[2], n.bc.src[2].neg,  n.bc.src[2].abs);
1108bf215546Sopenharmony_ci				return fold_alu_op1(n);
1109bf215546Sopenharmony_ci			}
1110bf215546Sopenharmony_ci		}
1111bf215546Sopenharmony_ci
1112bf215546Sopenharmony_ci		if (n.bc.op == ALU_OP3_MULADD || n.bc.op == ALU_OP3_MULADD_IEEE) {
1113bf215546Sopenharmony_ci			unsigned op = n.bc.op == ALU_OP3_MULADD_IEEE ?
1114bf215546Sopenharmony_ci					ALU_OP2_MUL_IEEE : ALU_OP2_MUL;
1115bf215546Sopenharmony_ci
1116bf215546Sopenharmony_ci			if (isc1 && v0 == v2) {
1117bf215546Sopenharmony_ci				cv1.f += (n.bc.src[2].neg != n.bc.src[0].neg ? -1.0f : 1.0f);
1118bf215546Sopenharmony_ci				n.src[1] = sh.get_const_value(cv1);
1119bf215546Sopenharmony_ci				n.bc.src[1].neg = 0;
1120bf215546Sopenharmony_ci				n.bc.src[1].abs = 0;
1121bf215546Sopenharmony_ci				n.bc.set_op(op);
1122bf215546Sopenharmony_ci				n.src.resize(2);
1123bf215546Sopenharmony_ci				return fold_alu_op2(n);
1124bf215546Sopenharmony_ci			} else if (isc0 && v1 == v2) {
1125bf215546Sopenharmony_ci				cv0.f += (n.bc.src[2].neg != n.bc.src[1].neg ? -1.0f : 1.0f);
1126bf215546Sopenharmony_ci				n.src[0] = sh.get_const_value(cv0);
1127bf215546Sopenharmony_ci				n.bc.src[0].neg = 0;
1128bf215546Sopenharmony_ci				n.bc.src[0].abs = 0;
1129bf215546Sopenharmony_ci				n.bc.set_op(op);
1130bf215546Sopenharmony_ci				n.src.resize(2);
1131bf215546Sopenharmony_ci				return fold_alu_op2(n);
1132bf215546Sopenharmony_ci			}
1133bf215546Sopenharmony_ci		}
1134bf215546Sopenharmony_ci
1135bf215546Sopenharmony_ci		return false;
1136bf215546Sopenharmony_ci	}
1137bf215546Sopenharmony_ci
1138bf215546Sopenharmony_ci	apply_alu_dst_mod(n.bc, dv);
1139bf215546Sopenharmony_ci	assign_source(n.dst[0], get_const(dv));
1140bf215546Sopenharmony_ci	return true;
1141bf215546Sopenharmony_ci}
1142bf215546Sopenharmony_ci
1143bf215546Sopenharmony_ciunsigned invert_setcc_condition(unsigned cc, bool &swap_args) {
1144bf215546Sopenharmony_ci	unsigned ncc = 0;
1145bf215546Sopenharmony_ci
1146bf215546Sopenharmony_ci	switch (cc) {
1147bf215546Sopenharmony_ci	case AF_CC_E: ncc = AF_CC_NE; break;
1148bf215546Sopenharmony_ci	case AF_CC_NE: ncc = AF_CC_E; break;
1149bf215546Sopenharmony_ci	case AF_CC_GE: ncc = AF_CC_GT; swap_args = true; break;
1150bf215546Sopenharmony_ci	case AF_CC_GT: ncc = AF_CC_GE; swap_args = true; break;
1151bf215546Sopenharmony_ci	default:
1152bf215546Sopenharmony_ci		assert(!"unexpected condition code");
1153bf215546Sopenharmony_ci		break;
1154bf215546Sopenharmony_ci	}
1155bf215546Sopenharmony_ci	return ncc;
1156bf215546Sopenharmony_ci}
1157bf215546Sopenharmony_ci
1158bf215546Sopenharmony_ciunsigned get_setcc_op(unsigned cc, unsigned cmp_type, bool int_dst) {
1159bf215546Sopenharmony_ci
1160bf215546Sopenharmony_ci	if (int_dst && cmp_type == AF_FLOAT_CMP) {
1161bf215546Sopenharmony_ci		switch (cc) {
1162bf215546Sopenharmony_ci		case AF_CC_E: return ALU_OP2_SETE_DX10;
1163bf215546Sopenharmony_ci		case AF_CC_NE: return ALU_OP2_SETNE_DX10;
1164bf215546Sopenharmony_ci		case AF_CC_GT: return ALU_OP2_SETGT_DX10;
1165bf215546Sopenharmony_ci		case AF_CC_GE: return ALU_OP2_SETGE_DX10;
1166bf215546Sopenharmony_ci		}
1167bf215546Sopenharmony_ci	} else {
1168bf215546Sopenharmony_ci
1169bf215546Sopenharmony_ci		switch(cmp_type) {
1170bf215546Sopenharmony_ci		case AF_FLOAT_CMP: {
1171bf215546Sopenharmony_ci			switch (cc) {
1172bf215546Sopenharmony_ci			case AF_CC_E: return ALU_OP2_SETE;
1173bf215546Sopenharmony_ci			case AF_CC_NE: return ALU_OP2_SETNE;
1174bf215546Sopenharmony_ci			case AF_CC_GT: return ALU_OP2_SETGT;
1175bf215546Sopenharmony_ci			case AF_CC_GE: return ALU_OP2_SETGE;
1176bf215546Sopenharmony_ci			}
1177bf215546Sopenharmony_ci			break;
1178bf215546Sopenharmony_ci		}
1179bf215546Sopenharmony_ci		case AF_INT_CMP: {
1180bf215546Sopenharmony_ci			switch (cc) {
1181bf215546Sopenharmony_ci			case AF_CC_E: return ALU_OP2_SETE_INT;
1182bf215546Sopenharmony_ci			case AF_CC_NE: return ALU_OP2_SETNE_INT;
1183bf215546Sopenharmony_ci			case AF_CC_GT: return ALU_OP2_SETGT_INT;
1184bf215546Sopenharmony_ci			case AF_CC_GE: return ALU_OP2_SETGE_INT;
1185bf215546Sopenharmony_ci			}
1186bf215546Sopenharmony_ci			break;
1187bf215546Sopenharmony_ci		}
1188bf215546Sopenharmony_ci		case AF_UINT_CMP: {
1189bf215546Sopenharmony_ci			switch (cc) {
1190bf215546Sopenharmony_ci			case AF_CC_E: return ALU_OP2_SETE_INT;
1191bf215546Sopenharmony_ci			case AF_CC_NE: return ALU_OP2_SETNE_INT;
1192bf215546Sopenharmony_ci			case AF_CC_GT: return ALU_OP2_SETGT_UINT;
1193bf215546Sopenharmony_ci			case AF_CC_GE: return ALU_OP2_SETGE_UINT;
1194bf215546Sopenharmony_ci			}
1195bf215546Sopenharmony_ci			break;
1196bf215546Sopenharmony_ci		}
1197bf215546Sopenharmony_ci		}
1198bf215546Sopenharmony_ci	}
1199bf215546Sopenharmony_ci
1200bf215546Sopenharmony_ci	assert(!"unexpected cc&cmp_type combination");
1201bf215546Sopenharmony_ci	return ~0u;
1202bf215546Sopenharmony_ci}
1203bf215546Sopenharmony_ci
1204bf215546Sopenharmony_ciunsigned get_predsetcc_op(unsigned cc, unsigned cmp_type) {
1205bf215546Sopenharmony_ci
1206bf215546Sopenharmony_ci	switch(cmp_type) {
1207bf215546Sopenharmony_ci	case AF_FLOAT_CMP: {
1208bf215546Sopenharmony_ci		switch (cc) {
1209bf215546Sopenharmony_ci		case AF_CC_E: return ALU_OP2_PRED_SETE;
1210bf215546Sopenharmony_ci		case AF_CC_NE: return ALU_OP2_PRED_SETNE;
1211bf215546Sopenharmony_ci		case AF_CC_GT: return ALU_OP2_PRED_SETGT;
1212bf215546Sopenharmony_ci		case AF_CC_GE: return ALU_OP2_PRED_SETGE;
1213bf215546Sopenharmony_ci		}
1214bf215546Sopenharmony_ci		break;
1215bf215546Sopenharmony_ci	}
1216bf215546Sopenharmony_ci	case AF_INT_CMP: {
1217bf215546Sopenharmony_ci		switch (cc) {
1218bf215546Sopenharmony_ci		case AF_CC_E: return ALU_OP2_PRED_SETE_INT;
1219bf215546Sopenharmony_ci		case AF_CC_NE: return ALU_OP2_PRED_SETNE_INT;
1220bf215546Sopenharmony_ci		case AF_CC_GT: return ALU_OP2_PRED_SETGT_INT;
1221bf215546Sopenharmony_ci		case AF_CC_GE: return ALU_OP2_PRED_SETGE_INT;
1222bf215546Sopenharmony_ci		}
1223bf215546Sopenharmony_ci		break;
1224bf215546Sopenharmony_ci	}
1225bf215546Sopenharmony_ci	case AF_UINT_CMP: {
1226bf215546Sopenharmony_ci		switch (cc) {
1227bf215546Sopenharmony_ci		case AF_CC_E: return ALU_OP2_PRED_SETE_INT;
1228bf215546Sopenharmony_ci		case AF_CC_NE: return ALU_OP2_PRED_SETNE_INT;
1229bf215546Sopenharmony_ci		case AF_CC_GT: return ALU_OP2_PRED_SETGT_UINT;
1230bf215546Sopenharmony_ci		case AF_CC_GE: return ALU_OP2_PRED_SETGE_UINT;
1231bf215546Sopenharmony_ci		}
1232bf215546Sopenharmony_ci		break;
1233bf215546Sopenharmony_ci	}
1234bf215546Sopenharmony_ci	}
1235bf215546Sopenharmony_ci
1236bf215546Sopenharmony_ci	assert(!"unexpected cc&cmp_type combination");
1237bf215546Sopenharmony_ci	return ~0u;
1238bf215546Sopenharmony_ci}
1239bf215546Sopenharmony_ci
1240bf215546Sopenharmony_ciunsigned get_killcc_op(unsigned cc, unsigned cmp_type) {
1241bf215546Sopenharmony_ci
1242bf215546Sopenharmony_ci	switch(cmp_type) {
1243bf215546Sopenharmony_ci	case AF_FLOAT_CMP: {
1244bf215546Sopenharmony_ci		switch (cc) {
1245bf215546Sopenharmony_ci		case AF_CC_E: return ALU_OP2_KILLE;
1246bf215546Sopenharmony_ci		case AF_CC_NE: return ALU_OP2_KILLNE;
1247bf215546Sopenharmony_ci		case AF_CC_GT: return ALU_OP2_KILLGT;
1248bf215546Sopenharmony_ci		case AF_CC_GE: return ALU_OP2_KILLGE;
1249bf215546Sopenharmony_ci		}
1250bf215546Sopenharmony_ci		break;
1251bf215546Sopenharmony_ci	}
1252bf215546Sopenharmony_ci	case AF_INT_CMP: {
1253bf215546Sopenharmony_ci		switch (cc) {
1254bf215546Sopenharmony_ci		case AF_CC_E: return ALU_OP2_KILLE_INT;
1255bf215546Sopenharmony_ci		case AF_CC_NE: return ALU_OP2_KILLNE_INT;
1256bf215546Sopenharmony_ci		case AF_CC_GT: return ALU_OP2_KILLGT_INT;
1257bf215546Sopenharmony_ci		case AF_CC_GE: return ALU_OP2_KILLGE_INT;
1258bf215546Sopenharmony_ci		}
1259bf215546Sopenharmony_ci		break;
1260bf215546Sopenharmony_ci	}
1261bf215546Sopenharmony_ci	case AF_UINT_CMP: {
1262bf215546Sopenharmony_ci		switch (cc) {
1263bf215546Sopenharmony_ci		case AF_CC_E: return ALU_OP2_KILLE_INT;
1264bf215546Sopenharmony_ci		case AF_CC_NE: return ALU_OP2_KILLNE_INT;
1265bf215546Sopenharmony_ci		case AF_CC_GT: return ALU_OP2_KILLGT_UINT;
1266bf215546Sopenharmony_ci		case AF_CC_GE: return ALU_OP2_KILLGE_UINT;
1267bf215546Sopenharmony_ci		}
1268bf215546Sopenharmony_ci		break;
1269bf215546Sopenharmony_ci	}
1270bf215546Sopenharmony_ci	}
1271bf215546Sopenharmony_ci
1272bf215546Sopenharmony_ci	assert(!"unexpected cc&cmp_type combination");
1273bf215546Sopenharmony_ci	return ~0u;
1274bf215546Sopenharmony_ci}
1275bf215546Sopenharmony_ci
1276bf215546Sopenharmony_ciunsigned get_cndcc_op(unsigned cc, unsigned cmp_type) {
1277bf215546Sopenharmony_ci
1278bf215546Sopenharmony_ci	switch(cmp_type) {
1279bf215546Sopenharmony_ci	case AF_FLOAT_CMP: {
1280bf215546Sopenharmony_ci		switch (cc) {
1281bf215546Sopenharmony_ci		case AF_CC_E: return ALU_OP3_CNDE;
1282bf215546Sopenharmony_ci		case AF_CC_GT: return ALU_OP3_CNDGT;
1283bf215546Sopenharmony_ci		case AF_CC_GE: return ALU_OP3_CNDGE;
1284bf215546Sopenharmony_ci		}
1285bf215546Sopenharmony_ci		break;
1286bf215546Sopenharmony_ci	}
1287bf215546Sopenharmony_ci	case AF_INT_CMP: {
1288bf215546Sopenharmony_ci		switch (cc) {
1289bf215546Sopenharmony_ci		case AF_CC_E: return ALU_OP3_CNDE_INT;
1290bf215546Sopenharmony_ci		case AF_CC_GT: return ALU_OP3_CNDGT_INT;
1291bf215546Sopenharmony_ci		case AF_CC_GE: return ALU_OP3_CNDGE_INT;
1292bf215546Sopenharmony_ci		}
1293bf215546Sopenharmony_ci		break;
1294bf215546Sopenharmony_ci	}
1295bf215546Sopenharmony_ci	}
1296bf215546Sopenharmony_ci
1297bf215546Sopenharmony_ci	assert(!"unexpected cc&cmp_type combination");
1298bf215546Sopenharmony_ci	return ~0u;
1299bf215546Sopenharmony_ci}
1300bf215546Sopenharmony_ci
1301bf215546Sopenharmony_ci
1302bf215546Sopenharmony_civoid convert_predset_to_set(shader& sh, alu_node* a) {
1303bf215546Sopenharmony_ci
1304bf215546Sopenharmony_ci	unsigned flags = a->bc.op_ptr->flags;
1305bf215546Sopenharmony_ci	unsigned cc = flags & AF_CC_MASK;
1306bf215546Sopenharmony_ci	unsigned cmp_type = flags & AF_CMP_TYPE_MASK;
1307bf215546Sopenharmony_ci
1308bf215546Sopenharmony_ci	bool swap_args = false;
1309bf215546Sopenharmony_ci
1310bf215546Sopenharmony_ci	cc = invert_setcc_condition(cc, swap_args);
1311bf215546Sopenharmony_ci
1312bf215546Sopenharmony_ci	unsigned newop = get_setcc_op(cc, cmp_type, true);
1313bf215546Sopenharmony_ci
1314bf215546Sopenharmony_ci	a->dst.resize(1);
1315bf215546Sopenharmony_ci	a->bc.set_op(newop);
1316bf215546Sopenharmony_ci
1317bf215546Sopenharmony_ci	if (swap_args) {
1318bf215546Sopenharmony_ci		std::swap(a->src[0], a->src[1]);
1319bf215546Sopenharmony_ci		std::swap(a->bc.src[0], a->bc.src[1]);
1320bf215546Sopenharmony_ci	}
1321bf215546Sopenharmony_ci
1322bf215546Sopenharmony_ci	a->bc.update_exec_mask = 0;
1323bf215546Sopenharmony_ci	a->bc.update_pred = 0;
1324bf215546Sopenharmony_ci}
1325bf215546Sopenharmony_ci
1326bf215546Sopenharmony_ci} // namespace r600_sb
1327