/*
 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Vadim Girlin
 */

#include "sb_bc.h"
#include "sb_shader.h"
#include "sb_pass.h"

namespace r600_sb {

shader::shader(sb_context &sctx, shader_target t, unsigned id)
: ctx(sctx), next_temp_value_index(temp_regid_offset),
  prep_regs_count(), pred_sels(),
  regions(), inputs(), undef(), val_pool(sizeof(value)),
  pool(), all_nodes(), src_stats(), opt_stats(), errors(),
  optimized(), id(id),
  coal(*this), bbs(),
  target(t), vt(ex), ex(*this), root(),
  compute_interferences(),
  has_alu_predication(),
  uses_gradients(), safe_math(), ngpr(), nstack(), dce_flags() {}

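// Pick an ALU slot for the instruction: the vector slot matching dst_chan is
// tried first; if that slot is unavailable (or the op has no vector-slot
// encoding) and the op supports the scalar unit, fall back to the trans slot.
// Cayman has no trans unit, hence the is_cayman() check. Returns false if the
// chosen slot is already occupied.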
bool shader::assign_slot(alu_node* n, alu_node *slots[5]) {

	unsigned slot_flags = ctx.alu_slots(n->bc.op);
	unsigned slot = n->bc.dst_chan;

	if (!ctx.is_cayman() && (!(slot_flags & AF_V) || slots[slot]) &&
			(slot_flags & AF_S))
		slot = SLOT_TRANS;

	if (slots[slot])
		return false;

	n->bc.slot = slot;
	slots[slot] = n;
	return true;
}

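// For every channel set in comp_mask, fetch the corresponding GPR value, pin
// it to its register and channel, and append it to vec. If the value belongs
// to an indirectly addressable array, the whole array is pinned to its
// original GPR range as well.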
void shader::add_pinned_gpr_values(vvec& vec, unsigned gpr, unsigned comp_mask,
                            bool src) {
	unsigned chan = 0;
	while (comp_mask) {
		if (comp_mask & 1) {
			value *v = get_gpr_value(src, gpr, chan, false);
			v->flags |= (VLF_PIN_REG | VLF_PIN_CHAN);
			if (!v->is_rel()) {
				v->gpr = v->pin_gpr = v->select;
				v->fix();
			}
			if (v->array && !v->array->gpr) {
				// if the pinned value can be accessed with indirect addressing,
				// pin the entire array to its original location
				v->array->gpr = v->array->base_gpr;
			}
			vec.push_back(v);
		}
		comp_mask >>= 1;
		++chan;
	}
}

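// Create a CF node for a clause of the given subtype (ALU/TEX/VTX/GDS) with
// the matching CF opcode and the barrier bit set.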
cf_node* shader::create_clause(node_subtype nst) {
	cf_node *n = create_cf();

	n->subtype = nst;

	switch (nst) {
	case NST_ALU_CLAUSE: n->bc.set_op(CF_OP_ALU); break;
	case NST_TEX_CLAUSE: n->bc.set_op(CF_OP_TEX); break;
	case NST_VTX_CLAUSE: n->bc.set_op(CF_OP_VTX); break;
	case NST_GDS_CLAUSE: n->bc.set_op(CF_OP_GDS); break;
	default: assert(!"invalid clause type"); break;
	}

	n->bc.barrier = 1;
	return n;
}

void shader::create_bbs() {
	create_bbs(root, bbs);
}

void shader::expand_bbs() {
	expand_bbs(bbs);
}

alu_node* shader::create_mov(value* dst, value* src) {
	alu_node *n = create_alu();
	n->bc.set_op(ALU_OP1_MOV);
	n->dst.push_back(dst);
	n->src.push_back(src);
	dst->def = n;

	return n;
}

alu_node* shader::create_copy_mov(value* dst, value* src, unsigned affcost) {
	alu_node *n = create_mov(dst, src);

	dst->assign_source(src);
	n->flags |= NF_COPY_MOV | NF_DONT_HOIST;

	if (affcost && dst->is_sgpr() && src->is_sgpr())
		coal.add_edge(src, dst, affcost);

	return n;
}

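// Return the canonical value for (kind, register id, version), creating and
// caching it in reg_values on first use. Version 0 of the preloaded (prep)
// registers is taken directly from the value pool.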
value* shader::get_value(value_kind kind, sel_chan id,
                         unsigned version) {
	if (version == 0 && kind == VLK_REG && id.sel() < prep_regs_count)
		return val_pool[id - 1];

	unsigned key = (kind << 28) | (version << 16) | id;
	value_map::iterator i = reg_values.find(key);
	if (i != reg_values.end()) {
		return i->second;
	}
	value *v = create_value(kind, id, version);
	reg_values.insert(std::make_pair(key, v));
	return v;
}

value* shader::get_special_value(unsigned sv_id, unsigned version) {
	sel_chan id(sv_id, 0);
	return get_value(VLK_SPECIAL_REG, id, version);
}

void shader::fill_array_values(gpr_array *a, vvec &vv) {
	unsigned sz = a->array_size;
	vv.resize(sz);
	for (unsigned i = 0; i < a->array_size; ++i) {
		vv[i] = get_gpr_value(true, a->base_gpr.sel() + i, a->base_gpr.chan(),
		                      false);
	}
}

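// Get the value for a GPR register/channel. Relative (indirect) accesses
// produce a VLK_REL_REG value that uses the AR index as its relative operand
// and records the whole array in the muse (and, for destinations, mdef)
// vector; direct accesses go through the versioned value cache, with version
// 0 of the prep registers served from the value pool.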
value* shader::get_gpr_value(bool src, unsigned reg, unsigned chan, bool rel,
                             unsigned version) {
	sel_chan id(reg, chan);
	value *v;
	gpr_array *a = get_gpr_array(reg, chan);
	if (rel) {
		assert(a);
		v = create_value(VLK_REL_REG, id, 0);
		v->rel = get_special_value(SV_AR_INDEX);
		fill_array_values(a, v->muse);
		if (!src)
			fill_array_values(a, v->mdef);
	} else {
		if (version == 0 && reg < prep_regs_count)
			return (val_pool[id - 1]);

		v = get_value(VLK_REG, id, version);
	}

	v->array = a;
	v->pin_gpr = v->select;

	return v;
}

value* shader::create_temp_value() {
	sel_chan id(++next_temp_value_index, 0);
	return get_value(VLK_TEMP, id, 0);
}

value* shader::get_kcache_value(unsigned bank, unsigned index, unsigned chan, alu_kcache_index_mode index_mode) {
	return get_ro_value(kcache_values, VLK_KCACHE,
			sel_chan(bank, index, chan, index_mode));
}

void shader::add_input(unsigned gpr, bool preloaded, unsigned comp_mask) {
	if (inputs.size() <= gpr)
		inputs.resize(gpr+1);

	shader_input &i = inputs[gpr];
	i.preloaded = preloaded;
	i.comp_mask = comp_mask;

	if (preloaded) {
		add_pinned_gpr_values(root->dst, gpr, comp_mask, true);
	}
}

void shader::init() {
	assert(!root);
	root = create_container();
}

void shader::init_call_fs(cf_node* cf) {
	unsigned gpr = 0;

	assert(target == TARGET_LS || target == TARGET_VS || target == TARGET_ES);

	for(inputs_vec::const_iterator I = inputs.begin(),
			E = inputs.end(); I != E; ++I, ++gpr) {
		if (!I->preloaded)
			add_pinned_gpr_values(cf->dst, gpr, I->comp_mask, false);
		else
			add_pinned_gpr_values(cf->src, gpr, I->comp_mask, true);
	}
}

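// Mark every value in the set as undefined by pointing its gvn_source at the
// gvn_source of the shared undef value (which is registered in the value
// table on first use).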
void shader::set_undef(val_set& s) {
	value *undefined = get_undef_value();
	if (!undefined->gvn_source)
		vt.add_value(undefined);

	val_set &vs = s;

	for (val_set::iterator I = vs.begin(*this), E = vs.end(*this); I != E; ++I) {
		value *v = *I;

		assert(!v->is_readonly() && !v->is_rel());

		v->gvn_source = undefined->gvn_source;
	}
}

value* shader::create_value(value_kind k, sel_chan regid, unsigned ver) {
	value *v = val_pool.create(k, regid, ver);
	return v;
}

value* shader::get_undef_value() {
	if (!undef)
		undef = create_value(VLK_UNDEF, 0, 0);
	return undef;
}

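// Node factories: all IR nodes are placement-new'd from the shader's memory
// pool and tracked in all_nodes so that ~shader() can invoke their
// destructors explicitly.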
node* shader::create_node(node_type nt, node_subtype nst, node_flags flags) {
	node *n = new (pool.allocate(sizeof(node))) node(nt, nst, flags);
	all_nodes.push_back(n);
	return n;
}

alu_node* shader::create_alu() {
	alu_node* n = new (pool.allocate(sizeof(alu_node))) alu_node();
	all_nodes.push_back(n);
	return n;
}

alu_group_node* shader::create_alu_group() {
	alu_group_node* n =
			new (pool.allocate(sizeof(alu_group_node))) alu_group_node();
	all_nodes.push_back(n);
	return n;
}

alu_packed_node* shader::create_alu_packed() {
	alu_packed_node* n =
			new (pool.allocate(sizeof(alu_packed_node))) alu_packed_node();
	all_nodes.push_back(n);
	return n;
}

cf_node* shader::create_cf() {
	cf_node* n = new (pool.allocate(sizeof(cf_node))) cf_node();
	n->bc.barrier = 1;
	all_nodes.push_back(n);
	return n;
}

fetch_node* shader::create_fetch() {
	fetch_node* n = new (pool.allocate(sizeof(fetch_node))) fetch_node();
	all_nodes.push_back(n);
	return n;
}

region_node* shader::create_region() {
	region_node *n = new (pool.allocate(sizeof(region_node)))
			region_node(regions.size());
	regions.push_back(n);
	all_nodes.push_back(n);
	return n;
}

depart_node* shader::create_depart(region_node* target) {
	depart_node* n = new (pool.allocate(sizeof(depart_node)))
			depart_node(target, target->departs.size());
	target->departs.push_back(n);
	all_nodes.push_back(n);
	return n;
}

repeat_node* shader::create_repeat(region_node* target) {
	repeat_node* n = new (pool.allocate(sizeof(repeat_node)))
			repeat_node(target, target->repeats.size() + 1);
	target->repeats.push_back(n);
	all_nodes.push_back(n);
	return n;
}

container_node* shader::create_container(node_type nt, node_subtype nst,
		                                 node_flags flags) {
	container_node *n = new (pool.allocate(sizeof(container_node)))
			container_node(nt, nst, flags);
	all_nodes.push_back(n);
	return n;
}

if_node* shader::create_if() {
	if_node* n = new (pool.allocate(sizeof(if_node))) if_node();
	all_nodes.push_back(n);
	return n;
}

bb_node* shader::create_bb(unsigned id, unsigned loop_level) {
	bb_node* n = new (pool.allocate(sizeof(bb_node))) bb_node(id, loop_level);
	all_nodes.push_back(n);
	return n;
}

value* shader::get_special_ro_value(unsigned sel) {
	return get_ro_value(special_ro_values, VLK_PARAM, sel);
}

value* shader::get_const_value(const literal &v) {
	value *val = get_ro_value(const_values, VLK_CONST, v);
	val->literal_value = v;
	return val;
}

shader::~shader() {
	for (node_vec::iterator I = all_nodes.begin(), E = all_nodes.end();
			I != E; ++I)
		(*I)->~node();

	for (gpr_array_vec::iterator I = gpr_arrays.begin(), E = gpr_arrays.end();
			I != E; ++I) {
		delete *I;
	}
}

void shader::dump_ir() {
	if (ctx.dump_pass)
		dump(*this).run();
}

value* shader::get_value_version(value* v, unsigned ver) {
	assert(!v->is_readonly() && !v->is_rel());
	value *vv = get_value(v->kind, v->select, ver);
	assert(vv);

	if (v->array) {
		vv->array = v->array;
	}

	return vv;
}

gpr_array* shader::get_gpr_array(unsigned reg, unsigned chan) {

	for (regarray_vec::iterator I = gpr_arrays.begin(),
			E = gpr_arrays.end(); I != E; ++I) {
		gpr_array* a = *I;
		unsigned achan = a->base_gpr.chan();
		unsigned areg = a->base_gpr.sel();
		if (achan == chan && (reg >= areg && reg < areg+a->array_size))
			return a;
	}
	return NULL;
}

void shader::add_gpr_array(unsigned gpr_start, unsigned gpr_count,
					   unsigned comp_mask) {
	unsigned chan = 0;
	while (comp_mask) {
		if (comp_mask & 1) {
			gpr_array *a = new gpr_array(
					sel_chan(gpr_start, chan), gpr_count);

			SB_DUMP_PASS( sblog << "add_gpr_array: @" << a->base_gpr
			         << " [" << a->array_size << "]\n";
			);

			gpr_arrays.push_back(a);
		}
		comp_mask >>= 1;
		++chan;
	}
}

value* shader::get_pred_sel(int sel) {
	assert(sel == 0 || sel == 1);
	if (!pred_sels[sel])
		pred_sels[sel] = get_const_value(sel);

	return pred_sels[sel];
}

cf_node* shader::create_cf(unsigned op) {
	cf_node *c = create_cf();
	c->bc.set_op(op);
	c->bc.barrier = 1;
	return c;
}

std::string shader::get_full_target_name() {
	std::string s = get_shader_target_name();
	s += "/";
	s += ctx.get_hw_chip_name();
	s += "/";
	s += ctx.get_hw_class_name();
	return s;
}

const char* shader::get_shader_target_name() {
	switch (target) {
		case TARGET_VS: return "VS";
		case TARGET_ES: return "ES";
		case TARGET_PS: return "PS";
		case TARGET_GS: return "GS";
		case TARGET_HS: return "HS";
		case TARGET_LS: return "LS";
		case TARGET_COMPUTE: return "COMPUTE";
		case TARGET_FETCH: return "FETCH";
		default:
			return "INVALID_TARGET";
	}
}

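// If the parent of the depart/repeat node dr is itself a repeat or depart,
// expand that parent back into its target region, then cut away everything
// following dr in its parent (presumably unreachable after the expansion).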
void shader::simplify_dep_rep(node* dr) {
	container_node *p = dr->parent;
	if (p->is_repeat()) {
		repeat_node *r = static_cast<repeat_node*>(p);
		r->target->expand_repeat(r);
	} else if (p->is_depart()) {
		depart_node *d = static_cast<depart_node*>(p);
		d->target->expand_depart(d);
	}
	if (dr->next)
		dr->parent->cut(dr->next, NULL);
}


// FIXME this is used in some places as the max non-temp gpr,
// (MAX_GPR - 2 * ctx.alu_temp_gprs) should be used for that instead.
unsigned shader::first_temp_gpr() {
	return MAX_GPR - ctx.alu_temp_gprs;
}

unsigned shader::num_nontemp_gpr() {
	return MAX_GPR - 2 * ctx.alu_temp_gprs;
}

void shader::set_uses_kill() {
	if (root->src.empty())
		root->src.resize(1);

	if (!root->src[0])
		root->src[0] = get_special_value(SV_VALID_MASK);
}

alu_node* shader::clone(alu_node* n) {
	alu_node *c = create_alu();

	// FIXME: this may be wrong with indirect operands
	c->src = n->src;
	c->dst = n->dst;

	c->bc = n->bc;
	c->pred = n->pred;

	return c;
}

void shader::collect_stats(bool opt) {
	if (!sb_context::dump_stat)
		return;

	shader_stats &s = opt ? opt_stats : src_stats;

	s.shaders = 1;
	s.ngpr = ngpr;
	s.nstack = nstack;
	s.collect(root);

	if (opt)
		ctx.opt_stats.accumulate(s);
	else
		ctx.src_stats.accumulate(s);
}

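// Return a cached read-only value (used for kcache constants, special/param
// values and literal constants) for the given key, creating it with the
// VLF_READONLY flag on first use.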
value* shader::get_ro_value(value_map& vm, value_kind vk, unsigned key) {
	value_map::iterator I = vm.find(key);
	if (I != vm.end())
		return I->second;
	value *v = create_value(vk, key, 0);
	v->flags = VLF_READONLY;
	vm.insert(std::make_pair(key, v));
	return v;
}

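// Recursively split a container into basic blocks: maximal runs of
// instruction (NT_OP) nodes are wrapped into bb_node containers, nested
// containers are visited with loop_level incremented when entering a loop
// region, scanning of the parent stops after a DEPART node, and an empty
// block is appended when the container ends with an IF node.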
void shader::create_bbs(container_node* n, bbs_vec &bbs, int loop_level) {

	bool inside_bb = false;
	bool last_inside_bb = true;
	node_iterator bb_start(n->begin()), I(bb_start), E(n->end());

	for (; I != E; ++I) {
		node *k = *I;
		inside_bb = k->type == NT_OP;

		if (inside_bb && !last_inside_bb)
			bb_start = I;
		else if (!inside_bb) {
			if (last_inside_bb
					&& I->type != NT_REPEAT
					&& I->type != NT_DEPART
					&& I->type != NT_IF) {
				bb_node *bb = create_bb(bbs.size(), loop_level);
				bbs.push_back(bb);
				n->insert_node_before(*bb_start, bb);
				if (bb_start != I)
					bb->move(bb_start, I);
			}

			if (k->is_container()) {

				bool loop = false;
				if (k->type == NT_REGION) {
					loop = static_cast<region_node*>(k)->is_loop();
				}

				create_bbs(static_cast<container_node*>(k), bbs,
				           loop_level + loop);
			}
		}

		if (k->type == NT_DEPART)
			return;

		last_inside_bb = inside_bb;
	}

	if (last_inside_bb) {
		bb_node *bb = create_bb(bbs.size(), loop_level);
		bbs.push_back(bb);
		if (n->empty())
			n->push_back(bb);
		else {
			n->insert_node_before(*bb_start, bb);
			if (bb_start != n->end())
				bb->move(bb_start, n->end());
		}
	} else {
		if (n->last && n->last->type == NT_IF) {
			bb_node *bb = create_bb(bbs.size(), loop_level);
			bbs.push_back(bb);
			n->push_back(bb);
		}
	}
}

void shader::expand_bbs(bbs_vec &bbs) {

	for (bbs_vec::iterator I = bbs.begin(), E = bbs.end(); I != E; ++I) {
		bb_node *b = *I;
		b->expand();
	}
}

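// Map an instruction node to its scheduler queue: ALU-like nodes go to
// SQ_ALU, fetch instructions to SQ_TEX (SQ_VTX for vertex fetches on
// r600-class chips, SQ_GDS for GDS ops), and CF instructions to SQ_CF.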
sched_queue_id shader::get_queue_id(node* n) {
	switch (n->subtype) {
		case NST_ALU_INST:
		case NST_ALU_PACKED_INST:
		case NST_COPY:
		case NST_PSI:
			return SQ_ALU;
		case NST_FETCH_INST: {
			fetch_node *f = static_cast<fetch_node*>(n);
			if (ctx.is_r600() && (f->bc.op_ptr->flags & FF_VTX))
				return SQ_VTX;
			if (f->bc.op_ptr->flags & FF_GDS)
				return SQ_GDS;
			return SQ_TEX;
		}
		case NST_CF_INST:
			return SQ_CF;
		default:
			assert(0);
			return SQ_NUM;
	}
}

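// Recursively count instructions in the IR subtree: ALU and fetch
// instructions, ALU groups, ALU/fetch clauses and CF instructions.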
void shader_stats::collect(node *n) {
	if (n->is_alu_inst())
		++alu;
	else if (n->is_fetch_inst())
		++fetch;
	else if (n->is_container()) {
		container_node *c = static_cast<container_node*>(n);

		if (n->is_alu_group())
			++alu_groups;
		else if (n->is_alu_clause())
			++alu_clauses;
		else if (n->is_fetch_clause())
			++fetch_clauses;
		else if (n->is_cf_inst())
			++cf;

		if (!c->empty()) {
			for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
				collect(*I);
			}
		}
	}
}

void shader_stats::accumulate(shader_stats& s) {
	++shaders;
	ndw += s.ndw;
	ngpr += s.ngpr;
	nstack += s.nstack;

	alu += s.alu;
	alu_groups += s.alu_groups;
	alu_clauses += s.alu_clauses;
	fetch += s.fetch;
	fetch_clauses += s.fetch_clauses;
	cf += s.cf;
}

void shader_stats::dump() {
	sblog << "dw:" << ndw << ", gpr:" << ngpr << ", stk:" << nstack
			<< ", alu groups:" << alu_groups << ", alu clauses: " << alu_clauses
			<< ", alu:" << alu << ", fetch:" << fetch
			<< ", fetch clauses:" << fetch_clauses
			<< ", cf:" << cf;

	if (shaders > 1)
		sblog << ", shaders:" << shaders;

	sblog << "\n";
}

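// Print the relative change from d1 to d2 as a percentage; "N/A" is printed
// when the baseline is zero but the new value is not.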
static void print_diff(unsigned d1, unsigned d2) {
	if (d1)
		sblog << ((int)d2 - (int)d1) * 100 / (int)d1 << "%";
	else if (d2)
		sblog << "N/A";
	else
		sblog << "0%";
}

void shader_stats::dump_diff(shader_stats& s) {
	sblog << "dw:"; print_diff(ndw, s.ndw);
	sblog << ", gpr:"; print_diff(ngpr, s.ngpr);
	sblog << ", stk:"; print_diff(nstack, s.nstack);
	sblog << ", alu groups:"; print_diff(alu_groups, s.alu_groups);
	sblog << ", alu clauses: "; print_diff(alu_clauses, s.alu_clauses);
	sblog << ", alu:"; print_diff(alu, s.alu);
	sblog << ", fetch:"; print_diff(fetch, s.fetch);
	sblog << ", fetch clauses:"; print_diff(fetch_clauses, s.fetch_clauses);
	sblog << ", cf:"; print_diff(cf, s.cf);
	sblog << "\n";
}

} // namespace r600_sb