1/*
2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 *      Vadim Girlin
25 */
26
27#ifndef R600_SB_IR_H_
28#define R600_SB_IR_H_
29
30#include <algorithm>
31#include <stdint.h>
32#include <vector>
33#include <set>
34#include <algorithm>
35
36#include "sb_bc.h"
37
38namespace r600_sb {
39
40enum special_regs {
41	SV_ALU_PRED = 128,
42	SV_EXEC_MASK,
43	SV_AR_INDEX,
44	SV_VALID_MASK,
45	SV_GEOMETRY_EMIT,
46	SV_LDS_RW,
47	SV_LDS_OQA,
48	SV_LDS_OQB,
49	SV_SCRATCH
50};
51
52class node;
53class value;
54class shader;
55
56struct sel_chan
57{
58	unsigned id;
59
60	sel_chan(unsigned id = 0) : id(id) {}
61	sel_chan(unsigned sel, unsigned chan) : id(((sel << 2) | chan) + 1) {}
62
63	unsigned sel() const { return sel(id); }
64	unsigned chan() const {return chan(id); }
65	operator unsigned() const {return id;}
66
67	static unsigned sel(unsigned idx) { return (idx-1) >> 2; }
68	static unsigned chan(unsigned idx) { return (idx-1) & 3; }
69
70	sel_chan(unsigned bank, unsigned index,
71			 unsigned chan, alu_kcache_index_mode index_mode)
72		: id(sel_chan((bank << 12) | index | ((unsigned)index_mode << 28), chan).id) {}
73	unsigned kcache_index_mode() const { return sel() >> 28; }
74	unsigned kcache_sel() const { return sel() & 0x0fffffffu; }
75	unsigned kcache_bank() const { return kcache_sel() >> 12; }
76};
77
78inline sb_ostream& operator <<(sb_ostream& o, sel_chan r) {
79	static const char * ch = "xyzw";
80	o << r.sel() << "." << ch[r.chan()];
81	return o;
82}
83
84typedef std::vector<value*>  vvec;
85
86class sb_pool {
87protected:
88	static const unsigned SB_POOL_ALIGN = 8;
89	static const unsigned SB_POOL_DEFAULT_BLOCK_SIZE = (1 << 16);
90
91	typedef std::vector<void*> block_vector;
92
93	unsigned block_size;
94	block_vector blocks;
95	unsigned total_size;
96
97public:
98	sb_pool(unsigned block_size = SB_POOL_DEFAULT_BLOCK_SIZE)
99		: block_size(block_size), blocks(), total_size() {}
100
101	virtual ~sb_pool() { free_all(); }
102
103	void* allocate(unsigned sz);
104
105protected:
106	void free_all();
107};
108
109template <typename V, typename Comp = std::less<V> >
110class sb_set {
111	typedef std::vector<V> data_vector;
112	data_vector vec;
113public:
114
115	typedef typename data_vector::iterator iterator;
116	typedef typename data_vector::const_iterator const_iterator;
117
118	sb_set() : vec() {}
119	~sb_set() {  }
120
121	iterator begin() { return vec.begin(); }
122	iterator end() { return vec.end(); }
123	const_iterator begin() const { return vec.begin(); }
124	const_iterator end() const { return vec.end(); }
125
126	void add_set(const sb_set& s) {
127		data_vector t;
128		t.reserve(vec.size() + s.vec.size());
129		std::set_union(vec.begin(), vec.end(), s.vec.begin(), s.vec.end(),
130		          std::inserter(t, t.begin()), Comp());
131		vec.swap(t);
132	}
133
134	iterator lower_bound(const V& v) {
135		return std::lower_bound(vec.begin(), vec.end(), v, Comp());
136	}
137
138	std::pair<iterator, bool> insert(const V& v) {
139		iterator P = lower_bound(v);
140		if (P != vec.end() && is_equal(*P, v))
141			return std::make_pair(P, false);
142		return std::make_pair(vec.insert(P, v), true);
143	}
144
145	unsigned erase(const V&  v) {
146		iterator P = lower_bound(v);
147		if (P == vec.end() || !is_equal(*P, v))
148			return 0;
149		vec.erase(P);
150		return 1;
151	}
152
153	void clear() { vec.clear(); }
154
155	bool empty() { return vec.empty(); }
156
157	bool is_equal(const V& v1, const V& v2) {
158		return !Comp()(v1, v2) && !Comp()(v2, v1);
159	}
160
161	iterator find(const V& v) {
162		iterator P = lower_bound(v);
163		return (P != vec.end() && is_equal(*P, v)) ? P : vec.end();
164	}
165
166	unsigned size() { return vec.size(); }
167	void erase(iterator I) { vec.erase(I); }
168};
169
170template <typename K, typename V, typename KComp = std::less<K> >
171class sb_map {
172	typedef std::pair<K, V> datatype;
173
174	struct Comp {
175		bool operator()(const datatype &v1, const datatype &v2) {
176			return KComp()(v1.first, v2.first);
177		}
178	};
179
180	typedef sb_set<datatype, Comp> dataset;
181
182	dataset set;
183
184public:
185
186	sb_map() : set() {}
187
188	typedef typename dataset::iterator iterator;
189
190	iterator begin() { return set.begin(); }
191	iterator end() { return set.end(); }
192
193	void clear() { set.clear(); }
194
195	V& operator[](const K& key) {
196		datatype P = std::make_pair(key, V());
197		iterator F = set.find(P);
198		if (F == set.end()) {
199			return (*(set.insert(P).first)).second;
200		} else {
201			return (*F).second;
202		}
203	}
204
205	std::pair<iterator, bool> insert(const datatype& d) {
206		return set.insert(d);
207	}
208
209	iterator find(const K& key) {
210		return set.find(std::make_pair(key, V()));
211	}
212
213	unsigned erase(const K& key) {
214		return set.erase(std::make_pair(key, V()));
215	}
216
217	void erase(iterator I) {
218		set.erase(I);
219	}
220};
221
222class sb_bitset {
223	typedef uint32_t basetype;
224	static const unsigned bt_bits = sizeof(basetype) << 3;
225	std::vector<basetype> data;
226	unsigned bit_size;
227
228public:
229
230	sb_bitset() : data(), bit_size() {}
231
232	bool get(unsigned id);
233	void set(unsigned id, bool bit = true);
234	bool set_chk(unsigned id, bool bit = true);
235
236	void clear();
237	void resize(unsigned size);
238
239	unsigned size() { return bit_size; }
240
241	unsigned find_bit(unsigned start = 0);
242
243	void swap(sb_bitset & bs2);
244
245	bool operator==(const sb_bitset &bs2);
246	bool operator!=(const sb_bitset &bs2) { return !(*this == bs2); }
247
248	sb_bitset& operator|=(const sb_bitset &bs2) {
249		if (bit_size < bs2.bit_size) {
250			resize(bs2.bit_size);
251		}
252
253		for (unsigned i = 0, c = std::min(data.size(), bs2.data.size()); i < c;
254				++i) {
255			data[i] |= bs2.data[i];
256		}
257		return *this;
258	}
259
260	sb_bitset& operator&=(const sb_bitset &bs2);
261	sb_bitset& mask(const sb_bitset &bs2);
262
263	friend sb_bitset operator|(const sb_bitset &b1, const sb_bitset &b2) {
264			sb_bitset nbs(b1);
265			nbs |= b2;
266			return nbs;
267	}
268};
269
270enum value_kind {
271	VLK_REG,
272	VLK_REL_REG,
273	VLK_SPECIAL_REG,
274	VLK_TEMP,
275
276	VLK_CONST,
277	VLK_KCACHE,
278	VLK_PARAM,
279	VLK_SPECIAL_CONST,
280
281	VLK_UNDEF
282};
283
284
285
286class sb_value_pool : protected sb_pool {
287	unsigned aligned_elt_size;
288
289public:
290	sb_value_pool(unsigned elt_size, unsigned block_elts = 256)
291		: sb_pool(block_elts * (aligned_elt_size = ((elt_size +
292				SB_POOL_ALIGN - 1) & ~(SB_POOL_ALIGN - 1)))) {}
293
294	virtual ~sb_value_pool() { delete_all(); }
295
296	value* create(value_kind k, sel_chan regid, unsigned ver);
297
298	value* operator[](unsigned id) {
299		unsigned offset = id * aligned_elt_size;
300		unsigned block_id;
301		if (offset < block_size) {
302			block_id = 0;
303		} else {
304			block_id = offset / block_size;
305			offset = offset % block_size;
306		}
307		return (value*)((char*)blocks[block_id] + offset);
308	}
309
310	unsigned size() { return total_size / aligned_elt_size; }
311
312protected:
313	void delete_all();
314};
315
316
317
318
319
320class sb_value_set {
321
322	sb_bitset bs;
323
324public:
325	sb_value_set() : bs() {}
326
327	class iterator {
328		sb_value_pool &vp;
329		sb_value_set *s;
330		unsigned nb;
331	public:
332		iterator(shader &sh, sb_value_set *s, unsigned nb = 0);
333
334
335		iterator& operator++() {
336			if (nb + 1 < s->bs.size())
337				nb = s->bs.find_bit(nb + 1);
338			else
339				nb = s->bs.size();
340			return *this;
341		}
342		bool operator !=(const iterator &i) {
343			return s != i.s || nb != i.nb;
344		}
345		bool operator ==(const iterator &i) { return !(*this != i); }
346		value* operator *() {
347			 return vp[nb];
348		}
349
350
351	};
352
353	iterator begin(shader &sh) {
354		return iterator(sh, this, bs.size() ? bs.find_bit(0) : 0);
355	}
356	iterator end(shader &sh) { return iterator(sh, this, bs.size()); }
357
358	bool add_set_checked(sb_value_set & s2);
359
360	void add_set(sb_value_set & s2)  {
361		if (bs.size() < s2.bs.size())
362			bs.resize(s2.bs.size());
363		bs |= s2.bs;
364	}
365
366	void remove_set(sb_value_set & s2);
367
368	bool add_vec(vvec &vv);
369
370	bool add_val(value *v);
371	bool contains(value *v);
372
373	bool remove_val(value *v);
374
375	bool remove_vec(vvec &vv);
376
377	void clear();
378
379	bool empty();
380};
381
382typedef sb_value_set val_set;
383
384struct gpr_array {
385	sel_chan base_gpr; // original gpr
386	sel_chan gpr; // assigned by regalloc
387	unsigned array_size;
388
389	gpr_array(sel_chan base_gpr, unsigned array_size) : base_gpr(base_gpr),
390			array_size(array_size) {}
391
392	unsigned hash() { return (base_gpr << 10) * array_size; }
393
394	val_set interferences;
395	vvec refs;
396
397	bool is_dead();
398
399};
400
401typedef std::vector<gpr_array*> regarray_vec;
402
403enum value_flags {
404	VLF_UNDEF = (1 << 0),
405	VLF_READONLY = (1 << 1),
406	VLF_DEAD = (1 << 2),
407
408	VLF_PIN_REG = (1 << 3),
409	VLF_PIN_CHAN = (1 << 4),
410
411	// opposite to alu clause local value - goes through alu clause boundary
412	// (can't use temp gpr, can't recolor in the alu scheduler, etc)
413	VLF_GLOBAL = (1 << 5),
414	VLF_FIXED = (1 << 6),
415	VLF_PVPS = (1 << 7),
416
417	VLF_PREALLOC = (1 << 8)
418};
419
420inline value_flags operator |(value_flags l, value_flags r) {
421	return (value_flags)((unsigned)l|(unsigned)r);
422}
423inline value_flags operator &(value_flags l, value_flags r) {
424	return (value_flags)((unsigned)l&(unsigned)r);
425}
426inline value_flags operator ~(value_flags l) {
427	return (value_flags)(~(unsigned)l);
428}
429inline value_flags& operator |=(value_flags &l, value_flags r) {
430	l = l | r;
431	return l;
432}
433inline value_flags& operator &=(value_flags &l, value_flags r) {
434	l = l & r;
435	return l;
436}
437
438sb_ostream& operator << (sb_ostream &o, value &v);
439
440typedef uint32_t value_hash;
441
442typedef std::list< node * > uselist;
443
444enum constraint_kind {
445	CK_SAME_REG,
446	CK_PACKED_BS,
447	CK_PHI
448};
449
450class shader;
451class sb_value_pool;
452struct ra_chunk;
453class ra_constraint;
454
455class value {
456protected:
457	value(unsigned sh_id, value_kind k, sel_chan select, unsigned ver = 0)
458		: kind(k), flags(),
459			rel(), array(),
460			version(ver), select(select), pin_gpr(select), gpr(),
461			gvn_source(), ghash(),
462			def(), adef(), uses(), constraint(), chunk(),
463			literal_value(), uid(sh_id) {}
464
465	~value() { delete_uses(); }
466
467	friend class sb_value_pool;
468public:
469	value_kind kind;
470	value_flags flags;
471
472	vvec mdef;
473	vvec muse;
474	value *rel;
475	gpr_array *array;
476
477	unsigned version;
478
479	sel_chan select;
480	sel_chan pin_gpr;
481	sel_chan gpr;
482
483	value *gvn_source;
484	value_hash ghash;
485
486	node *def, *adef;
487	uselist uses;
488
489	ra_constraint *constraint;
490	ra_chunk *chunk;
491
492	literal literal_value;
493
494	bool is_const() { return kind == VLK_CONST || kind == VLK_UNDEF; }
495
496	bool is_AR() {
497		return is_special_reg() && select == sel_chan(SV_AR_INDEX, 0);
498	}
499	bool is_geometry_emit() {
500		return is_special_reg() && select == sel_chan(SV_GEOMETRY_EMIT, 0);
501	}
502	bool is_lds_access() {
503		return is_special_reg() && select == sel_chan(SV_LDS_RW, 0);
504	}
505	bool is_lds_oq() {
506		return is_special_reg() && (select == sel_chan(SV_LDS_OQA, 0) || select == sel_chan(SV_LDS_OQB, 0));
507	}
508
509	node* any_def() {
510		assert(!(def && adef));
511		return def ? def : adef;
512	}
513
514	value* gvalue() {
515		value *v = this;
516		while (v->gvn_source && v != v->gvn_source)
517			// FIXME we really shouldn't have such chains
518			v = v->gvn_source;
519		return v;
520	}
521	bool is_scratch() {
522		return is_special_reg() && select == sel_chan(SV_SCRATCH, 0);
523	}
524
525	bool is_float_0_or_1() {
526		value *v = gvalue();
527		return v->is_const() && (v->literal_value == literal(0)
528						|| v->literal_value == literal(1.0f));
529	}
530
531	bool is_undef() { return gvalue()->kind == VLK_UNDEF; }
532
533	bool is_any_gpr() {
534		return (kind == VLK_REG || kind == VLK_TEMP);
535	}
536
537	bool is_agpr() {
538		return array && is_any_gpr();
539	}
540
541	// scalar gpr, as opposed to element of gpr array
542	bool is_sgpr() {
543		return !array && is_any_gpr();
544	}
545
546	bool is_special_reg() {	return kind == VLK_SPECIAL_REG;	}
547	bool is_any_reg() { return is_any_gpr() || is_special_reg(); }
548	bool is_kcache() { return kind == VLK_KCACHE; }
549	bool is_rel() {	return kind == VLK_REL_REG; }
550	bool is_readonly() { return flags & VLF_READONLY; }
551
552	bool is_chan_pinned() { return flags & VLF_PIN_CHAN; }
553	bool is_reg_pinned() { return flags & VLF_PIN_REG; }
554
555	bool is_global();
556	void set_global();
557	void set_prealloc();
558
559	bool is_prealloc();
560
561	bool is_fixed();
562	void fix();
563
564	bool is_dead() { return flags & VLF_DEAD; }
565
566	literal & get_const_value() {
567		value *v = gvalue();
568		assert(v->is_const());
569		return v->literal_value;
570	}
571
572	// true if needs to be encoded as literal in alu
573	bool is_literal() {
574		return is_const()
575				&& literal_value != literal(0)
576				&& literal_value != literal(1)
577				&& literal_value != literal(-1)
578				&& literal_value != literal(0.5)
579				&& literal_value != literal(1.0);
580	}
581
582	void add_use(node *n);
583	void remove_use(const node *n);
584
585	value_hash hash();
586	value_hash rel_hash();
587
588	void assign_source(value *v) {
589		assert(!gvn_source || gvn_source == this);
590		gvn_source = v->gvalue();
591	}
592
593	bool v_equal(value *v) { return gvalue() == v->gvalue(); }
594
595	unsigned use_count();
596	void delete_uses();
597
598	sel_chan get_final_gpr() {
599		if (array && array->gpr) {
600			int reg_offset = select.sel() - array->base_gpr.sel();
601			if (rel && rel->is_const())
602				reg_offset += rel->get_const_value().i;
603			return array->gpr + (reg_offset << 2);
604		} else {
605			return gpr;
606		}
607	}
608
609	unsigned get_final_chan() {
610		if (array) {
611			assert(array->gpr);
612			return array->gpr.chan();
613		} else {
614			assert(gpr);
615			return gpr.chan();
616		}
617	}
618
619	/* Check whether copy-propagation of src into this would create an access
620	 * conflict with relative addressing, i.e. an operation that tries to access
621	 * array elements with different address register values.
622	 */
623	bool no_reladdr_conflict_with(value *src);
624
625	val_set interferences;
626	unsigned uid;
627};
628
629class expr_handler;
630
631class value_table {
632	typedef std::vector<value*> vt_item;
633	typedef std::vector<vt_item> vt_table;
634
635	expr_handler &ex;
636
637	unsigned size_bits;
638	unsigned size;
639	unsigned size_mask;
640
641	vt_table hashtable;
642
643	unsigned cnt;
644
645public:
646
647	value_table(expr_handler &ex, unsigned size_bits = 10)
648		: ex(ex), size_bits(size_bits), size(1u << size_bits),
649		  size_mask(size - 1), hashtable(size), cnt() {}
650
651	~value_table() {}
652
653	void add_value(value* v);
654
655	bool expr_equal(value* l, value* r);
656
657	unsigned count() { return cnt; }
658
659	void get_values(vvec & v);
660};
661
662class sb_context;
663
664enum node_type {
665	NT_UNKNOWN,
666	NT_LIST,
667	NT_OP,
668	NT_REGION,
669	NT_REPEAT,
670	NT_DEPART,
671	NT_IF,
672};
673
674enum node_subtype {
675	NST_UNKNOWN,
676	NST_LIST,
677	NST_ALU_GROUP,
678	NST_ALU_CLAUSE,
679	NST_ALU_INST,
680	NST_ALU_PACKED_INST,
681	NST_CF_INST,
682	NST_FETCH_INST,
683	NST_TEX_CLAUSE,
684	NST_VTX_CLAUSE,
685	NST_GDS_CLAUSE,
686
687	NST_BB,
688
689	NST_PHI,
690	NST_PSI,
691	NST_COPY,
692
693	NST_LOOP_PHI_CONTAINER,
694	NST_LOOP_CONTINUE,
695	NST_LOOP_BREAK
696};
697
698enum node_flags {
699	NF_EMPTY = 0,
700	NF_DEAD = (1 << 0),
701	NF_REG_CONSTRAINT = (1 << 1),
702	NF_CHAN_CONSTRAINT = (1 << 2),
703	NF_ALU_4SLOT = (1 << 3),
704	NF_CONTAINER = (1 << 4),
705
706	NF_COPY_MOV = (1 << 5),
707
708	NF_DONT_KILL = (1 << 6),
709	NF_DONT_HOIST = (1 << 7),
710	NF_DONT_MOVE = (1 << 8),
711
712	// for KILLxx - we want to schedule them as early as possible
713	NF_SCHEDULE_EARLY = (1 << 9),
714
715	// for ALU_PUSH_BEFORE - when set, replace with PUSH + ALU
716	NF_ALU_STACK_WORKAROUND = (1 << 10),
717	NF_ALU_2SLOT = (1 << 11),
718};
719
720inline node_flags operator |(node_flags l, node_flags r) {
721	return (node_flags)((unsigned)l|(unsigned)r);
722}
723inline node_flags& operator |=(node_flags &l, node_flags r) {
724	l = l | r;
725	return l;
726}
727
728inline node_flags& operator &=(node_flags &l, node_flags r) {
729	l = (node_flags)((unsigned)l & (unsigned)r);
730	return l;
731}
732
733inline node_flags operator ~(node_flags r) {
734	return (node_flags)~(unsigned)r;
735}
736
737struct node_stats {
738	unsigned alu_count;
739	unsigned alu_kill_count;
740	unsigned alu_copy_mov_count;
741	unsigned cf_count;
742	unsigned fetch_count;
743	unsigned region_count;
744	unsigned loop_count;
745	unsigned phi_count;
746	unsigned loop_phi_count;
747	unsigned depart_count;
748	unsigned repeat_count;
749	unsigned if_count;
750       bool uses_ar;
751
752	node_stats() : alu_count(), alu_kill_count(), alu_copy_mov_count(),
753			cf_count(), fetch_count(), region_count(),
754			loop_count(), phi_count(), loop_phi_count(), depart_count(),
755                       repeat_count(), if_count(), uses_ar(false) {}
756
757	void dump();
758};
759
760class shader;
761
762class vpass;
763
764class container_node;
765class region_node;
766
767class node {
768
769protected:
770	node(node_type nt, node_subtype nst, node_flags flags = NF_EMPTY)
771	: prev(), next(), parent(),
772	  type(nt), subtype(nst), flags(flags),
773	  pred(), dst(), src() {}
774
775	virtual ~node() {};
776
777public:
778	node *prev, *next;
779	container_node *parent;
780
781	node_type type;
782	node_subtype subtype;
783	node_flags flags;
784
785	value *pred;
786
787	vvec dst;
788	vvec src;
789
790	virtual bool is_valid() { return true; }
791	virtual bool accept(vpass &p, bool enter);
792
793	void insert_before(node *n);
794	void insert_after(node *n);
795	void replace_with(node *n);
796	void remove();
797
798	virtual value_hash hash() const;
799	value_hash hash_src() const;
800
801	virtual bool fold_dispatch(expr_handler *ex);
802
803	bool is_container() { return flags & NF_CONTAINER; }
804
805	bool is_alu_packed() { return subtype == NST_ALU_PACKED_INST; }
806	bool is_alu_inst() { return subtype == NST_ALU_INST; }
807	bool is_alu_group() { return subtype == NST_ALU_GROUP; }
808	bool is_alu_clause() { return subtype == NST_ALU_CLAUSE; }
809
810	bool is_fetch_clause() {
811		return subtype == NST_TEX_CLAUSE || subtype == NST_VTX_CLAUSE || subtype == NST_GDS_CLAUSE;
812	}
813
814	bool is_copy() { return subtype == NST_COPY; }
815	bool is_copy_mov() { return flags & NF_COPY_MOV; }
816	bool is_any_alu() { return is_alu_inst() || is_alu_packed() || is_copy(); }
817
818	bool is_fetch_inst() { return subtype == NST_FETCH_INST; }
819	bool is_cf_inst() { return subtype == NST_CF_INST; }
820
821	bool is_region() { return type == NT_REGION; }
822	bool is_depart() { return type == NT_DEPART; }
823	bool is_repeat() { return type == NT_REPEAT; }
824	bool is_if() { return type == NT_IF; }
825	bool is_bb() { return subtype == NST_BB; }
826
827	bool is_phi() { return subtype == NST_PHI; }
828
829	bool is_dead() { return flags & NF_DEAD; }
830
831	bool is_cf_op(unsigned op);
832	bool is_alu_op(unsigned op);
833	bool is_fetch_op(unsigned op);
834
835	unsigned cf_op_flags();
836	unsigned alu_op_flags();
837	unsigned alu_op_slot_flags();
838	unsigned fetch_op_flags();
839
840	bool is_mova();
841	bool is_pred_set();
842
843	bool vec_uses_ar(vvec &vv) {
844		for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
845			value *v = *I;
846			if (v && v->rel && !v->rel->is_const())
847				return true;
848		}
849		return false;
850	}
851
852	bool uses_ar() {
853		return vec_uses_ar(dst) || vec_uses_ar(src);
854	}
855
856	bool vec_uses_lds_oq(vvec &vv) {
857		for (vvec::iterator I = vv.begin(), E = vv.end(); I != E; ++I) {
858			value *v = *I;
859			if (v && v->is_lds_oq())
860				return true;
861		}
862		return false;
863	}
864
865	bool consumes_lds_oq() {
866		return vec_uses_lds_oq(src);
867	}
868
869	bool produces_lds_oq() {
870		return vec_uses_lds_oq(dst);
871	}
872
873	region_node* get_parent_region();
874
875	friend class shader;
876};
877
878class container_node : public node {
879public:
880
881	container_node(node_type nt = NT_LIST, node_subtype nst = NST_LIST,
882	               node_flags flags = NF_EMPTY)
883	: node(nt, nst, flags | NF_CONTAINER), first(), last(),
884	  live_after(), live_before() {}
885
886	// child items list
887	node *first, *last;
888
889	val_set live_after;
890	val_set live_before;
891
892	class iterator {
893		node *p;
894	public:
895		iterator(node *pp = NULL) : p(pp) {}
896		iterator & operator ++() { p = p->next; return *this;}
897		iterator & operator --() { p = p->prev; return *this;}
898		node* operator *() { return p; }
899		node* operator ->() { return p; }
900		const iterator advance(int n) {
901			if (!n) return *this;
902			iterator I(p);
903			if (n > 0) while (n--) ++I;
904			else while (n++) --I;
905			return I;
906		}
907		const iterator operator +(int n) { return advance(n); }
908		const iterator operator -(int n) { return advance(-n); }
909		bool operator !=(const iterator &i) { return p != i.p; }
910		bool operator ==(const iterator &i) { return p == i.p; }
911	};
912
913	class riterator {
914		iterator i;
915	public:
916		riterator(node *p = NULL) : i(p) {}
917		riterator & operator ++() { --i; return *this;}
918		riterator & operator --() { ++i; return *this;}
919		node* operator *() { return *i; }
920		node* operator ->() { return *i; }
921		bool operator !=(const riterator &r) { return i != r.i; }
922		bool operator ==(const riterator &r) { return i == r.i; }
923	};
924
925	iterator begin() { return first; }
926	iterator end() { return NULL; }
927	riterator rbegin() { return last; }
928	riterator rend() { return NULL; }
929
930	bool empty() { assert(first != NULL || first == last); return !first; }
931	unsigned count();
932
933	// used with node containers that represent scheduling queues
934	// ignores copies and takes into account alu_packed_node items
935	unsigned real_alu_count();
936
937	void push_back(node *n);
938	void push_front(node *n);
939
940	void insert_node_before(node *s, node *n);
941	void insert_node_after(node *s, node *n);
942
943	void append_from(container_node *c);
944
945	// remove range [b..e) from some container and assign to this container
946	void move(iterator b, iterator e);
947
948	void expand();
949	void expand(container_node *n);
950	void remove_node(node *n);
951
952	node *cut(iterator b, iterator e);
953
954	void clear() { first = last = NULL; }
955
956	virtual bool is_valid() { return true; }
957	virtual bool accept(vpass &p, bool enter);
958	virtual bool fold_dispatch(expr_handler *ex);
959
960	node* front() { return first; }
961	node* back() { return last; }
962
963	void collect_stats(node_stats &s);
964
965	friend class shader;
966
967
968};
969
970typedef container_node::iterator node_iterator;
971typedef container_node::riterator node_riterator;
972
973class alu_group_node : public container_node {
974protected:
975	alu_group_node() : container_node(NT_LIST, NST_ALU_GROUP), literals() {}
976public:
977
978	std::vector<literal> literals;
979
980	virtual bool is_valid() { return subtype == NST_ALU_GROUP; }
981	virtual bool accept(vpass &p, bool enter);
982
983
984	unsigned literal_chan(literal l) {
985		std::vector<literal>::iterator F =
986				std::find(literals.begin(), literals.end(), l);
987		assert(F != literals.end());
988		return F - literals.begin();
989	}
990
991	friend class shader;
992};
993
994class cf_node : public container_node {
995protected:
996	cf_node() : container_node(NT_OP, NST_CF_INST), jump_target(),
997		jump_after_target() { memset(&bc, 0, sizeof(bc_cf)); };
998public:
999	bc_cf bc;
1000
1001	cf_node *jump_target;
1002	bool jump_after_target;
1003
1004	virtual bool is_valid() { return subtype == NST_CF_INST; }
1005	virtual bool accept(vpass &p, bool enter);
1006	virtual bool fold_dispatch(expr_handler *ex);
1007
1008	void jump(cf_node *c) { jump_target = c; jump_after_target = false; }
1009	void jump_after(cf_node *c) { jump_target = c; jump_after_target = true; }
1010
1011	friend class shader;
1012};
1013
1014class alu_node : public node {
1015protected:
1016	alu_node() : node(NT_OP, NST_ALU_INST) {  }
1017public:
1018	bc_alu bc;
1019
1020	virtual bool is_valid() { return subtype == NST_ALU_INST; }
1021	virtual bool accept(vpass &p, bool enter);
1022	virtual bool fold_dispatch(expr_handler *ex);
1023
1024	unsigned forced_bank_swizzle() {
1025		return ((bc.op_ptr->flags & AF_INTERP) &&
1026			((bc.slot_flags == AF_4V) ||
1027			 (bc.slot_flags == AF_2V))) ? VEC_210 : 0;
1028	}
1029
1030	// return param index + 1 if instruction references interpolation param,
1031	// otherwise 0
1032	unsigned interp_param();
1033
1034	alu_group_node *get_alu_group_node();
1035
1036	friend class shader;
1037};
1038
1039// for multi-slot instrs - DOT/INTERP/... (maybe useful for 64bit pairs later)
1040class alu_packed_node : public container_node {
1041protected:
1042	alu_packed_node() : container_node(NT_OP, NST_ALU_PACKED_INST) {}
1043public:
1044
1045	const alu_op_info* op_ptr() {
1046		return static_cast<alu_node*>(first)->bc.op_ptr;
1047	}
1048	unsigned op() { return static_cast<alu_node*>(first)->bc.op; }
1049	void init_args(bool repl);
1050
1051	virtual bool is_valid() { return subtype == NST_ALU_PACKED_INST; }
1052	virtual bool accept(vpass &p, bool enter);
1053	virtual bool fold_dispatch(expr_handler *ex);
1054
1055	unsigned get_slot_mask();
1056	void update_packed_items(sb_context &ctx);
1057
1058	friend class shader;
1059};
1060
1061class fetch_node : public node {
1062protected:
1063	fetch_node() : node(NT_OP, NST_FETCH_INST) { memset(&bc, 0, sizeof(bc_fetch)); };
1064public:
1065	bc_fetch bc;
1066
1067	virtual bool is_valid() { return subtype == NST_FETCH_INST; }
1068	virtual bool accept(vpass &p, bool enter);
1069	virtual bool fold_dispatch(expr_handler *ex);
1070
1071	bool uses_grad() { return bc.op_ptr->flags & FF_USEGRAD; }
1072
1073	friend class shader;
1074};
1075
1076class region_node;
1077
1078class repeat_node : public container_node {
1079protected:
1080	repeat_node(region_node *target, unsigned id)
1081	: container_node(NT_REPEAT, NST_LIST), target(target), rep_id(id) {}
1082public:
1083	region_node *target;
1084	unsigned rep_id;
1085
1086	virtual bool accept(vpass &p, bool enter);
1087
1088	friend class shader;
1089};
1090
1091class depart_node : public container_node {
1092protected:
1093	depart_node(region_node *target, unsigned id)
1094	: container_node(NT_DEPART, NST_LIST), target(target), dep_id(id) {}
1095public:
1096	region_node *target;
1097	unsigned dep_id;
1098
1099	virtual bool accept(vpass &p, bool enter);
1100
1101	friend class shader;
1102};
1103
1104class if_node : public container_node {
1105protected:
1106	if_node() : container_node(NT_IF, NST_LIST), cond() {};
1107public:
1108	value *cond; // glued to pseudo output (dst[2]) of the PRED_SETxxx
1109
1110	virtual bool accept(vpass &p, bool enter);
1111
1112	friend class shader;
1113};
1114
1115typedef std::vector<depart_node*> depart_vec;
1116typedef std::vector<repeat_node*> repeat_vec;
1117
1118class region_node : public container_node {
1119protected:
1120	region_node(unsigned id) : container_node(NT_REGION, NST_LIST), region_id(id),
1121			loop_phi(), phi(), vars_defined(), departs(), repeats(), src_loop()
1122			{}
1123public:
1124	unsigned region_id;
1125
1126	container_node *loop_phi;
1127	container_node *phi;
1128
1129	val_set vars_defined;
1130
1131	depart_vec departs;
1132	repeat_vec repeats;
1133
1134	// true if region was created for loop in the parser, sometimes repeat_node
1135	// may be optimized away so we need to remember this information
1136	bool src_loop;
1137
1138	virtual bool accept(vpass &p, bool enter);
1139
1140	unsigned dep_count() { return departs.size(); }
1141	unsigned rep_count() { return repeats.size() + 1; }
1142
1143	bool is_loop() { return src_loop || !repeats.empty(); }
1144
1145	container_node* get_entry_code_location() {
1146		node *p = first;
1147		while (p && (p->is_depart() || p->is_repeat()))
1148			p = static_cast<container_node*>(p)->first;
1149
1150		container_node *c = static_cast<container_node*>(p);
1151		if (c->is_bb())
1152			return c;
1153		else
1154			return c->parent;
1155	}
1156
1157	void expand_depart(depart_node *d);
1158	void expand_repeat(repeat_node *r);
1159
1160	friend class shader;
1161};
1162
1163class bb_node : public container_node {
1164protected:
1165	bb_node(unsigned id, unsigned loop_level)
1166		: container_node(NT_LIST, NST_BB), id(id), loop_level(loop_level) {}
1167public:
1168	unsigned id;
1169	unsigned loop_level;
1170
1171	virtual bool accept(vpass &p, bool enter);
1172
1173	friend class shader;
1174};
1175
1176
1177typedef std::vector<region_node*> regions_vec;
1178typedef std::vector<bb_node*> bbs_vec;
1179typedef std::list<node*> sched_queue;
1180typedef sched_queue::iterator sq_iterator;
1181typedef std::vector<node*> node_vec;
1182typedef std::list<node*> node_list;
1183typedef std::set<node*> node_set;
1184
1185
1186
1187} // namespace r600_sb
1188
1189#endif /* R600_SB_IR_H_ */
1190