/*
 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Vadim Girlin
 */
26
27#ifndef SB_SHADER_H_
28#define SB_SHADER_H_
29
#include <algorithm>
#include <list>
#include <map>
#include <set>
#include <string>
#include <vector>

#include "sb_ir.h"
#include "sb_expr.h"
36
37namespace r600_sb {
38
// Per-input record; instances are stored in inputs_vec.
struct shader_input {
	// Component mask of the input. shader::add_input() defaults its
	// comp_mask argument to 0xF (presumably one bit per channel - TODO confirm).
	unsigned comp_mask;
	// NOTE(review): presumably non-zero when the input was registered with
	// add_input(..., preloaded = true) - confirm against the implementation.
	unsigned preloaded;
};
43
44struct error_info {
45	node *n;
46	unsigned arg_index;
47	std::string message;
48};
49
50typedef std::multimap<node*, error_info> error_map;
51
52class sb_context;
53
54typedef std::vector<shader_input> inputs_vec;
55typedef std::vector<gpr_array*> gpr_array_vec;
56
57struct ra_edge {
58	value *a, *b;
59	unsigned cost;
60
61	ra_edge(value *a, value *b, unsigned cost) : a(a), b(b), cost(cost) {}
62};
63
// Bit flags stored in ra_chunk::flags. Each enumerator is a distinct bit so
// that values can be combined with the operators defined below.
enum chunk_flags {
	RCF_GLOBAL = (1 << 0),
	RCF_PIN_CHAN = (1 << 1),
	RCF_PIN_REG = (1 << 2),

	RCF_FIXED = (1 << 3),

	RCF_PREALLOC = (1 << 4)
};

// Bit flags; class shader keeps a combination of them in its `dce_flags`
// member (NOTE(review): presumably consumed by the dead-code pass - confirm).
enum dce_flags {
	DF_REMOVE_DEAD  = (1 << 0),
	DF_REMOVE_UNUSED = (1 << 1),
	DF_EXPAND = (1 << 2)
};

// Union of two dce_flags sets.
inline dce_flags operator |(dce_flags l, dce_flags r) {
	return (dce_flags)((unsigned)l | (unsigned)r);
}

// Union of two chunk_flags sets.
inline chunk_flags operator |(chunk_flags l, chunk_flags r) {
	return (chunk_flags)((unsigned)l | (unsigned)r);
}

// Adds the bits of `r` to `l` in place and returns `l`.
inline chunk_flags& operator |=(chunk_flags &l, chunk_flags r) {
	l = l | r;
	return l;
}

// Keeps only the bits of `l` that are also set in `r`; returns `l`.
inline chunk_flags& operator &=(chunk_flags &l, chunk_flags r) {
	l = (chunk_flags)((unsigned)l & (unsigned)r);
	return l;
}

// Complement of `r` restricted to the declared RCF_* bits.
//
// chunk_flags has no fixed underlying type, so converting a value with bits
// above RCF_PREALLOC to the enum is outside the range of the enumeration
// (undefined behaviour since C++17). Masking keeps the result representable
// and gives the same outcome for `flags &= ~RCF_x`.
// Keep `all_bits` in sync when a new RCF_* enumerator is added.
inline chunk_flags operator ~(chunk_flags r) {
	const unsigned all_bits = (unsigned)RCF_GLOBAL | (unsigned)RCF_PIN_CHAN |
			(unsigned)RCF_PIN_REG | (unsigned)RCF_FIXED |
			(unsigned)RCF_PREALLOC;
	return (chunk_flags)(~(unsigned)r & all_bits);
}
100
101struct ra_chunk {
102	vvec values;
103	chunk_flags flags;
104	unsigned cost;
105	sel_chan pin;
106
107	ra_chunk() : values(), flags(), cost(), pin() {}
108
109	bool is_fixed() { return flags & RCF_FIXED; }
110	void fix() { flags |= RCF_FIXED; }
111
112	bool is_global() { return flags & RCF_GLOBAL; }
113	void set_global() {	flags |= RCF_GLOBAL; }
114
115	bool is_reg_pinned() { return flags & RCF_PIN_REG; }
116	bool is_chan_pinned() { return flags & RCF_PIN_CHAN; }
117
118	bool is_prealloc() { return flags & RCF_PREALLOC; }
119	void set_prealloc() { flags |= RCF_PREALLOC; }
120};
121
122typedef std::vector<ra_chunk*> chunk_vector;
123
124class ra_constraint {
125public:
126	ra_constraint(constraint_kind kind) : kind(kind), cost(0) {}
127
128	constraint_kind kind;
129	vvec values;
130	unsigned cost;
131
132	void update_values();
133	bool check();
134};
135
136typedef std::vector<ra_constraint*> constraint_vec;
137typedef std::vector<ra_chunk*> chunk_vec;
138
// priority queue
// FIXME use something more suitable or a custom class?
141
// Orders pointer-like objects by their `cost` member, highest cost first.
// T must support `->cost`.
template <class T>
struct cost_compare {
	// Returns true when t1 has a strictly greater cost than t2.
	// Declared const so the comparator also works through const objects
	// and references.
	bool operator ()(const T& t1, const T& t2) const {
		return t1->cost > t2->cost;
	}
};
148
// Sorted sequence backed by std::vector. Elements are kept ordered according
// to Comp, so iterating from begin() to end() visits them in that order.
template <class T, class Comp>
class queue {
	typedef std::vector<T> container;
	container cont;

public:
	queue() : cont() {}

	typedef typename container::iterator iterator;

	iterator begin() { return cont.begin(); }
	iterator end() { return cont.end(); }

	// Inserts t after any elements equivalent to it under Comp and returns
	// an iterator to the newly inserted element.
	//
	// The iterator returned by vector::insert is used because the position
	// found by upper_bound is invalidated when the vector reallocates;
	// inserting at end() is equivalent to push_back.
	iterator insert(const T& t) {
		iterator pos = std::upper_bound(begin(), end(), t, Comp());
		return cont.insert(pos, t);
	}

	// Removes one element equal to t, if present. The search is limited to
	// the range of elements equivalent to t under Comp, so the ordering key
	// of t must not have changed since it was inserted.
	void erase(const T& t) {
		std::pair<iterator, iterator> range =
				std::equal_range(begin(), end(), t, Comp());
		iterator found = std::find(range.first, range.second, t);
		if (found != range.second)
			cont.erase(found);
	}
};
180
181typedef queue<ra_chunk*, cost_compare<ra_chunk*> > chunk_queue;
182typedef queue<ra_edge*, cost_compare<ra_edge*> > edge_queue;
183typedef queue<ra_constraint*, cost_compare<ra_constraint*> > constraint_queue;
184
185typedef std::set<ra_chunk*> chunk_set;
186
187class shader;
188
189class coalescer {
190
191	shader &sh;
192
193	edge_queue edges;
194	chunk_queue chunks;
195	constraint_queue constraints;
196
197	constraint_vec all_constraints;
198	chunk_vec all_chunks;
199
200public:
201
202	coalescer(shader &sh) : sh(sh), edges(), chunks(), constraints() {}
203	~coalescer();
204
205	int run();
206
207	void add_edge(value *a, value *b, unsigned cost);
208	void build_chunks();
209	void build_constraint_queue();
210	void build_chunk_queue();
211	int color_constraints();
212	int color_chunks();
213
214	ra_constraint* create_constraint(constraint_kind kind);
215
216	enum ac_cost {
217		phi_cost = 10000,
218		copy_cost = 1,
219	};
220
221	void dump_edges();
222	void dump_chunks();
223	void dump_constraint_queue();
224
225	static void dump_chunk(ra_chunk *c);
226	static void dump_constraint(ra_constraint* c);
227
228	void get_chunk_interferences(ra_chunk *c, val_set &s);
229
230private:
231
232	void create_chunk(value *v);
233	void unify_chunks(ra_edge *e);
234	bool chunks_interference(ra_chunk *c1, ra_chunk *c2);
235
236	int color_reg_constraint(ra_constraint *c);
237	void color_phi_constraint(ra_constraint *c);
238
239
240	void init_reg_bitset(sb_bitset &bs, val_set &vs);
241
242	void color_chunk(ra_chunk *c, sel_chan color);
243
244	ra_chunk* detach_value(value *v);
245};
246
247
248
249class shader {
250
251	sb_context &ctx;
252
253	typedef sb_map<uint32_t, value*> value_map;
254	value_map reg_values;
255
256	// read-only values
257	value_map const_values; // immediate constants key -const  value (uint32_t)
258	value_map special_ro_values; //  key - hw alu_sel & chan
259	value_map kcache_values;
260
261	gpr_array_vec gpr_arrays;
262
263	unsigned next_temp_value_index;
264
265	unsigned prep_regs_count;
266
267	value* pred_sels[2];
268
269	regions_vec regions;
270	inputs_vec inputs;
271
272	value *undef;
273
274	sb_value_pool val_pool;
275	sb_pool pool;
276
277	std::vector<node*> all_nodes;
278
279public:
280	shader_stats src_stats, opt_stats;
281
282	error_map errors;
283
284	bool optimized;
285
286	unsigned id;
287
288	coalescer coal;
289
290	static const unsigned temp_regid_offset = 512;
291
292	bbs_vec bbs;
293
294	const shader_target target;
295
296	value_table vt;
297	expr_handler ex;
298
299	container_node *root;
300
301	bool compute_interferences;
302
303	bool has_alu_predication;
304	bool uses_gradients;
305
306	bool safe_math;
307
308	unsigned ngpr, nstack;
309
310	unsigned dce_flags;
311
312	shader(sb_context &sctx, shader_target t, unsigned id);
313
314	~shader();
315
316	sb_context &get_ctx() const { return ctx; }
317
318	value* get_const_value(const literal & v);
319	value* get_special_value(unsigned sv_id, unsigned version = 0);
320	value* create_temp_value();
321	value* get_gpr_value(bool src, unsigned reg, unsigned chan, bool rel,
322                         unsigned version = 0);
323
324
325	value* get_special_ro_value(unsigned sel);
326	value* get_kcache_value(unsigned bank, unsigned index, unsigned chan, alu_kcache_index_mode index_mode);
327
328	value* get_value_version(value* v, unsigned ver);
329
330	void init();
331	void add_pinned_gpr_values(vvec& vec, unsigned gpr, unsigned comp_mask, bool src);
332
333	void dump_ir();
334
335	void add_gpr_array(unsigned gpr_start, unsigned gpr_count,
336	                   unsigned comp_mask);
337
338	value* get_pred_sel(int sel);
339	bool assign_slot(alu_node *n, alu_node *slots[5]);
340
341	gpr_array* get_gpr_array(unsigned reg, unsigned chan);
342
343	void add_input(unsigned gpr, bool preloaded = false,
344	               unsigned comp_mask = 0xF);
345
346	const inputs_vec & get_inputs() {return inputs; }
347
348	regions_vec & get_regions() { return regions; }
349
350	void init_call_fs(cf_node *cf);
351
352	value *get_undef_value();
353	void set_undef(val_set &s);
354
355	node* create_node(node_type nt, node_subtype nst,
356	                  node_flags flags = NF_EMPTY);
357	alu_node* create_alu();
358	alu_group_node* create_alu_group();
359	alu_packed_node* create_alu_packed();
360	cf_node* create_cf();
361	cf_node* create_cf(unsigned op);
362	fetch_node* create_fetch();
363	region_node* create_region();
364	depart_node* create_depart(region_node *target);
365	repeat_node* create_repeat(region_node *target);
366	container_node* create_container(node_type nt = NT_LIST,
367	                                 node_subtype nst = NST_LIST,
368	                                 node_flags flags = NF_EMPTY);
369	if_node* create_if();
370	bb_node* create_bb(unsigned id, unsigned loop_level);
371
372	value* get_value_by_uid(unsigned id) { return val_pool[id - 1]; }
373
374	cf_node* create_clause(node_subtype nst);
375
376	void create_bbs();
377	void expand_bbs();
378
379	alu_node* create_mov(value* dst, value* src);
380	alu_node* create_copy_mov(value *dst, value *src, unsigned affcost = 1);
381
382	const char * get_shader_target_name();
383
384	std::string get_full_target_name();
385
386	void create_bbs(container_node* n, bbs_vec &bbs, int loop_level = 0);
387	void expand_bbs(bbs_vec &bbs);
388
389	sched_queue_id get_queue_id(node* n);
390
391	void simplify_dep_rep(node *dr);
392
393	unsigned first_temp_gpr();
394	unsigned num_nontemp_gpr();
395
396	gpr_array_vec& arrays() { return gpr_arrays; }
397
398	void set_uses_kill();
399
400	void fill_array_values(gpr_array *a, vvec &vv);
401
402	alu_node* clone(alu_node *n);
403
404	sb_value_pool& get_value_pool() { return val_pool; }
405
406	void collect_stats(bool opt);
407
408private:
409	value* create_value(value_kind k, sel_chan regid, unsigned ver);
410	value* get_value(value_kind kind, sel_chan id,
411	                         unsigned version = 0);
412	value* get_ro_value(value_map &vm, value_kind vk, unsigned key);
413};
414
415}
416
#endif /* SB_SHADER_H_ */
418