1/*
2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 *      Vadim Girlin
25 */
26
27#ifndef SB_SCHED_H_
28#define SB_SCHED_H_
29
30namespace r600_sb {
31
32typedef sb_map<node*, unsigned> uc_map;
33
34// resource trackers for scheduler
35// rp = read port
36// uc = use count
37
38typedef sb_set<unsigned> kc_lines;
39
40class rp_kcache_tracker {
41	unsigned rp[4];
42	unsigned uc[4];
43	const unsigned sel_count;
44
45	unsigned kc_sel(sel_chan r) {
46		return sel_count == 4 ? (unsigned)r : ((r - 1) >> 1) + 1;
47	}
48
49public:
50	rp_kcache_tracker(shader &sh);
51
52	bool try_reserve(node *n);
53	void unreserve(node *n);
54
55
56	bool try_reserve(sel_chan r);
57	void unreserve(sel_chan r);
58
59	void reset();
60
61	unsigned num_sels() { return !!rp[0] + !!rp[1] + !!rp[2] + !!rp[3]; }
62
63	unsigned get_lines(kc_lines &lines);
64};
65
66class literal_tracker {
67	literal lt[4];
68	unsigned uc[4];
69
70public:
71	literal_tracker() : lt(), uc() {}
72
73	bool try_reserve(alu_node *n);
74	void unreserve(alu_node *n);
75
76	bool try_reserve(literal l);
77	void unreserve(literal l);
78
79	void reset();
80
81	unsigned count() { return !!uc[0] + !!uc[1] + !!uc[2] + !!uc[3]; }
82
83	void init_group_literals(alu_group_node *g);
84
85};
86
87class rp_gpr_tracker {
88	// rp[cycle][elem]
89	unsigned rp[3][4];
90	unsigned uc[3][4];
91
92public:
93	rp_gpr_tracker() : rp(), uc() {}
94
95	bool try_reserve(alu_node *n);
96	void unreserve(alu_node *n);
97
98	bool try_reserve(unsigned cycle, unsigned sel, unsigned chan);
99	void unreserve(unsigned cycle, unsigned sel, unsigned chan);
100
101	void reset();
102
103	void dump();
104};
105
106class alu_group_tracker {
107
108	shader &sh;
109
110	rp_kcache_tracker kc;
111	rp_gpr_tracker gpr;
112	literal_tracker lt;
113
114	alu_node * slots[5];
115
116	unsigned available_slots;
117
118	unsigned max_slots;
119
120	typedef std::map<value*, unsigned> value_index_map;
121
122	value_index_map vmap;
123
124	bool has_mova;
125	bool uses_ar;
126	bool has_predset;
127	bool has_kill;
128	bool updates_exec_mask;
129
130	bool consumes_lds_oqa;
131	bool produces_lds_oqa;
132	unsigned chan_count[4];
133
134	// param index + 1 (0 means that group doesn't refer to Params)
135	// we can't use more than one param index in a group
136	unsigned interp_param;
137
138	unsigned next_id;
139
140	node_vec packed_ops;
141
142	void assign_slot(unsigned slot, alu_node *n);
143
144public:
145	alu_group_tracker(shader &sh);
146
147	// FIXME use fast bs correctness check (values for same chan <= 3) ??
148	bool try_reserve(alu_node *n);
149	bool try_reserve(alu_packed_node *p);
150
151	void reinit();
152	void reset(bool keep_packed = false);
153
154	sel_chan get_value_id(value *v);
155	void update_flags(alu_node *n);
156
157	alu_node* slot(unsigned i) { return slots[i]; }
158
159	unsigned used_slots() {
160		return (~available_slots) & ((1 << max_slots) - 1);
161	}
162
163	unsigned inst_count() {
164		return __builtin_popcount(used_slots());
165	}
166
167	unsigned literal_count() { return lt.count(); }
168	unsigned literal_slot_count() { return (literal_count() + 1) >> 1; };
169	unsigned slot_count() { return inst_count() + literal_slot_count(); }
170
171	bool get_consumes_lds_oqa() { return consumes_lds_oqa; }
172	bool get_produces_lds_oqa() { return produces_lds_oqa; }
173	alu_group_node* emit();
174
175	rp_kcache_tracker& kcache() { return kc; }
176
177	bool has_update_exec_mask() { return updates_exec_mask; }
178	unsigned avail_slots() { return available_slots; }
179
180	void discard_all_slots(container_node &removed_nodes);
181	void discard_slots(unsigned slot_mask, container_node &removed_nodes);
182
183	bool has_ar_load() { return has_mova; }
184};
185
186class alu_kcache_tracker {
187	bc_kcache kc[4];
188	sb_set<unsigned> lines;
189	unsigned max_kcs;
190
191public:
192
193	alu_kcache_tracker(sb_hw_class hc)
194		: kc(), lines(), max_kcs(hc >= HW_CLASS_EVERGREEN ? 4 : 2) {}
195
196	void reset();
197	bool try_reserve(alu_group_tracker &gt);
198	bool update_kc();
199	void init_clause(bc_cf &bc) {
200		memcpy(bc.kc, kc, sizeof(kc));
201	}
202};
203
204class alu_clause_tracker {
205	shader &sh;
206
207	alu_kcache_tracker kt;
208	unsigned slot_count;
209
210	alu_group_tracker grp0;
211	alu_group_tracker grp1;
212
213	unsigned group;
214
215	cf_node *clause;
216
217	bool push_exec_mask;
218
219	unsigned outstanding_lds_oqa_reads;
220public:
221	container_node conflict_nodes;
222
223	// current values of AR and PR registers that we have to preload
224	// till the end of clause (in fact, beginning, because we're scheduling
225	// bottom-up)
226	value *current_ar;
227	value *current_pr;
228	// current values of CF_IDX registers that need preloading
229	value *current_idx[2];
230
231	alu_clause_tracker(shader &sh);
232
233	void reset();
234
235	// current group
236	alu_group_tracker& grp() { return group ? grp1 : grp0; }
237	// previous group
238	alu_group_tracker& prev_grp() { return group ? grp0 : grp1; }
239
240	void emit_group();
241	void emit_clause(container_node *c);
242	bool check_clause_limits();
243	void new_group();
244	bool is_empty();
245
246	alu_node* create_ar_load(value *v, chan_select ar_channel);
247
248	void discard_current_group();
249
250	unsigned total_slots() { return slot_count; }
251};
252
253class post_scheduler : public pass {
254
255	container_node ready, ready_copies; // alu only
256	container_node pending, bb_pending;
257	bb_node *cur_bb;
258	val_set live; // values live at the end of the alu clause
259	uc_map ucm;
260	alu_clause_tracker alu;
261
262	typedef std::map<sel_chan, value*> rv_map;
263	rv_map regmap, prev_regmap;
264
265	val_set cleared_interf;
266
267	void emit_index_registers();
268public:
269
270	post_scheduler(shader &sh) : pass(sh),
271		ready(), ready_copies(), pending(), cur_bb(),
272		live(), ucm(), alu(sh),	regmap(), cleared_interf() {}
273
274	virtual int run();
275	bool run_on(container_node *n);
276	bool schedule_bb(bb_node *bb);
277
278	void load_index_register(value *v, unsigned idx);
279	void process_fetch(container_node *c);
280
281	bool process_alu(container_node *c);
282	bool schedule_alu(container_node *c);
283	bool prepare_alu_group();
284
285	void release_op(node *n);
286
287	void release_src_values(node *n);
288	void release_src_vec(vvec &vv, bool src);
289	void release_src_val(value *v);
290
291	void init_uc_val(container_node *c, value *v);
292	void init_uc_vec(container_node *c, vvec &vv, bool src);
293	unsigned init_ucm(container_node *c, node *n);
294
295	void init_regmap();
296
297	bool check_interferences();
298
299	unsigned try_add_instruction(node *n);
300
301	bool check_copy(node *n);
302	void dump_group(alu_group_tracker &rt);
303
304	bool unmap_dst(alu_node *n);
305	bool unmap_dst_val(value *d);
306
307	bool map_src(alu_node *n);
308	bool map_src_vec(vvec &vv, bool src);
309	bool map_src_val(value *v);
310
311	bool recolor_local(value *v);
312
313	void update_local_interferences();
314	void update_live_src_vec(vvec &vv, val_set *born, bool src);
315	void update_live_dst_vec(vvec &vv);
316	void update_live(node *n, val_set *born);
317	void process_group();
318
319	void set_color_local_val(value *v, sel_chan color);
320	void set_color_local(value *v, sel_chan color);
321
322	void add_interferences(value *v, sb_bitset &rb, val_set &vs);
323
324	void init_globals(val_set &s, bool prealloc);
325
326	void recolor_locals();
327
328	void dump_regmap();
329
330	void emit_load_ar();
331	void emit_clause();
332
333	void process_ready_copies();
334};
335
336} // namespace r600_sb
337
338#endif /* SB_SCHED_H_ */
339