1/*
2 * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 *      Vadim Girlin
25 */
26
27#ifndef SB_BC_H_
28#define SB_BC_H_
29
#include <stdint.h>
#include "r600_isa.h"

#include <algorithm>
#include <cstdio>
#include <stack>
#include <string>
#include <vector>
37
38struct r600_bytecode;
39struct r600_shader;
40
41namespace r600_sb {
42
43class hw_encoding_format;
44class node;
45class alu_node;
46class cf_node;
47class fetch_node;
48class alu_group_node;
49class region_node;
50class shader;
51class value;
52
/**
 * Minimal abstract text output stream.
 *
 * Derived classes implement write(); every formatting helper below renders
 * its argument into a bounded stack buffer and forwards the resulting C
 * string to write().
 */
class sb_ostream {
public:
	sb_ostream() {}

	// Polymorphic base class: keep destruction through a base pointer
	// well defined.
	virtual ~sb_ostream() {}

	/// Emit the NUL-terminated string @p s to the underlying sink.
	virtual void write(const char *s) = 0;

	sb_ostream& operator <<(const char *s) {
		write(s);
		return *this;
	}

	sb_ostream& operator <<(const std::string& s) {
		return *this << s.c_str();
	}

	/// Print a pointer using the platform's "%p" representation.
	sb_ostream& operator <<(void *p) {
		char b[32];
		snprintf(b, sizeof(b), "%p", p);
		return *this << b;
	}

	/// Print a single character.
	sb_ostream& operator <<(char c) {
		const char b[2] = { c, '\0' };
		return *this << b;
	}

	sb_ostream& operator <<(int n) {
		char b[32];
		snprintf(b, sizeof(b), "%d", n);
		return *this << b;
	}

	sb_ostream& operator <<(unsigned n) {
		char b[32];
		snprintf(b, sizeof(b), "%u", n);
		return *this << b;
	}

	sb_ostream& operator <<(double d) {
		char b[32];
		snprintf(b, sizeof(b), "%g", d);
		return *this << b;
	}

	// The width helpers pass the width through the '*' specifier instead of
	// generating a format string at run time; the old 8-byte format buffer
	// could overflow for large widths. Output is still limited to the
	// 256-byte buffer (255 characters), as before.

	// print as field of specified width, right aligned
	void print_w(int n, int width) {
		char b[256];
		snprintf(b, sizeof(b), "%*d", width, n);
		write(b);
	}

	// print as field of specified width, left aligned
	void print_wl(int n, int width) {
		char b[256];
		snprintf(b, sizeof(b), "%-*d", width, n);
		write(b);
	}

	// print as field of specified width, left aligned
	void print_wl(const std::string &s, int width) {
		write(s.c_str());
		// pad with spaces up to the requested width
		int l = s.length();
		while (l++ < width) {
			write(" ");
		}
	}

	// print int as field of specified width, right aligned, zero-padded
	void print_zw(int n, int width) {
		char b[256];
		snprintf(b, sizeof(b), "%0*d", width, n);
		write(b);
	}

	// print int as field of specified width, right aligned, zero-padded, hex
	void print_zw_hex(int n, int width) {
		char b[256];
		// "%x" expects an unsigned argument
		snprintf(b, sizeof(b), "%0*x", width, static_cast<unsigned>(n));
		write(b);
	}
};
139
140class sb_ostringstream : public sb_ostream {
141	std::string data;
142public:
143	sb_ostringstream() : data() {}
144
145	virtual void write(const char *s) {
146		data += s;
147	}
148
149	void clear() { data.clear(); }
150
151	const char* c_str() { return data.c_str(); }
152	std::string& str() { return data; }
153};
154
155class sb_log : public sb_ostream {
156	FILE *o;
157public:
158	sb_log() : o(stderr) {}
159
160	virtual void write(const char *s) {
161		fputs(s, o);
162	}
163};
164
165extern sb_log sblog;
166
// Shader kinds distinguished by this code. Enumerators are implicitly
// numbered from 0, so TARGET_NUM (kept last) equals the number of real
// targets, TARGET_UNKNOWN included.
enum shader_target
{
	TARGET_UNKNOWN,
	TARGET_VS,
	TARGET_ES,
	TARGET_PS,
	TARGET_GS,
	TARGET_GS_COPY,
	TARGET_COMPUTE,
	TARGET_FETCH,
	TARGET_HS,
	TARGET_LS,

	TARGET_NUM
};
182
// Hardware classes encoded as single-bit flags, so that sets of classes can
// be expressed with bitwise OR (see the combined values below). HB_ALL is
// the union of all four class bits.
enum sb_hw_class_bits
{
	HB_R6	= (1<<0),
	HB_R7	= (1<<1),
	HB_EG	= (1<<2),
	HB_CM	= (1<<3),

	HB_R6R7 = (HB_R6 | HB_R7),
	HB_EGCM = (HB_EG | HB_CM),
	HB_R6R7EG = (HB_R6 | HB_R7 | HB_EG),
	HB_R7EGCM = (HB_R7 | HB_EG | HB_CM),

	HB_ALL = (HB_R6 | HB_R7 | HB_EG | HB_CM)
};
197
// Chip identifiers, implicitly numbered from 0 (HW_CHIP_UNKNOWN == 0).
// sb_context::needs_8xx_stack_workaround() switches on individual values.
enum sb_hw_chip
{
	HW_CHIP_UNKNOWN,
	HW_CHIP_R600,
	HW_CHIP_RV610,
	HW_CHIP_RV630,
	HW_CHIP_RV670,
	HW_CHIP_RV620,
	HW_CHIP_RV635,
	HW_CHIP_RS780,
	HW_CHIP_RS880,
	HW_CHIP_RV770,
	HW_CHIP_RV730,
	HW_CHIP_RV710,
	HW_CHIP_RV740,
	HW_CHIP_CEDAR,
	HW_CHIP_REDWOOD,
	HW_CHIP_JUNIPER,
	HW_CHIP_CYPRESS,
	HW_CHIP_HEMLOCK,
	HW_CHIP_PALM,
	HW_CHIP_SUMO,
	HW_CHIP_SUMO2,
	HW_CHIP_BARTS,
	HW_CHIP_TURKS,
	HW_CHIP_CAICOS,
	HW_CHIP_CAYMAN,
	HW_CHIP_ARUBA
};
227
// Hardware classes. Declaration order matters: sb_context::is_egcm() tests
// hw_class >= HW_CLASS_EVERGREEN, so EVERGREEN and CAYMAN must stay last.
enum sb_hw_class
{
	HW_CLASS_UNKNOWN,
	HW_CLASS_R600,
	HW_CLASS_R700,
	HW_CLASS_EVERGREEN,
	HW_CLASS_CAYMAN
};
236
// Indices of the ALU slots. NOTE(review): sb_context::alu_slots_mask()
// appears to use bit N of its result for slot N (0x01 is documented there
// as SLOT_X) - confirm before renumbering.
enum alu_slots {
	SLOT_X = 0,
	SLOT_Y = 1,
	SLOT_Z = 2,
	SLOT_W = 3,
	SLOT_TRANS = 4
};
244
// Miscellaneous numeric limits. What exactly each limit counts is not
// visible in this header - check the users before changing a value.
enum misc_consts {
	MAX_ALU_LITERALS = 4,
	MAX_ALU_SLOTS = 128,
	MAX_GPR = 128,
	MAX_CHAN = 4

};
252
// Named ALU source selector values. Presumably these are stored in
// bc_alu_src::sel (a 9-bit field, which all values below fit into) - confirm
// against the decoder/builder. sb_context::is_lds_oq() matches the numeric
// range 0xdb..0xde, i.e. ALU_SRC_LDS_OQ_A .. ALU_SRC_LDS_OQ_B_POP.
enum alu_src_sel {

	ALU_SRC_LDS_OQ_A = 219,
	ALU_SRC_LDS_OQ_B = 220,
	ALU_SRC_LDS_OQ_A_POP = 221,
	ALU_SRC_LDS_OQ_B_POP = 222,
	ALU_SRC_LDS_DIRECT_A = 223,
	ALU_SRC_LDS_DIRECT_B = 224,
	ALU_SRC_TIME_HI = 227,
	ALU_SRC_TIME_LO = 228,
	ALU_SRC_MASK_HI = 229,
	ALU_SRC_MASK_LO = 230,
	ALU_SRC_HW_WAVE_ID = 231,
	ALU_SRC_SIMD_ID = 232,
	ALU_SRC_SE_ID = 233,
	ALU_SRC_HW_THREADGRP_ID = 234,
	ALU_SRC_WAVE_ID_IN_GRP = 235,
	ALU_SRC_NUM_THREADGRP_WAVES = 236,
	ALU_SRC_HW_ALU_ODD = 237,
	ALU_SRC_LOOP_IDX = 238,
	ALU_SRC_PARAM_BASE_ADDR = 240,
	ALU_SRC_NEW_PRIM_MASK = 241,
	ALU_SRC_PRIM_MASK_HI = 242,
	ALU_SRC_PRIM_MASK_LO = 243,
	ALU_SRC_1_DBL_L = 244,
	ALU_SRC_1_DBL_M = 245,
	ALU_SRC_0_5_DBL_L = 246,
	ALU_SRC_0_5_DBL_M = 247,
	ALU_SRC_0 = 248,
	ALU_SRC_1 = 249,
	ALU_SRC_1_INT = 250,
	ALU_SRC_M_1_INT = 251,
	ALU_SRC_0_5 = 252,
	ALU_SRC_LITERAL = 253,
	ALU_SRC_PV = 254,
	ALU_SRC_PS = 255,

	ALU_SRC_PARAM_OFFSET = 448
};
292
// Predicate select values. Value 1 is reserved (see the commented-out
// entry), so the numbering is intentionally not contiguous.
enum alu_predicate_select
{
	PRED_SEL_OFF	= 0,
//	RESERVED		= 1,
	PRED_SEL_0		= 2,
	PRED_SEL_1		= 3
};
300
301
// Output modifier selector values. NOTE(review): the names suggest
// multiply-by-2, multiply-by-4 and divide-by-2 - confirm against the ISA
// documentation.
enum alu_omod {
	OMOD_OFF  = 0,
	OMOD_M2   = 1,
	OMOD_M4   = 2,
	OMOD_D2   = 3
};
308
// ALU index mode values. The *_R600 suffix on values 1..3 presumably marks
// them as specific to the R600 hardware class - confirm against users.
enum alu_index_mode {
	INDEX_AR_X        = 0,
	INDEX_AR_Y_R600   = 1,
	INDEX_AR_Z_R600   = 2,
	INDEX_AR_W_R600   = 3,

	INDEX_LOOP        = 4,
	INDEX_GLOBAL      = 5,
	INDEX_GLOBAL_AR_X = 6
};
319
// MOVA destination selectors (Cayman, judging by the CM_ prefix), implicitly
// numbered from 0.
enum alu_cayman_mova_dst {
	CM_MOVADST_AR_X,
	CM_MOVADST_PC,
	CM_MOVADST_IDX0,
	CM_MOVADST_IDX1,
	CM_MOVADST_CG0,		// clause-global byte 0
	CM_MOVADST_CG1,
	CM_MOVADST_CG2,
	CM_MOVADST_CG3
};
330
// Execute-mask operation selectors (Cayman, judging by the CM_ prefix),
// implicitly numbered from 0.
enum alu_cayman_exec_mask_op {
	CM_EMO_DEACTIVATE,
	CM_EMO_BREAK,
	CM_EMO_CONTINUE,
	CM_EMO_KILL
};
337
338
// Export types, implicitly numbered from 0; EXP_TYPE_COUNT (kept last) is
// the number of export types.
enum cf_exp_type {
	EXP_PIXEL,
	EXP_POS,
	EXP_PARAM,

	EXP_TYPE_COUNT
};
346
// Memory write types, implicitly numbered 0..3. With this numbering bit 0
// distinguishes the _IND variants and bit 1 the _ACK variants.
enum cf_mem_type {
	MEM_WRITE,
	MEM_WRITE_IND,
	MEM_WRITE_ACK,
	MEM_WRITE_IND_ACK
};
353
354
// Values for bc_kcache::mode; bc_cf::is_alu_extended() compares that field
// against KC_LOCK_NONE.
enum alu_kcache_mode {
	KC_LOCK_NONE,
	KC_LOCK_1,
	KC_LOCK_2,
	KC_LOCK_LOOP
};
361
// Values for bc_kcache::index_mode; bc_cf::is_alu_extended() compares that
// field against KC_INDEX_NONE.
enum alu_kcache_index_mode {
	KC_INDEX_NONE,
	KC_INDEX_0,
	KC_INDEX_1,
	KC_INDEX_INVALID
};
368
// Channel select values: the four channels X..W, the selectors SEL_0 and
// SEL_1 (presumably the constants 0 and 1 - confirm), and SEL_MASK. Value 6
// is reserved.
enum chan_select {
	SEL_X	= 0,
	SEL_Y	= 1,
	SEL_Z	= 2,
	SEL_W	= 3,
	SEL_0	= 4,
	SEL_1	= 5,
//	RESERVED = 6,
	SEL_MASK = 7
};
379
// Bank swizzle values. Two value sets share this enum: VEC_* (VEC_NUM of
// them) and SCL_* (SCL_NUM of them). Their numeric values overlap, so which
// set a given number belongs to depends on the context in which it is used.
enum bank_swizzle {
	VEC_012 = 0,
	VEC_021 = 1,
	VEC_120 = 2,
	VEC_102 = 3,
	VEC_201 = 4,
	VEC_210 = 5,

	VEC_NUM = 6,

	SCL_210 = 0,
	SCL_122 = 1,
	SCL_212 = 2,
	SCL_221 = 3,

	SCL_NUM = 4

};
398
// Scheduler queue identifiers, implicitly numbered from 0; SQ_NUM (kept
// last) is the number of queues.
enum sched_queue_id {
	SQ_CF,
	SQ_ALU,
	SQ_TEX,
	SQ_VTX,
	SQ_GDS,

	SQ_NUM
};
408
/**
 * 32-bit literal value that can be viewed as a signed integer, an unsigned
 * integer or a float; all three views share the same storage.
 *
 * Equality always compares the raw 32-bit pattern, so e.g. the floats +0.0
 * and -0.0 are different literals.
 */
struct literal {
	union {
		int32_t i;
		uint32_t u;
		float f;
	};

	literal(int32_t i = 0) : i(i) {}
	literal(uint32_t u) : u(u) {}
	literal(float f) : f(f) {}
	// The double is narrowed to float before being stored.
	literal(double f) : f(f) {}

	// Raw bit pattern of the literal.
	operator uint32_t() const { return u; }

	// The comparisons do not modify the object, so they are const-qualified
	// and usable on const literals.
	bool operator ==(literal l) const { return u == l.u; }
	bool operator ==(int v_int) const { return i == v_int; }
	bool operator ==(unsigned v_uns) const { return u == v_uns; }
};
425
// Description of one kcache set attached to a CF instruction (bc_cf holds
// four of these). bc_cf::is_alu_extended() compares mode against
// KC_LOCK_NONE and index_mode against KC_INDEX_NONE, so those two fields
// presumably hold alu_kcache_mode / alu_kcache_index_mode values.
struct bc_kcache {
	unsigned mode;
	unsigned bank;
	unsigned addr;
	unsigned index_mode;
} ;
432
433// TODO optimize bc structures
434
435struct bc_cf {
436
437	bc_kcache kc[4];
438
439	unsigned id;
440
441
442	const cf_op_info * op_ptr;
443	unsigned op;
444
445	unsigned addr:32;
446
447	unsigned alt_const:1;
448	unsigned uses_waterfall:1;
449
450	unsigned barrier:1;
451	unsigned count:7;
452	unsigned pop_count:3;
453	unsigned call_count:6;
454	unsigned whole_quad_mode:1;
455	unsigned valid_pixel_mode:1;
456
457	unsigned jumptable_sel:3;
458	unsigned cf_const:5;
459	unsigned cond:2;
460	unsigned end_of_program:1;
461
462	unsigned array_base:13;
463	unsigned elem_size:2;
464	unsigned index_gpr:7;
465	unsigned rw_gpr:7;
466	unsigned rw_rel:1;
467	unsigned type:2;
468
469	unsigned burst_count:4;
470	unsigned mark:1;
471	unsigned sel[4];
472
473	unsigned array_size:12;
474	unsigned comp_mask:4;
475
476	unsigned rat_id:4;
477	unsigned rat_inst:6;
478	unsigned rat_index_mode:2;
479
480	void set_op(unsigned op) { this->op = op; op_ptr = r600_isa_cf(op); }
481
482	bool is_alu_extended() {
483		assert(op_ptr->flags & CF_ALU);
484		return kc[2].mode != KC_LOCK_NONE || kc[3].mode != KC_LOCK_NONE ||
485			kc[0].index_mode != KC_INDEX_NONE || kc[1].index_mode != KC_INDEX_NONE ||
486			kc[2].index_mode != KC_INDEX_NONE || kc[3].index_mode != KC_INDEX_NONE;
487	}
488
489};
490
491struct bc_alu_src {
492	unsigned sel:9;
493	unsigned chan:2;
494	unsigned neg:1;
495	unsigned abs:1;
496	unsigned rel:1;
497	literal value;
498
499	void clear() {
500		sel = 0;
501		chan = 0;
502		neg = 0;
503		abs = 0;
504		rel = 0;
505		value = 0;
506	}
507};
508
509struct bc_alu {
510	const alu_op_info * op_ptr;
511	unsigned op;
512
513	bc_alu_src src[3];
514
515	unsigned dst_gpr:7;
516	unsigned dst_chan:2;
517	unsigned dst_rel:1;
518	unsigned clamp:1;
519	unsigned omod:2;
520	unsigned bank_swizzle:3;
521
522	unsigned index_mode:3;
523	unsigned last:1;
524	unsigned pred_sel:2;
525
526	unsigned fog_merge:1;
527	unsigned write_mask:1;
528	unsigned update_exec_mask:1;
529	unsigned update_pred:1;
530
531	unsigned slot:3;
532
533	unsigned lds_idx_offset:6;
534
535	alu_op_flags slot_flags;
536
537	void set_op(unsigned op) {
538		this->op = op;
539		op_ptr = r600_isa_alu(op);
540	}
541	void clear() {
542		op_ptr = nullptr;
543		op = 0;
544		for (int i = 0; i < 3; ++i)
545			src[i].clear();
546		dst_gpr = 0;
547		dst_chan = 0;
548		dst_rel = 0;
549		clamp = 0;
550		omod = 0;
551		bank_swizzle = 0;
552		index_mode = 0;
553		last = 0;
554		pred_sel = 0;
555		fog_merge = 0;
556		write_mask = 0;
557		update_exec_mask = 0;
558		update_pred = 0;
559		slot = 0;
560		lds_idx_offset = 0;
561		slot_flags = AF_NONE;
562	}
563	bc_alu() {
564		clear();
565	}
566};
567
// Field-by-field representation of one fetch instruction. The existing
// comments mark the fields that apply to GDS and MEM ops. There is no
// constructor: the fields are not initialized by this type.
struct bc_fetch {
	// ISA table entry for op; kept in sync with op by set_op()
	const fetch_op_info * op_ptr;
	unsigned op;

	unsigned bc_frac_mode:1;
	unsigned fetch_whole_quad:1;
	unsigned resource_id:8;

	unsigned src_gpr:7;
	unsigned src_rel:1;
	unsigned src_rel_global:1; /* for GDS ops */
	unsigned src_sel[4];

	unsigned dst_gpr:7;
	unsigned dst_rel:1;
	unsigned dst_rel_global:1; /* for GDS ops */
	unsigned dst_sel[4];

	unsigned alt_const:1;

	unsigned inst_mod:2;
	unsigned resource_index_mode:2;
	unsigned sampler_index_mode:2;

	unsigned coord_type[4];
	unsigned lod_bias:7;

	unsigned offset[3];

	unsigned sampler_id:5;


	unsigned fetch_type:2;
	unsigned mega_fetch_count:6;
	unsigned coalesced_read:1;
	unsigned structured_read:2;
	unsigned lds_req:1;

	unsigned data_format:6;
	unsigned format_comp_all:1;
	unsigned num_format_all:2;
	unsigned semantic_id:8;
	unsigned srf_mode_all:1;
	unsigned use_const_fields:1;

	unsigned const_buf_no_stride:1;
	unsigned endian_swap:2;
	unsigned mega_fetch:1;

	unsigned src2_gpr:7; /* for GDS */
	unsigned alloc_consume:1;
	unsigned uav_id:4;
	unsigned uav_index_mode:2;
	unsigned bcast_first_req:1;

	/* for MEM ops */
	unsigned elem_size:2;
	unsigned uncached:1;
	unsigned indexed:1;
	unsigned burst_count:4;
	unsigned array_base:13;
	unsigned array_size:12;

	// Store the opcode and look up its ISA table entry.
	void set_op(unsigned op) { this->op = op; op_ptr = r600_isa_fetch(op); }
};
633
634struct shader_stats {
635	unsigned	ndw;
636	unsigned	ngpr;
637	unsigned	nstack;
638
639	unsigned	cf; // clause instructions not included
640	unsigned	alu;
641	unsigned	alu_clauses;
642	unsigned	fetch_clauses;
643	unsigned	fetch;
644	unsigned	alu_groups;
645
646	unsigned	shaders;		// number of shaders (for accumulated stats)
647
648	shader_stats() : ndw(), ngpr(), nstack(), cf(), alu(), alu_clauses(),
649			fetch_clauses(), fetch(), alu_groups(), shaders() {}
650
651	void collect(node *n);
652	void accumulate(shader_stats &s);
653	void dump();
654	void dump_diff(shader_stats &s);
655};
656
657class sb_context {
658
659public:
660
661	shader_stats src_stats, opt_stats;
662
663	r600_isa *isa;
664
665	sb_hw_chip hw_chip;
666	sb_hw_class hw_class;
667
668	unsigned alu_temp_gprs;
669	unsigned max_fetch;
670	bool has_trans;
671	unsigned vtx_src_num;
672	unsigned num_slots;
673	bool uses_mova_gpr;
674
675	bool r6xx_gpr_index_workaround;
676
677	bool stack_workaround_8xx;
678	bool stack_workaround_9xx;
679
680	unsigned wavefront_size;
681	unsigned stack_entry_size;
682
683	static unsigned dump_pass;
684	static unsigned dump_stat;
685
686	static unsigned dry_run;
687	static unsigned no_fallback;
688	static unsigned safe_math;
689
690	static unsigned dskip_start;
691	static unsigned dskip_end;
692	static unsigned dskip_mode;
693
694	sb_context() : src_stats(), opt_stats(), isa(0),
695			hw_chip(HW_CHIP_UNKNOWN), hw_class(HW_CLASS_UNKNOWN),
696			alu_temp_gprs(0), max_fetch(0), has_trans(false), vtx_src_num(0),
697			num_slots(0), uses_mova_gpr(false),
698			r6xx_gpr_index_workaround(false), stack_workaround_8xx(false),
699			stack_workaround_9xx(false), wavefront_size(0),
700			stack_entry_size(0) {}
701
702	int init(r600_isa *isa, sb_hw_chip chip, sb_hw_class cclass);
703
704	bool is_r600() {return hw_class == HW_CLASS_R600;}
705	bool is_r700() {return hw_class == HW_CLASS_R700;}
706	bool is_evergreen() {return hw_class == HW_CLASS_EVERGREEN;}
707	bool is_cayman() {return hw_class == HW_CLASS_CAYMAN;}
708	bool is_egcm() {return hw_class >= HW_CLASS_EVERGREEN;}
709
710	bool needs_8xx_stack_workaround() {
711		if (!is_evergreen())
712			return false;
713
714		switch (hw_chip) {
715		case HW_CHIP_HEMLOCK:
716		case HW_CHIP_CYPRESS:
717		case HW_CHIP_JUNIPER:
718			return false;
719		default:
720			return true;
721		}
722	}
723
724	bool needs_9xx_stack_workaround() {
725		return is_cayman();
726	}
727
728	sb_hw_class_bits hw_class_bit() {
729		switch (hw_class) {
730		case HW_CLASS_R600:return HB_R6;
731		case HW_CLASS_R700:return HB_R7;
732		case HW_CLASS_EVERGREEN:return HB_EG;
733		case HW_CLASS_CAYMAN:return HB_CM;
734		default: assert(!"unknown hw class"); return (sb_hw_class_bits)0;
735
736		}
737	}
738
739	unsigned cf_opcode(unsigned op) {
740		return r600_isa_cf_opcode(isa->hw_class, op);
741	}
742
743	unsigned alu_opcode(unsigned op) {
744		return r600_isa_alu_opcode(isa->hw_class, op);
745	}
746
747	unsigned alu_slots(unsigned op) {
748		return r600_isa_alu_slots(isa->hw_class, op);
749	}
750
751	unsigned alu_slots(const alu_op_info * op_ptr) {
752		return op_ptr->slots[isa->hw_class];
753	}
754
755	unsigned alu_slots_mask(const alu_op_info * op_ptr) {
756		unsigned mask = 0;
757		unsigned slot_flags = alu_slots(op_ptr);
758		if (slot_flags & AF_V)
759			mask = 0x0F;
760		if (!is_cayman() && (slot_flags & AF_S))
761			mask |= 0x10;
762		/* Force LDS_IDX ops into SLOT_X */
763		if (op_ptr->opcode[0] == -1 && ((op_ptr->opcode[1] & 0xFF) == 0x11))
764			mask = 0x01;
765		return mask;
766	}
767
768	unsigned fetch_opcode(unsigned op) {
769		return r600_isa_fetch_opcode(isa->hw_class, op);
770	}
771
772	bool is_kcache_sel(unsigned sel) {
773		return ((sel >= 128 && sel < 192) || (sel >= 256 && sel < 320));
774	}
775
776	bool is_lds_oq(unsigned sel) {
777		return (sel >= 0xdb && sel <= 0xde);
778	}
779
780	const char * get_hw_class_name();
781	const char * get_hw_chip_name();
782
783};
784
// Execute the statement(s) `a` only when the corresponding static switch in
// sb_context (dump_stat / dump_pass) is non-zero. The do/while(0) wrapper
// makes each macro usable as a single statement.
#define SB_DUMP_STAT(a) do { if (sb_context::dump_stat) { a } } while (0)
#define SB_DUMP_PASS(a) do { if (sb_context::dump_pass) { a } } while (0)
787
// Decoder over an array of 32-bit words. The decode_* functions are only
// declared here; each takes a position `i` by reference and fills in the
// matching bc_* structure. NOTE(review): `i` is presumably a dword index
// that is advanced past the decoded instruction and the int result an error
// code - confirm in the implementation.
class bc_decoder {

	sb_context &ctx;

	// input words and their count; the buffer is not owned or copied here
	uint32_t* dw;
	unsigned ndw;

public:

	bc_decoder(sb_context &sctx, uint32_t *data, unsigned size)
		: ctx(sctx), dw(data), ndw(size) {}

	int decode_cf(unsigned &i, bc_cf &bc);
	int decode_alu(unsigned &i, bc_alu &bc);
	int decode_fetch(unsigned &i, bc_fetch &bc);

private:
	int decode_cf_alu(unsigned &i, bc_cf &bc);
	int decode_cf_exp(unsigned &i, bc_cf &bc);
	int decode_cf_mem(unsigned &i, bc_cf &bc);

	int decode_fetch_vtx(unsigned &i, bc_fetch &bc);
	int decode_fetch_gds(unsigned &i, bc_fetch &bc);
	int decode_fetch_mem(unsigned &i, bc_fetch &bc);
};
813
814// bytecode format definition
815
// Base class for one 32-bit encoded word together with the set of hardware
// classes (hw_target, a mask of sb_hw_class_bits) the encoding is valid for.
class hw_encoding_format {
	const sb_hw_class_bits hw_target; //FIXME: debug - remove after testing
	// Default constructor is declared private and not defined in this
	// header, so a hardware class set must always be supplied.
	hw_encoding_format();
protected:
	uint32_t value;
public:
	// Start from an all-zero word.
	hw_encoding_format(sb_hw_class_bits hw)
		: hw_target(hw), value(0) {}
	// Start from an existing encoded word v.
	hw_encoding_format(uint32_t v, sb_hw_class_bits hw)
		: hw_target(hw), value(v) {}
	// Return the encoded word. Asserts that every bit of hw is contained in
	// hw_target, i.e. that the format is valid for the requested class(es).
	uint32_t get_value(sb_hw_class_bits hw) const {
		assert((hw & hw_target) == hw);
		return value;
	}
};
831
// BC_FORMAT_BEGIN_HW(fmt, hwset) opens the definition of a class named
// fmt_hwset derived from hw_encoding_format and valid for HB_<hwset>. It
// provides a `thistype` typedef plus a default constructor and a constructor
// from an existing word. The class body is left open and must be closed with
// BC_FORMAT_END.
#define BC_FORMAT_BEGIN_HW(fmt, hwset) \
class fmt##_##hwset : public hw_encoding_format {\
	typedef fmt##_##hwset thistype; \
public: \
	fmt##_##hwset() : hw_encoding_format(HB_##hwset) {}; \
	fmt##_##hwset(uint32_t v) : hw_encoding_format(v, HB_##hwset) {};

// Same as BC_FORMAT_BEGIN_HW for a format valid on all hardware classes.
#define BC_FORMAT_BEGIN(fmt) BC_FORMAT_BEGIN_HW(fmt, ALL)

// Closes the class opened by BC_FORMAT_BEGIN / BC_FORMAT_BEGIN_HW.
#define BC_FORMAT_END(fmt) };

// bytecode format field definition

// BC_FIELD defines a chainable setter `name(v)` and a getter `get_name()`
// for the bit range [first_bit, last_bit] of `value`. The setter ORs the
// masked value into the word and does not clear bits that are already set,
// so it is only exact on a field that is still zero. The mask is computed
// with 1ull so that a field as wide as 32 bits does not overflow the shift.
// The `fmt` and `shortname` arguments are not used by this expansion.
#define BC_FIELD(fmt, name, shortname, last_bit, first_bit) \
	thistype & name(unsigned v) { \
		value |= ((v&((1ull<<((last_bit)-(first_bit)+1))-1))<<(first_bit)); \
		return *this; \
	} \
	unsigned get_##name() const { \
		return (value>>(first_bit))&((1ull<<((last_bit)-(first_bit)+1))-1); \
	}

// Reserved bit ranges generate no code.
#define BC_RSRVD(fmt, last_bit, first_bit)
855
856// CLAMP macro defined elsewhere interferes with bytecode field name
857#undef CLAMP
858#include "sb_bc_fmt_def.inc"
859
860#undef BC_FORMAT_BEGIN
861#undef BC_FORMAT_END
862#undef BC_FIELD
863#undef BC_RSRVD
864
// Parser that works from an r600_bytecode / r600_shader pair and makes the
// resulting `shader` object available through get_shader(). Only the
// constructor and get_shader() are defined here; the public entry points are
// decode() and prepare(). NOTE(review): the int results are presumably error
// codes (0 on success) - confirm in the implementation.
class bc_parser {
	sb_context & ctx;

	bc_decoder *dec;

	r600_bytecode *bc;
	r600_shader *pshader;

	uint32_t *dw;
	unsigned bc_ndw;

	unsigned max_cf;

	shader *sh;

	int error;

	alu_node *slots[2][5];
	unsigned cgroup;

	typedef std::vector<cf_node*> id_cf_map;
	id_cf_map cf_map;

	typedef std::stack<region_node*> region_stack;
	region_stack loop_stack;

	bool gpr_reladdr;

	// Note: currently relies on input emitting SET_CF in same basic block as uses
	value *cf_index_value[2];
	alu_node *mova;
public:

	// All members other than ctx, bc and pshader are value-initialized
	// (null pointers, zero counters, empty containers).
	bc_parser(sb_context &sctx, r600_bytecode *bc, r600_shader* pshader) :
		ctx(sctx), dec(), bc(bc), pshader(pshader),
		dw(), bc_ndw(), max_cf(),
		sh(), error(), slots(), cgroup(),
		cf_map(), loop_stack(), gpr_reladdr(), cf_index_value(), mova() { }

	int decode();
	int prepare();

	// Parsed shader; asserts that no error has been recorded.
	shader* get_shader() { assert(!error); return sh; }

private:

	int decode_shader();

	int parse_decls();

	int decode_cf(unsigned &i, bool &eop);

	int decode_alu_clause(cf_node *cf);
	int decode_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt);

	int decode_fetch_clause(cf_node *cf);

	int prepare_ir();
	int prepare_alu_clause(cf_node *cf);
	int prepare_alu_group(cf_node* cf, alu_group_node *g);
	int prepare_fetch_clause(cf_node *cf);

	int prepare_loop(cf_node *c);
	int prepare_if(cf_node *c);

	void save_set_cf_index(value *val, unsigned idx);
	value *get_cf_index_value(unsigned idx);
	void save_mova(alu_node *mova);
	alu_node *get_mova();
};
935
936
937
938
939class bytecode {
940	typedef std::vector<uint32_t> bc_vector;
941	sb_hw_class_bits hw_class_bit;
942
943	bc_vector bc;
944
945	unsigned pos;
946
947public:
948
949	bytecode(sb_hw_class_bits hw, unsigned rdw = 256)
950		: hw_class_bit(hw), pos(0) { bc.reserve(rdw); }
951
952	unsigned ndw() { return bc.size(); }
953
954	void write_data(uint32_t* dst) {
955		std::copy(bc.begin(), bc.end(), dst);
956	}
957
958	void align(unsigned a) {
959		unsigned size = bc.size();
960		size = (size + a - 1) & ~(a-1);
961		bc.resize(size);
962	}
963
964	void set_size(unsigned sz) {
965		assert(sz >= bc.size());
966		bc.resize(sz);
967	}
968
969	void seek(unsigned p) {
970		if (p != pos) {
971			if (p > bc.size()) {
972				bc.resize(p);
973			}
974			pos = p;
975		}
976	}
977
978	unsigned get_pos() { return pos; }
979	uint32_t *data() { return &bc[0]; }
980
981	bytecode & operator <<(uint32_t v) {
982		if (pos == ndw()) {
983			bc.push_back(v);
984		} else
985			bc.at(pos) = v;
986		++pos;
987		return *this;
988	}
989
990	bytecode & operator <<(const hw_encoding_format &e) {
991		*this << e.get_value(hw_class_bit);
992		return *this;
993	}
994
995	bytecode & operator <<(const bytecode &b) {
996		bc.insert(bc.end(), b.bc.begin(), b.bc.end());
997		return *this;
998	}
999
1000	uint32_t at(unsigned dw_id) { return bc.at(dw_id); }
1001};
1002
1003
// Builder that produces a `bytecode` buffer for a shader. Only
// get_bytecode() is defined here; build() is the public entry point and the
// private build_* functions are declared per node type. NOTE(review): the
// int results are presumably error codes (0 on success) - confirm in the
// implementation.
class bc_builder {
	shader &sh;
	sb_context &ctx;
	bytecode bb;
	int error;

public:

	bc_builder(shader &s);
	int build();
	// Resulting bytecode; asserts that no error has been recorded.
	bytecode& get_bytecode() { assert(!error); return bb; }

private:

	int build_cf(cf_node *n);

	int build_cf_alu(cf_node *n);
	int build_cf_mem(cf_node *n);
	int build_cf_exp(cf_node *n);

	int build_alu_clause(cf_node *n);
	int build_alu_group(alu_group_node *n);
	int build_alu(alu_node *n);

	int build_fetch_clause(cf_node *n);
	int build_fetch_tex(fetch_node *n);
	int build_fetch_vtx(fetch_node *n);
	int build_fetch_gds(fetch_node *n);
	int build_fetch_mem(fetch_node* n);
};
1034
1035} // namespace r600_sb
1036
1037#endif /* SB_BC_H_ */
1038