1/*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23#ifndef R600_ASM_H
24#define R600_ASM_H
25
26#include "r600_pipe.h"
27#include "r600_isa.h"
28#include "tgsi/tgsi_exec.h"
29
30#ifdef __cplusplus
31extern "C" {
32#endif
33
/*
 * One source operand of an ALU instruction.  struct r600_bytecode_alu
 * embeds three of these in its src[] array.
 *
 * NOTE(review): the field names look like they follow the hardware ALU
 * source encoding (selector, channel, modifiers, constant-cache bank) -
 * confirm against the assembler before relying on that.
 */
struct r600_bytecode_alu_src {
	unsigned sel, chan;
	unsigned neg, abs, rel;
	unsigned kc_bank, kc_rel;
	uint32_t value;
};
44
/*
 * Destination operand of an ALU instruction; embedded in
 * struct r600_bytecode_alu as its dst member.
 */
struct r600_bytecode_alu_dst {
	unsigned sel, chan;
	unsigned clamp;
	unsigned write;
	unsigned rel;
};
52
53struct r600_bytecode_alu {
54	struct list_head		list;
55	struct r600_bytecode_alu_src		src[3];
56	struct r600_bytecode_alu_dst		dst;
57	unsigned			op;
58	unsigned			last;
59	unsigned			is_op3;
60	unsigned			is_lds_idx_op;
61	unsigned			execute_mask;
62	unsigned			update_pred;
63	unsigned			pred_sel;
64	unsigned			bank_swizzle;
65	unsigned			bank_swizzle_force;
66	unsigned			omod;
67	unsigned                        index_mode;
68	unsigned                        lds_idx;
69};
70
71struct r600_bytecode_tex {
72	struct list_head		list;
73	unsigned			op;
74	unsigned			inst_mod;
75	unsigned			resource_id;
76	unsigned			src_gpr;
77	unsigned			src_rel;
78	unsigned			dst_gpr;
79	unsigned			dst_rel;
80	unsigned			dst_sel_x;
81	unsigned			dst_sel_y;
82	unsigned			dst_sel_z;
83	unsigned			dst_sel_w;
84	unsigned			lod_bias;
85	unsigned			coord_type_x;
86	unsigned			coord_type_y;
87	unsigned			coord_type_z;
88	unsigned			coord_type_w;
89	int				offset_x;
90	int				offset_y;
91	int				offset_z;
92	unsigned			sampler_id;
93	unsigned			src_sel_x;
94	unsigned			src_sel_y;
95	unsigned			src_sel_z;
96	unsigned			src_sel_w;
97	/* indexed samplers/resources only on evergreen/cayman */
98	unsigned			sampler_index_mode;
99	unsigned			resource_index_mode;
100};
101
102struct r600_bytecode_vtx {
103	struct list_head		list;
104	unsigned			op;
105	unsigned			fetch_type;
106	unsigned			buffer_id;
107	unsigned			src_gpr;
108	unsigned			src_sel_x;
109	unsigned			mega_fetch_count;
110	unsigned			dst_gpr;
111	unsigned			dst_sel_x;
112	unsigned			dst_sel_y;
113	unsigned			dst_sel_z;
114	unsigned			dst_sel_w;
115	unsigned			use_const_fields;
116	unsigned			data_format;
117	unsigned			num_format_all;
118	unsigned			format_comp_all;
119	unsigned			srf_mode_all;
120	unsigned			offset;
121	unsigned			endian;
122	unsigned			buffer_index_mode;
123
124	// READ_SCRATCH fields
125	unsigned			uncached;
126	unsigned			indexed;
127	unsigned			src_sel_y;
128	unsigned			src_rel;
129	unsigned			elem_size;
130	unsigned			array_size;
131	unsigned			array_base;
132	unsigned			burst_count;
133	unsigned			dst_rel;
134};
135
136struct r600_bytecode_gds {
137	struct list_head		list;
138	unsigned			op;
139	unsigned			src_gpr;
140	unsigned			src_rel;
141	unsigned			src_sel_x;
142	unsigned			src_sel_y;
143	unsigned			src_sel_z;
144	unsigned			src_gpr2;
145	unsigned			dst_gpr;
146	unsigned			dst_rel;
147	unsigned			dst_sel_x;
148	unsigned			dst_sel_y;
149	unsigned			dst_sel_z;
150	unsigned			dst_sel_w;
151	unsigned			uav_index_mode;
152	unsigned                        uav_id;
153	unsigned                        alloc_consume;
154	unsigned                        bcast_first_req;
155};
156
/*
 * Parameters of an output (export / memory write) operation.  Embedded
 * in struct r600_bytecode_cf and also stored in
 * r600_bytecode::pending_outputs[].
 */
struct r600_bytecode_output {
	unsigned array_base, array_size;
	unsigned comp_mask;
	unsigned type;

	unsigned op;

	unsigned elem_size;
	unsigned gpr;
	unsigned swizzle_x, swizzle_y, swizzle_z, swizzle_w;
	unsigned burst_count;
	unsigned index_gpr;
	unsigned mark; /* used by MEM_SCRATCH */
};
175
/* RAT description; embedded in struct r600_bytecode_cf as its rat member. */
struct r600_bytecode_rat {
	unsigned id, inst;
	unsigned index_mode;
};
181
/*
 * One constant-cache ("kcache") slot; struct r600_bytecode_cf carries
 * four of these in its kcache[] array.
 */
struct r600_bytecode_kcache {
	unsigned bank, mode, addr;
	unsigned index_mode;
};
188
189struct r600_bytecode_cf {
190	struct list_head		list;
191
192	unsigned			op;
193	unsigned			addr;
194	unsigned			ndw;
195	unsigned			id;
196	unsigned			cond;
197	unsigned			pop_count;
198	unsigned			count;
199	unsigned			cf_addr; /* control flow addr */
200	struct r600_bytecode_kcache		kcache[4];
201	unsigned			r6xx_uses_waterfall;
202	unsigned			eg_alu_extended;
203	unsigned			barrier;
204	unsigned			end_of_program;
205	unsigned                        mark;
206	unsigned                        vpm;
207	struct list_head		alu;
208	struct list_head		tex;
209	struct list_head		vtx;
210	struct list_head		gds;
211	struct r600_bytecode_output		output;
212	struct r600_bytecode_rat		rat;
213	struct r600_bytecode_alu		*curr_bs_head;
214	struct r600_bytecode_alu		*prev_bs_head;
215	struct r600_bytecode_alu		*prev2_bs_head;
216	unsigned isa[2];
217	unsigned nlds_read;
218	unsigned nqueue_read;
219};
220
/*
 * Flow-control kinds.
 * NOTE(review): presumably the values stored in
 * r600_cf_stack_entry::type - confirm against the users of fc_stack.
 */
#define FC_NONE     0
#define FC_IF       1
#define FC_LOOP     2
#define FC_REP      3
#define FC_PUSH_VPM 4
#define FC_PUSH_WQM 5
227
/*
 * One entry of the flow-control stack kept in r600_bytecode::fc_stack[].
 * `mid` is an array of CF pointers with `num_mid` as its companion count.
 */
struct r600_cf_stack_entry {
	int type;
	struct r600_bytecode_cf *start;
	struct r600_bytecode_cf **mid; /* used to store the else point */
	int num_mid;
};
234
/* Call-depth limit (32). */
#define SQ_MAX_CALL_DEPTH 0x00000020

/*
 * AR handling variants.
 * NOTE(review): presumably the values of r600_bytecode::ar_handling -
 * confirm against its users.
 */
#define AR_HANDLE_NORMAL  0
#define AR_HANDLE_RV6XX   1 /* except RV670 */
239
/*
 * Bookkeeping for the stack depth a shader needs; embedded in
 * struct r600_bytecode as its stack member.
 */
struct r600_stack_info {
	/*
	 * current level of non-WQM PUSH operations
	 * (PUSH, PUSH_ELSE, ALU_PUSH_BEFORE)
	 */
	int push;

	/*
	 * current level of WQM PUSH operations
	 * (PUSH, PUSH_ELSE, PUSH_WQM)
	 */
	int push_wqm;

	int loop;        /* current loop level */

	int max_entries; /* required depth */
	int entry_size;  /* subentries per entry */
};
255
256struct r600_bytecode {
257	enum amd_gfx_level			gfx_level;
258	enum radeon_family		family;
259	bool				has_compressed_msaa_texturing;
260	int				type;
261	struct list_head		cf;
262	struct r600_bytecode_cf		*cf_last;
263	unsigned			ndw;
264	unsigned			ncf;
265	unsigned			nalu_groups;
266	unsigned			ngpr;
267	unsigned			nstack;
268	unsigned			nlds_dw;
269	unsigned			nresource;
270	unsigned			force_add_cf;
271	uint32_t			*bytecode;
272	uint32_t			fc_sp;
273	struct r600_cf_stack_entry	fc_stack[256];
274	struct r600_stack_info		stack;
275	unsigned	ar_loaded;
276	unsigned	ar_reg;
277	unsigned	ar_chan;
278	unsigned        ar_handling;
279	unsigned        r6xx_nop_after_rel_dst;
280	bool            index_loaded[2];
281	unsigned        index_reg[2]; /* indexing register CF_INDEX_[01] */
282	unsigned        index_reg_chan[2]; /* indexing register chanel CF_INDEX_[01] */
283	unsigned        debug_id;
284	struct r600_isa* isa;
285	struct r600_bytecode_output pending_outputs[5];
286	int n_pending_outputs;
287	boolean			need_wait_ack; /* emit a pending WAIT_ACK prior to control flow */
288	boolean			precise;
289};
290
/* eg_asm.c */
int eg_bytecode_cf_build(struct r600_bytecode *bc,
			 struct r600_bytecode_cf *cf);
int egcm_load_index_reg(struct r600_bytecode *bc,
			unsigned id,
			bool inside_alu_clause);
int eg_bytecode_gds_build(struct r600_bytecode *bc,
			  struct r600_bytecode_gds *gds,
			  unsigned id);
int eg_bytecode_alu_build(struct r600_bytecode *bc,
			  struct r600_bytecode_alu *alu,
			  unsigned id);
/* r600_asm.c */

/* Set-up and tear-down of a struct r600_bytecode. */
void r600_bytecode_init(struct r600_bytecode *bc,
			enum amd_gfx_level gfx_level,
			enum radeon_family family,
			bool has_compressed_msaa_texturing);
void r600_bytecode_clear(struct r600_bytecode *bc);

/*
 * Adders for the individual instruction kinds.  The record arguments are
 * taken through pointers to const.
 */
int r600_bytecode_add_alu(struct r600_bytecode *bc,
			  const struct r600_bytecode_alu *alu);
int r600_bytecode_add_vtx(struct r600_bytecode *bc,
			  const struct r600_bytecode_vtx *vtx);
int r600_bytecode_add_vtx_tc(struct r600_bytecode *bc,
			     const struct r600_bytecode_vtx *vtx);
int r600_bytecode_add_tex(struct r600_bytecode *bc,
			  const struct r600_bytecode_tex *tex);
int r600_bytecode_add_gds(struct r600_bytecode *bc,
			  const struct r600_bytecode_gds *gds);
int r600_bytecode_add_output(struct r600_bytecode *bc,
			     const struct r600_bytecode_output *output);
int r600_bytecode_add_pending_output(struct r600_bytecode *bc,
				     const struct r600_bytecode_output *output);

void r600_bytecode_add_ack(struct r600_bytecode *bc);
int r600_bytecode_wait_acks(struct r600_bytecode *bc);
uint32_t r600_bytecode_write_export_ack_type(struct r600_bytecode *bc,
					     bool indirect);

int r600_bytecode_build(struct r600_bytecode *bc);
int r600_bytecode_add_cf(struct r600_bytecode *bc);
int r600_bytecode_add_cfinst(struct r600_bytecode *bc, unsigned op);
int r600_bytecode_add_alu_type(struct r600_bytecode *bc,
			       const struct r600_bytecode_alu *alu,
			       unsigned type);
void r600_bytecode_special_constants(uint32_t value, unsigned *sel);
void r600_bytecode_disasm(struct r600_bytecode *bc);
void r600_bytecode_alu_read(struct r600_bytecode *bc,
			    struct r600_bytecode_alu *alu,
			    uint32_t word0, uint32_t word1);
int r600_load_ar(struct r600_bytecode *bc, bool for_src);

int cm_bytecode_add_cf_end(struct r600_bytecode *bc);

void *r600_create_vertex_fetch_shader(struct pipe_context *ctx,
				      unsigned count,
				      const struct pipe_vertex_element *elements);
339
/* r700_asm.c */
void r700_bytecode_cf_vtx_build(uint32_t *bytecode,
		const struct r600_bytecode_cf *cf);
int r700_bytecode_alu_build(struct r600_bytecode *bc,
		struct r600_bytecode_alu *alu, unsigned id);
void r700_bytecode_alu_read(struct r600_bytecode *bc,
		struct r600_bytecode_alu *alu, uint32_t word0, uint32_t word1);
int r700_bytecode_fetch_mem_build(struct r600_bytecode *bc,
		struct r600_bytecode_vtx *mem, unsigned id);

/*
 * Readers that fill *output from the two given instruction words; one
 * variant carries the r600_ prefix and one the eg_ prefix.
 */
void r600_bytecode_export_read(struct r600_bytecode *bc,
		struct r600_bytecode_output *output, uint32_t word0, uint32_t word1);
void eg_bytecode_export_read(struct r600_bytecode *bc,
		struct r600_bytecode_output *output, uint32_t word0, uint32_t word1);

/*
 * Takes a pipe_format and reports results through the four out
 * pointers (format, num_format, format_comp, endian).
 */
void r600_vertex_data_type(enum pipe_format pformat, unsigned *format,
			   unsigned *num_format, unsigned *format_comp, unsigned *endian);
359
/*
 * Swap an index within the pairs (0,1) and (2,3):
 * 0 -> 1, 1 -> 0, 2 -> 3, 3 -> 2.
 * Any value outside 0..3 yields 0.
 */
static inline int fp64_switch(int i)
{
	if (i < 0 || i > 3) {
		return 0;
	}

	/* Flipping the lowest bit exchanges the two members of each pair. */
	return i ^ 1;
}
374
375#ifdef __cplusplus
376}
377#endif
378
379#endif
380