1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * on the rights to use, copy, modify, merge, publish, distribute, sub
8bf215546Sopenharmony_ci * license, and/or sell copies of the Software, and to permit persons to whom
9bf215546Sopenharmony_ci * the Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19bf215546Sopenharmony_ci * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20bf215546Sopenharmony_ci * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21bf215546Sopenharmony_ci * USE OR OTHER DEALINGS IN THE SOFTWARE.
22bf215546Sopenharmony_ci */
23bf215546Sopenharmony_ci#include "r600_sq.h"
24bf215546Sopenharmony_ci#include "r600_opcodes.h"
25bf215546Sopenharmony_ci#include "r600_formats.h"
26bf215546Sopenharmony_ci#include "r600_shader.h"
27bf215546Sopenharmony_ci#include "r600d.h"
28bf215546Sopenharmony_ci
29bf215546Sopenharmony_ci#include <errno.h>
30bf215546Sopenharmony_ci#include "util/u_bitcast.h"
31bf215546Sopenharmony_ci#include "util/u_dump.h"
32bf215546Sopenharmony_ci#include "util/u_memory.h"
33bf215546Sopenharmony_ci#include "util/u_math.h"
34bf215546Sopenharmony_ci#include "pipe/p_shader_tokens.h"
35bf215546Sopenharmony_ci
36bf215546Sopenharmony_ci#include "sb/sb_public.h"
37bf215546Sopenharmony_ci
38bf215546Sopenharmony_ci#define NUM_OF_CYCLES 3
39bf215546Sopenharmony_ci#define NUM_OF_COMPONENTS 4
40bf215546Sopenharmony_ci
41bf215546Sopenharmony_cistatic inline bool alu_writes(struct r600_bytecode_alu *alu)
42bf215546Sopenharmony_ci{
43bf215546Sopenharmony_ci	return alu->dst.write || alu->is_op3;
44bf215546Sopenharmony_ci}
45bf215546Sopenharmony_ci
46bf215546Sopenharmony_cistatic inline unsigned int r600_bytecode_get_num_operands(const struct r600_bytecode_alu *alu)
47bf215546Sopenharmony_ci{
48bf215546Sopenharmony_ci	return r600_isa_alu(alu->op)->src_count;
49bf215546Sopenharmony_ci}
50bf215546Sopenharmony_ci
51bf215546Sopenharmony_cistatic struct r600_bytecode_cf *r600_bytecode_cf(void)
52bf215546Sopenharmony_ci{
53bf215546Sopenharmony_ci	struct r600_bytecode_cf *cf = CALLOC_STRUCT(r600_bytecode_cf);
54bf215546Sopenharmony_ci
55bf215546Sopenharmony_ci	if (!cf)
56bf215546Sopenharmony_ci		return NULL;
57bf215546Sopenharmony_ci	list_inithead(&cf->list);
58bf215546Sopenharmony_ci	list_inithead(&cf->alu);
59bf215546Sopenharmony_ci	list_inithead(&cf->vtx);
60bf215546Sopenharmony_ci	list_inithead(&cf->tex);
61bf215546Sopenharmony_ci	list_inithead(&cf->gds);
62bf215546Sopenharmony_ci	return cf;
63bf215546Sopenharmony_ci}
64bf215546Sopenharmony_ci
65bf215546Sopenharmony_cistatic struct r600_bytecode_alu *r600_bytecode_alu(void)
66bf215546Sopenharmony_ci{
67bf215546Sopenharmony_ci	struct r600_bytecode_alu *alu = CALLOC_STRUCT(r600_bytecode_alu);
68bf215546Sopenharmony_ci
69bf215546Sopenharmony_ci	if (!alu)
70bf215546Sopenharmony_ci		return NULL;
71bf215546Sopenharmony_ci	list_inithead(&alu->list);
72bf215546Sopenharmony_ci	return alu;
73bf215546Sopenharmony_ci}
74bf215546Sopenharmony_ci
75bf215546Sopenharmony_cistatic struct r600_bytecode_vtx *r600_bytecode_vtx(void)
76bf215546Sopenharmony_ci{
77bf215546Sopenharmony_ci	struct r600_bytecode_vtx *vtx = CALLOC_STRUCT(r600_bytecode_vtx);
78bf215546Sopenharmony_ci
79bf215546Sopenharmony_ci	if (!vtx)
80bf215546Sopenharmony_ci		return NULL;
81bf215546Sopenharmony_ci	list_inithead(&vtx->list);
82bf215546Sopenharmony_ci	return vtx;
83bf215546Sopenharmony_ci}
84bf215546Sopenharmony_ci
85bf215546Sopenharmony_cistatic struct r600_bytecode_tex *r600_bytecode_tex(void)
86bf215546Sopenharmony_ci{
87bf215546Sopenharmony_ci	struct r600_bytecode_tex *tex = CALLOC_STRUCT(r600_bytecode_tex);
88bf215546Sopenharmony_ci
89bf215546Sopenharmony_ci	if (!tex)
90bf215546Sopenharmony_ci		return NULL;
91bf215546Sopenharmony_ci	list_inithead(&tex->list);
92bf215546Sopenharmony_ci	return tex;
93bf215546Sopenharmony_ci}
94bf215546Sopenharmony_ci
95bf215546Sopenharmony_cistatic struct r600_bytecode_gds *r600_bytecode_gds(void)
96bf215546Sopenharmony_ci{
97bf215546Sopenharmony_ci	struct r600_bytecode_gds *gds = CALLOC_STRUCT(r600_bytecode_gds);
98bf215546Sopenharmony_ci
99bf215546Sopenharmony_ci	if (gds == NULL)
100bf215546Sopenharmony_ci		return NULL;
101bf215546Sopenharmony_ci	list_inithead(&gds->list);
102bf215546Sopenharmony_ci	return gds;
103bf215546Sopenharmony_ci}
104bf215546Sopenharmony_ci
105bf215546Sopenharmony_cistatic unsigned stack_entry_size(enum radeon_family chip) {
106bf215546Sopenharmony_ci	/* Wavefront size:
107bf215546Sopenharmony_ci	 *   64: R600/RV670/RV770/Cypress/R740/Barts/Turks/Caicos/
108bf215546Sopenharmony_ci	 *       Aruba/Sumo/Sumo2/redwood/juniper
109bf215546Sopenharmony_ci	 *   32: R630/R730/R710/Palm/Cedar
110bf215546Sopenharmony_ci	 *   16: R610/Rs780
111bf215546Sopenharmony_ci	 *
112bf215546Sopenharmony_ci	 * Stack row size:
113bf215546Sopenharmony_ci	 * 	Wavefront Size                        16  32  48  64
114bf215546Sopenharmony_ci	 * 	Columns per Row (R6xx/R7xx/R8xx only)  8   8   4   4
115bf215546Sopenharmony_ci	 * 	Columns per Row (R9xx+)                8   4   4   4 */
116bf215546Sopenharmony_ci
117bf215546Sopenharmony_ci	switch (chip) {
118bf215546Sopenharmony_ci	/* FIXME: are some chips missing here? */
119bf215546Sopenharmony_ci	/* wavefront size 16 */
120bf215546Sopenharmony_ci	case CHIP_RV610:
121bf215546Sopenharmony_ci	case CHIP_RS780:
122bf215546Sopenharmony_ci	case CHIP_RV620:
123bf215546Sopenharmony_ci	case CHIP_RS880:
124bf215546Sopenharmony_ci	/* wavefront size 32 */
125bf215546Sopenharmony_ci	case CHIP_RV630:
126bf215546Sopenharmony_ci	case CHIP_RV635:
127bf215546Sopenharmony_ci	case CHIP_RV730:
128bf215546Sopenharmony_ci	case CHIP_RV710:
129bf215546Sopenharmony_ci	case CHIP_PALM:
130bf215546Sopenharmony_ci	case CHIP_CEDAR:
131bf215546Sopenharmony_ci		return 8;
132bf215546Sopenharmony_ci
133bf215546Sopenharmony_ci	/* wavefront size 64 */
134bf215546Sopenharmony_ci	default:
135bf215546Sopenharmony_ci		return 4;
136bf215546Sopenharmony_ci	}
137bf215546Sopenharmony_ci}
138bf215546Sopenharmony_ci
139bf215546Sopenharmony_civoid r600_bytecode_init(struct r600_bytecode *bc,
140bf215546Sopenharmony_ci			enum amd_gfx_level gfx_level,
141bf215546Sopenharmony_ci			enum radeon_family family,
142bf215546Sopenharmony_ci			bool has_compressed_msaa_texturing)
143bf215546Sopenharmony_ci{
144bf215546Sopenharmony_ci	static unsigned next_shader_id = 0;
145bf215546Sopenharmony_ci
146bf215546Sopenharmony_ci	bc->debug_id = ++next_shader_id;
147bf215546Sopenharmony_ci
148bf215546Sopenharmony_ci	if ((gfx_level == R600) &&
149bf215546Sopenharmony_ci	    (family != CHIP_RV670 && family != CHIP_RS780 && family != CHIP_RS880)) {
150bf215546Sopenharmony_ci		bc->ar_handling = AR_HANDLE_RV6XX;
151bf215546Sopenharmony_ci
152bf215546Sopenharmony_ci		/* Insert a nop after a relative temp write so that a read in
153bf215546Sopenharmony_ci		 * the following instruction group gets the right value.  The
154bf215546Sopenharmony_ci		 * r600 and EG ISA specs both say that read-after-rel-write of a
155bf215546Sopenharmony_ci		 * register in the next instr group is illegal, but apparently
156bf215546Sopenharmony_ci		 * that's not true on all chips (see commit
157bf215546Sopenharmony_ci		 * c96b9834032952492efbd2d1f5511fe225704918).
158bf215546Sopenharmony_ci		 */
159bf215546Sopenharmony_ci		bc->r6xx_nop_after_rel_dst = 1;
160bf215546Sopenharmony_ci	} else if (family == CHIP_RV770) {
161bf215546Sopenharmony_ci		bc->ar_handling = AR_HANDLE_NORMAL;
162bf215546Sopenharmony_ci		bc->r6xx_nop_after_rel_dst = 1;
163bf215546Sopenharmony_ci	} else {
164bf215546Sopenharmony_ci		bc->ar_handling = AR_HANDLE_NORMAL;
165bf215546Sopenharmony_ci		bc->r6xx_nop_after_rel_dst = 0;
166bf215546Sopenharmony_ci	}
167bf215546Sopenharmony_ci
168bf215546Sopenharmony_ci	list_inithead(&bc->cf);
169bf215546Sopenharmony_ci	bc->gfx_level = gfx_level;
170bf215546Sopenharmony_ci	bc->family = family;
171bf215546Sopenharmony_ci	bc->has_compressed_msaa_texturing = has_compressed_msaa_texturing;
172bf215546Sopenharmony_ci	bc->stack.entry_size = stack_entry_size(family);
173bf215546Sopenharmony_ci}
174bf215546Sopenharmony_ci
175bf215546Sopenharmony_ciint r600_bytecode_add_cf(struct r600_bytecode *bc)
176bf215546Sopenharmony_ci{
177bf215546Sopenharmony_ci	struct r600_bytecode_cf *cf = r600_bytecode_cf();
178bf215546Sopenharmony_ci
179bf215546Sopenharmony_ci	if (!cf)
180bf215546Sopenharmony_ci		return -ENOMEM;
181bf215546Sopenharmony_ci	list_addtail(&cf->list, &bc->cf);
182bf215546Sopenharmony_ci	if (bc->cf_last) {
183bf215546Sopenharmony_ci		cf->id = bc->cf_last->id + 2;
184bf215546Sopenharmony_ci		if (bc->cf_last->eg_alu_extended) {
185bf215546Sopenharmony_ci			/* take into account extended alu size */
186bf215546Sopenharmony_ci			cf->id += 2;
187bf215546Sopenharmony_ci			bc->ndw += 2;
188bf215546Sopenharmony_ci		}
189bf215546Sopenharmony_ci	}
190bf215546Sopenharmony_ci	bc->cf_last = cf;
191bf215546Sopenharmony_ci	bc->ncf++;
192bf215546Sopenharmony_ci	bc->ndw += 2;
193bf215546Sopenharmony_ci	bc->force_add_cf = 0;
194bf215546Sopenharmony_ci	bc->ar_loaded = 0;
195bf215546Sopenharmony_ci	return 0;
196bf215546Sopenharmony_ci}
197bf215546Sopenharmony_ci
198bf215546Sopenharmony_ciint r600_bytecode_add_output(struct r600_bytecode *bc,
199bf215546Sopenharmony_ci		const struct r600_bytecode_output *output)
200bf215546Sopenharmony_ci{
201bf215546Sopenharmony_ci	int r;
202bf215546Sopenharmony_ci
203bf215546Sopenharmony_ci	if (output->gpr >= bc->ngpr)
204bf215546Sopenharmony_ci		bc->ngpr = output->gpr + 1;
205bf215546Sopenharmony_ci
206bf215546Sopenharmony_ci	if (bc->cf_last && (bc->cf_last->op == output->op ||
207bf215546Sopenharmony_ci		(bc->cf_last->op == CF_OP_EXPORT &&
208bf215546Sopenharmony_ci		output->op == CF_OP_EXPORT_DONE)) &&
209bf215546Sopenharmony_ci		output->type == bc->cf_last->output.type &&
210bf215546Sopenharmony_ci		output->elem_size == bc->cf_last->output.elem_size &&
211bf215546Sopenharmony_ci		output->swizzle_x == bc->cf_last->output.swizzle_x &&
212bf215546Sopenharmony_ci		output->swizzle_y == bc->cf_last->output.swizzle_y &&
213bf215546Sopenharmony_ci		output->swizzle_z == bc->cf_last->output.swizzle_z &&
214bf215546Sopenharmony_ci		output->swizzle_w == bc->cf_last->output.swizzle_w &&
215bf215546Sopenharmony_ci		output->comp_mask == bc->cf_last->output.comp_mask &&
216bf215546Sopenharmony_ci		(output->burst_count + bc->cf_last->output.burst_count) <= 16) {
217bf215546Sopenharmony_ci
218bf215546Sopenharmony_ci		if ((output->gpr + output->burst_count) == bc->cf_last->output.gpr &&
219bf215546Sopenharmony_ci			(output->array_base + output->burst_count) == bc->cf_last->output.array_base) {
220bf215546Sopenharmony_ci
221bf215546Sopenharmony_ci			bc->cf_last->op = bc->cf_last->output.op = output->op;
222bf215546Sopenharmony_ci			bc->cf_last->output.gpr = output->gpr;
223bf215546Sopenharmony_ci			bc->cf_last->output.array_base = output->array_base;
224bf215546Sopenharmony_ci			bc->cf_last->output.burst_count += output->burst_count;
225bf215546Sopenharmony_ci			return 0;
226bf215546Sopenharmony_ci
227bf215546Sopenharmony_ci		} else if (output->gpr == (bc->cf_last->output.gpr + bc->cf_last->output.burst_count) &&
228bf215546Sopenharmony_ci			output->array_base == (bc->cf_last->output.array_base + bc->cf_last->output.burst_count)) {
229bf215546Sopenharmony_ci
230bf215546Sopenharmony_ci			bc->cf_last->op = bc->cf_last->output.op = output->op;
231bf215546Sopenharmony_ci			bc->cf_last->output.burst_count += output->burst_count;
232bf215546Sopenharmony_ci			return 0;
233bf215546Sopenharmony_ci		}
234bf215546Sopenharmony_ci	}
235bf215546Sopenharmony_ci
236bf215546Sopenharmony_ci	r = r600_bytecode_add_cf(bc);
237bf215546Sopenharmony_ci	if (r)
238bf215546Sopenharmony_ci		return r;
239bf215546Sopenharmony_ci	bc->cf_last->op = output->op;
240bf215546Sopenharmony_ci	memcpy(&bc->cf_last->output, output, sizeof(struct r600_bytecode_output));
241bf215546Sopenharmony_ci	bc->cf_last->barrier = 1;
242bf215546Sopenharmony_ci	return 0;
243bf215546Sopenharmony_ci}
244bf215546Sopenharmony_ci
245bf215546Sopenharmony_ciint r600_bytecode_add_pending_output(struct r600_bytecode *bc,
246bf215546Sopenharmony_ci		const struct r600_bytecode_output *output)
247bf215546Sopenharmony_ci{
248bf215546Sopenharmony_ci	assert(bc->n_pending_outputs + 1 < ARRAY_SIZE(bc->pending_outputs));
249bf215546Sopenharmony_ci	bc->pending_outputs[bc->n_pending_outputs++] = *output;
250bf215546Sopenharmony_ci
251bf215546Sopenharmony_ci	return 0;
252bf215546Sopenharmony_ci}
253bf215546Sopenharmony_ci
254bf215546Sopenharmony_civoid
255bf215546Sopenharmony_cir600_bytecode_add_ack(struct r600_bytecode *bc)
256bf215546Sopenharmony_ci{
257bf215546Sopenharmony_ci	bc->need_wait_ack = true;
258bf215546Sopenharmony_ci}
259bf215546Sopenharmony_ci
260bf215546Sopenharmony_ciint
261bf215546Sopenharmony_cir600_bytecode_wait_acks(struct r600_bytecode *bc)
262bf215546Sopenharmony_ci{
263bf215546Sopenharmony_ci	/* Store acks are an R700+ feature. */
264bf215546Sopenharmony_ci	if (bc->gfx_level < R700)
265bf215546Sopenharmony_ci		return 0;
266bf215546Sopenharmony_ci
267bf215546Sopenharmony_ci	if (!bc->need_wait_ack)
268bf215546Sopenharmony_ci		return 0;
269bf215546Sopenharmony_ci
270bf215546Sopenharmony_ci	int ret = r600_bytecode_add_cfinst(bc, CF_OP_WAIT_ACK);
271bf215546Sopenharmony_ci	if (ret != 0)
272bf215546Sopenharmony_ci		return ret;
273bf215546Sopenharmony_ci
274bf215546Sopenharmony_ci	struct r600_bytecode_cf *cf = bc->cf_last;
275bf215546Sopenharmony_ci	cf->barrier = 1;
276bf215546Sopenharmony_ci	/* Request a wait if the number of outstanding acks is > 0 */
277bf215546Sopenharmony_ci	cf->cf_addr = 0;
278bf215546Sopenharmony_ci
279bf215546Sopenharmony_ci	return 0;
280bf215546Sopenharmony_ci}
281bf215546Sopenharmony_ci
282bf215546Sopenharmony_ciuint32_t
283bf215546Sopenharmony_cir600_bytecode_write_export_ack_type(struct r600_bytecode *bc, bool indirect)
284bf215546Sopenharmony_ci{
285bf215546Sopenharmony_ci	if (bc->gfx_level >= R700) {
286bf215546Sopenharmony_ci		if (indirect)
287bf215546Sopenharmony_ci			return V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND_ACK_EG;
288bf215546Sopenharmony_ci		else
289bf215546Sopenharmony_ci			return V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_ACK_EG;
290bf215546Sopenharmony_ci	} else {
291bf215546Sopenharmony_ci		if (indirect)
292bf215546Sopenharmony_ci			return V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND;
293bf215546Sopenharmony_ci		else
294bf215546Sopenharmony_ci			return V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
295bf215546Sopenharmony_ci	}
296bf215546Sopenharmony_ci}
297bf215546Sopenharmony_ci
298bf215546Sopenharmony_ci/* alu instructions that can ony exits once per group */
299bf215546Sopenharmony_cistatic int is_alu_once_inst(struct r600_bytecode_alu *alu)
300bf215546Sopenharmony_ci{
301bf215546Sopenharmony_ci	return r600_isa_alu(alu->op)->flags & (AF_KILL | AF_PRED) || alu->is_lds_idx_op || alu->op == ALU_OP0_GROUP_BARRIER;
302bf215546Sopenharmony_ci}
303bf215546Sopenharmony_ci
304bf215546Sopenharmony_cistatic int is_alu_reduction_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
305bf215546Sopenharmony_ci{
306bf215546Sopenharmony_ci	return (r600_isa_alu(alu->op)->flags & AF_REPL) &&
307bf215546Sopenharmony_ci			(r600_isa_alu_slots(bc->isa->hw_class, alu->op) == AF_4V);
308bf215546Sopenharmony_ci}
309bf215546Sopenharmony_ci
310bf215546Sopenharmony_cistatic int is_alu_mova_inst(struct r600_bytecode_alu *alu)
311bf215546Sopenharmony_ci{
312bf215546Sopenharmony_ci	return r600_isa_alu(alu->op)->flags & AF_MOVA;
313bf215546Sopenharmony_ci}
314bf215546Sopenharmony_ci
315bf215546Sopenharmony_cistatic int alu_uses_rel(struct r600_bytecode_alu *alu)
316bf215546Sopenharmony_ci{
317bf215546Sopenharmony_ci	unsigned num_src = r600_bytecode_get_num_operands(alu);
318bf215546Sopenharmony_ci	unsigned src;
319bf215546Sopenharmony_ci
320bf215546Sopenharmony_ci	if (alu->dst.rel) {
321bf215546Sopenharmony_ci		return 1;
322bf215546Sopenharmony_ci	}
323bf215546Sopenharmony_ci
324bf215546Sopenharmony_ci	for (src = 0; src < num_src; ++src) {
325bf215546Sopenharmony_ci		if (alu->src[src].rel) {
326bf215546Sopenharmony_ci			return 1;
327bf215546Sopenharmony_ci		}
328bf215546Sopenharmony_ci	}
329bf215546Sopenharmony_ci	return 0;
330bf215546Sopenharmony_ci}
331bf215546Sopenharmony_ci
332bf215546Sopenharmony_cistatic int is_lds_read(int sel)
333bf215546Sopenharmony_ci{
334bf215546Sopenharmony_ci  return sel == EG_V_SQ_ALU_SRC_LDS_OQ_A_POP || sel == EG_V_SQ_ALU_SRC_LDS_OQ_B_POP;
335bf215546Sopenharmony_ci}
336bf215546Sopenharmony_ci
337bf215546Sopenharmony_cistatic int alu_uses_lds(struct r600_bytecode_alu *alu)
338bf215546Sopenharmony_ci{
339bf215546Sopenharmony_ci	unsigned num_src = r600_bytecode_get_num_operands(alu);
340bf215546Sopenharmony_ci	unsigned src;
341bf215546Sopenharmony_ci
342bf215546Sopenharmony_ci	for (src = 0; src < num_src; ++src) {
343bf215546Sopenharmony_ci		if (is_lds_read(alu->src[src].sel)) {
344bf215546Sopenharmony_ci			return 1;
345bf215546Sopenharmony_ci		}
346bf215546Sopenharmony_ci	}
347bf215546Sopenharmony_ci	return 0;
348bf215546Sopenharmony_ci}
349bf215546Sopenharmony_ci
350bf215546Sopenharmony_cistatic int is_alu_64bit_inst(struct r600_bytecode_alu *alu)
351bf215546Sopenharmony_ci{
352bf215546Sopenharmony_ci	const struct alu_op_info *op = r600_isa_alu(alu->op);
353bf215546Sopenharmony_ci	return (op->flags & AF_64);
354bf215546Sopenharmony_ci}
355bf215546Sopenharmony_ci
356bf215546Sopenharmony_cistatic int is_alu_vec_unit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
357bf215546Sopenharmony_ci{
358bf215546Sopenharmony_ci	unsigned slots = r600_isa_alu_slots(bc->isa->hw_class, alu->op);
359bf215546Sopenharmony_ci	return !(slots & AF_S);
360bf215546Sopenharmony_ci}
361bf215546Sopenharmony_ci
362bf215546Sopenharmony_cistatic int is_alu_trans_unit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
363bf215546Sopenharmony_ci{
364bf215546Sopenharmony_ci	unsigned slots = r600_isa_alu_slots(bc->isa->hw_class, alu->op);
365bf215546Sopenharmony_ci	return !(slots & AF_V);
366bf215546Sopenharmony_ci}
367bf215546Sopenharmony_ci
368bf215546Sopenharmony_ci/* alu instructions that can execute on any unit */
369bf215546Sopenharmony_cistatic int is_alu_any_unit_inst(struct r600_bytecode *bc, struct r600_bytecode_alu *alu)
370bf215546Sopenharmony_ci{
371bf215546Sopenharmony_ci	unsigned slots = r600_isa_alu_slots(bc->isa->hw_class, alu->op);
372bf215546Sopenharmony_ci	return slots == AF_VS;
373bf215546Sopenharmony_ci}
374bf215546Sopenharmony_ci
375bf215546Sopenharmony_cistatic int is_nop_inst(struct r600_bytecode_alu *alu)
376bf215546Sopenharmony_ci{
377bf215546Sopenharmony_ci	return alu->op == ALU_OP0_NOP;
378bf215546Sopenharmony_ci}
379bf215546Sopenharmony_ci
380bf215546Sopenharmony_cistatic int assign_alu_units(struct r600_bytecode *bc, struct r600_bytecode_alu *alu_first,
381bf215546Sopenharmony_ci			    struct r600_bytecode_alu *assignment[5])
382bf215546Sopenharmony_ci{
383bf215546Sopenharmony_ci	struct r600_bytecode_alu *alu;
384bf215546Sopenharmony_ci	unsigned i, chan, trans;
385bf215546Sopenharmony_ci	int max_slots = bc->gfx_level == CAYMAN ? 4 : 5;
386bf215546Sopenharmony_ci
387bf215546Sopenharmony_ci	for (i = 0; i < max_slots; i++)
388bf215546Sopenharmony_ci		assignment[i] = NULL;
389bf215546Sopenharmony_ci
390bf215546Sopenharmony_ci	for (alu = alu_first; alu; alu = list_entry(alu->list.next, struct r600_bytecode_alu, list)) {
391bf215546Sopenharmony_ci		chan = alu->dst.chan;
392bf215546Sopenharmony_ci		if (max_slots == 4)
393bf215546Sopenharmony_ci			trans = 0;
394bf215546Sopenharmony_ci		else if (is_alu_trans_unit_inst(bc, alu))
395bf215546Sopenharmony_ci			trans = 1;
396bf215546Sopenharmony_ci		else if (is_alu_vec_unit_inst(bc, alu))
397bf215546Sopenharmony_ci			trans = 0;
398bf215546Sopenharmony_ci		else if (assignment[chan])
399bf215546Sopenharmony_ci			trans = 1; /* Assume ALU_INST_PREFER_VECTOR. */
400bf215546Sopenharmony_ci		else
401bf215546Sopenharmony_ci			trans = 0;
402bf215546Sopenharmony_ci
403bf215546Sopenharmony_ci		if (trans) {
404bf215546Sopenharmony_ci			if (assignment[4]) {
405bf215546Sopenharmony_ci				assert(0); /* ALU.Trans has already been allocated. */
406bf215546Sopenharmony_ci				return -1;
407bf215546Sopenharmony_ci			}
408bf215546Sopenharmony_ci			assignment[4] = alu;
409bf215546Sopenharmony_ci		} else {
410bf215546Sopenharmony_ci                        if (assignment[chan]) {
411bf215546Sopenharmony_ci			 	assert(0); /* ALU.chan has already been allocated. */
412bf215546Sopenharmony_ci				return -1;
413bf215546Sopenharmony_ci			}
414bf215546Sopenharmony_ci			assignment[chan] = alu;
415bf215546Sopenharmony_ci		}
416bf215546Sopenharmony_ci
417bf215546Sopenharmony_ci		if (alu->last)
418bf215546Sopenharmony_ci			break;
419bf215546Sopenharmony_ci	}
420bf215546Sopenharmony_ci	return 0;
421bf215546Sopenharmony_ci}
422bf215546Sopenharmony_ci
423bf215546Sopenharmony_cistruct alu_bank_swizzle {
424bf215546Sopenharmony_ci	int	hw_gpr[NUM_OF_CYCLES][NUM_OF_COMPONENTS];
425bf215546Sopenharmony_ci	int	hw_cfile_addr[4];
426bf215546Sopenharmony_ci	int	hw_cfile_elem[4];
427bf215546Sopenharmony_ci};
428bf215546Sopenharmony_ci
429bf215546Sopenharmony_cistatic const unsigned cycle_for_bank_swizzle_vec[][3] = {
430bf215546Sopenharmony_ci	[SQ_ALU_VEC_012] = { 0, 1, 2 },
431bf215546Sopenharmony_ci	[SQ_ALU_VEC_021] = { 0, 2, 1 },
432bf215546Sopenharmony_ci	[SQ_ALU_VEC_120] = { 1, 2, 0 },
433bf215546Sopenharmony_ci	[SQ_ALU_VEC_102] = { 1, 0, 2 },
434bf215546Sopenharmony_ci	[SQ_ALU_VEC_201] = { 2, 0, 1 },
435bf215546Sopenharmony_ci	[SQ_ALU_VEC_210] = { 2, 1, 0 }
436bf215546Sopenharmony_ci};
437bf215546Sopenharmony_ci
438bf215546Sopenharmony_cistatic const unsigned cycle_for_bank_swizzle_scl[][3] = {
439bf215546Sopenharmony_ci	[SQ_ALU_SCL_210] = { 2, 1, 0 },
440bf215546Sopenharmony_ci	[SQ_ALU_SCL_122] = { 1, 2, 2 },
441bf215546Sopenharmony_ci	[SQ_ALU_SCL_212] = { 2, 1, 2 },
442bf215546Sopenharmony_ci	[SQ_ALU_SCL_221] = { 2, 2, 1 }
443bf215546Sopenharmony_ci};
444bf215546Sopenharmony_ci
445bf215546Sopenharmony_cistatic void init_bank_swizzle(struct alu_bank_swizzle *bs)
446bf215546Sopenharmony_ci{
447bf215546Sopenharmony_ci	int i, cycle, component;
448bf215546Sopenharmony_ci	/* set up gpr use */
449bf215546Sopenharmony_ci	for (cycle = 0; cycle < NUM_OF_CYCLES; cycle++)
450bf215546Sopenharmony_ci		for (component = 0; component < NUM_OF_COMPONENTS; component++)
451bf215546Sopenharmony_ci			 bs->hw_gpr[cycle][component] = -1;
452bf215546Sopenharmony_ci	for (i = 0; i < 4; i++)
453bf215546Sopenharmony_ci		bs->hw_cfile_addr[i] = -1;
454bf215546Sopenharmony_ci	for (i = 0; i < 4; i++)
455bf215546Sopenharmony_ci		bs->hw_cfile_elem[i] = -1;
456bf215546Sopenharmony_ci}
457bf215546Sopenharmony_ci
458bf215546Sopenharmony_cistatic int reserve_gpr(struct alu_bank_swizzle *bs, unsigned sel, unsigned chan, unsigned cycle)
459bf215546Sopenharmony_ci{
460bf215546Sopenharmony_ci	if (bs->hw_gpr[cycle][chan] == -1)
461bf215546Sopenharmony_ci		bs->hw_gpr[cycle][chan] = sel;
462bf215546Sopenharmony_ci	else if (bs->hw_gpr[cycle][chan] != (int)sel) {
463bf215546Sopenharmony_ci		/* Another scalar operation has already used the GPR read port for the channel. */
464bf215546Sopenharmony_ci		return -1;
465bf215546Sopenharmony_ci	}
466bf215546Sopenharmony_ci	return 0;
467bf215546Sopenharmony_ci}
468bf215546Sopenharmony_ci
469bf215546Sopenharmony_cistatic int reserve_cfile(const struct r600_bytecode *bc,
470bf215546Sopenharmony_ci			 struct alu_bank_swizzle *bs, unsigned sel, unsigned chan)
471bf215546Sopenharmony_ci{
472bf215546Sopenharmony_ci	int res, num_res = 4;
473bf215546Sopenharmony_ci	if (bc->gfx_level >= R700) {
474bf215546Sopenharmony_ci		num_res = 2;
475bf215546Sopenharmony_ci		chan /= 2;
476bf215546Sopenharmony_ci	}
477bf215546Sopenharmony_ci	for (res = 0; res < num_res; ++res) {
478bf215546Sopenharmony_ci		if (bs->hw_cfile_addr[res] == -1) {
479bf215546Sopenharmony_ci			bs->hw_cfile_addr[res] = sel;
480bf215546Sopenharmony_ci			bs->hw_cfile_elem[res] = chan;
481bf215546Sopenharmony_ci			return 0;
482bf215546Sopenharmony_ci		} else if (bs->hw_cfile_addr[res] == sel &&
483bf215546Sopenharmony_ci			bs->hw_cfile_elem[res] == chan)
484bf215546Sopenharmony_ci			return 0; /* Read for this scalar element already reserved, nothing to do here. */
485bf215546Sopenharmony_ci	}
486bf215546Sopenharmony_ci	/* All cfile read ports are used, cannot reference vector element. */
487bf215546Sopenharmony_ci	return -1;
488bf215546Sopenharmony_ci}
489bf215546Sopenharmony_ci
/* Returns 1 when sel lies in the range 0..127, which this file treats as a
 * GPR selector, otherwise 0. */
static int is_gpr(unsigned sel)
{
	return sel < 128;
}
494bf215546Sopenharmony_ci
/* CB constants start at 512, and get translated to a kcache index when ALU
 * clauses are constructed. Note that we handle kcache constants the same way
 * as (the now gone) cfile constants, is that really required?
 *
 * Returns 1 when sel falls into one of the three open intervals below,
 * otherwise 0.
 *
 * NOTE(review): the bounds as written exclude 4607 and 256; confirm whether
 * those values are meant to be outside the kcache ranges. */
static int is_kcache(unsigned sel)
{
	/* Kcache before translation. */
	if (sel > 511 && sel < 4607)
		return 1;

	/* Kcache 0 & 1 after translation. */
	if (sel > 127 && sel < 192)
		return 1;

	/* Kcache 2 & 3 after translation (EG). */
	return sel > 256 && sel < 320;
}
504bf215546Sopenharmony_ci
505bf215546Sopenharmony_cistatic int is_const(int sel)
506bf215546Sopenharmony_ci{
507bf215546Sopenharmony_ci   return is_kcache(sel) ||
508bf215546Sopenharmony_ci		(sel >= V_SQ_ALU_SRC_0 &&
509bf215546Sopenharmony_ci		sel <= V_SQ_ALU_SRC_LITERAL);
510bf215546Sopenharmony_ci}
511bf215546Sopenharmony_ci
512bf215546Sopenharmony_cistatic int check_vector(const struct r600_bytecode *bc, const struct r600_bytecode_alu *alu,
513bf215546Sopenharmony_ci			struct alu_bank_swizzle *bs, int bank_swizzle)
514bf215546Sopenharmony_ci{
515bf215546Sopenharmony_ci	int r, src, num_src, sel, elem, cycle;
516bf215546Sopenharmony_ci
517bf215546Sopenharmony_ci	num_src = r600_bytecode_get_num_operands(alu);
518bf215546Sopenharmony_ci	for (src = 0; src < num_src; src++) {
519bf215546Sopenharmony_ci		sel = alu->src[src].sel;
520bf215546Sopenharmony_ci		elem = alu->src[src].chan;
521bf215546Sopenharmony_ci		if (is_gpr(sel)) {
522bf215546Sopenharmony_ci			cycle = cycle_for_bank_swizzle_vec[bank_swizzle][src];
523bf215546Sopenharmony_ci			if (src == 1 && sel == alu->src[0].sel && elem == alu->src[0].chan)
524bf215546Sopenharmony_ci				/* Nothing to do; special-case optimization,
525bf215546Sopenharmony_ci				 * second source uses first source’s reservation. */
526bf215546Sopenharmony_ci				continue;
527bf215546Sopenharmony_ci			else {
528bf215546Sopenharmony_ci				r = reserve_gpr(bs, sel, elem, cycle);
529bf215546Sopenharmony_ci				if (r)
530bf215546Sopenharmony_ci					return r;
531bf215546Sopenharmony_ci			}
532bf215546Sopenharmony_ci      } else if (is_kcache(sel)) {
533bf215546Sopenharmony_ci			r = reserve_cfile(bc, bs, (alu->src[src].kc_bank<<16) + sel, elem);
534bf215546Sopenharmony_ci			if (r)
535bf215546Sopenharmony_ci				return r;
536bf215546Sopenharmony_ci		}
537bf215546Sopenharmony_ci		/* No restrictions on PV, PS, literal or special constants. */
538bf215546Sopenharmony_ci	}
539bf215546Sopenharmony_ci	return 0;
540bf215546Sopenharmony_ci}
541bf215546Sopenharmony_ci
542bf215546Sopenharmony_cistatic int check_scalar(const struct r600_bytecode *bc, const struct r600_bytecode_alu *alu,
543bf215546Sopenharmony_ci			struct alu_bank_swizzle *bs, int bank_swizzle)
544bf215546Sopenharmony_ci{
545bf215546Sopenharmony_ci	int r, src, num_src, const_count, sel, elem, cycle;
546bf215546Sopenharmony_ci
547bf215546Sopenharmony_ci	num_src = r600_bytecode_get_num_operands(alu);
548bf215546Sopenharmony_ci	for (const_count = 0, src = 0; src < num_src; ++src) {
549bf215546Sopenharmony_ci		sel = alu->src[src].sel;
550bf215546Sopenharmony_ci		elem = alu->src[src].chan;
551bf215546Sopenharmony_ci		if (is_const(sel)) { /* Any constant, including literal and inline constants. */
552bf215546Sopenharmony_ci			if (const_count >= 2)
553bf215546Sopenharmony_ci				/* More than two references to a constant in
554bf215546Sopenharmony_ci				 * transcendental operation. */
555bf215546Sopenharmony_ci				return -1;
556bf215546Sopenharmony_ci			else
557bf215546Sopenharmony_ci				const_count++;
558bf215546Sopenharmony_ci		}
559bf215546Sopenharmony_ci      if (is_kcache(sel)) {
560bf215546Sopenharmony_ci			r = reserve_cfile(bc, bs, (alu->src[src].kc_bank<<16) + sel, elem);
561bf215546Sopenharmony_ci			if (r)
562bf215546Sopenharmony_ci				return r;
563bf215546Sopenharmony_ci		}
564bf215546Sopenharmony_ci	}
565bf215546Sopenharmony_ci	for (src = 0; src < num_src; ++src) {
566bf215546Sopenharmony_ci		sel = alu->src[src].sel;
567bf215546Sopenharmony_ci		elem = alu->src[src].chan;
568bf215546Sopenharmony_ci		if (is_gpr(sel)) {
569bf215546Sopenharmony_ci			cycle = cycle_for_bank_swizzle_scl[bank_swizzle][src];
570bf215546Sopenharmony_ci			if (cycle < const_count)
571bf215546Sopenharmony_ci				/* Cycle for GPR load conflicts with
572bf215546Sopenharmony_ci				 * constant load in transcendental operation. */
573bf215546Sopenharmony_ci				return -1;
574bf215546Sopenharmony_ci			r = reserve_gpr(bs, sel, elem, cycle);
575bf215546Sopenharmony_ci			if (r)
576bf215546Sopenharmony_ci				return r;
577bf215546Sopenharmony_ci		}
578bf215546Sopenharmony_ci		/* PV PS restrictions */
579bf215546Sopenharmony_ci		if (const_count && (sel == 254 || sel == 255)) {
580bf215546Sopenharmony_ci			cycle = cycle_for_bank_swizzle_scl[bank_swizzle][src];
581bf215546Sopenharmony_ci			if (cycle < const_count)
582bf215546Sopenharmony_ci				return -1;
583bf215546Sopenharmony_ci		}
584bf215546Sopenharmony_ci	}
585bf215546Sopenharmony_ci	return 0;
586bf215546Sopenharmony_ci}
587bf215546Sopenharmony_ci
588bf215546Sopenharmony_cistatic int check_and_set_bank_swizzle(const struct r600_bytecode *bc,
589bf215546Sopenharmony_ci				      struct r600_bytecode_alu *slots[5])
590bf215546Sopenharmony_ci{
591bf215546Sopenharmony_ci	struct alu_bank_swizzle bs;
592bf215546Sopenharmony_ci	int bank_swizzle[5];
593bf215546Sopenharmony_ci	int i, r = 0, forced = 1;
594bf215546Sopenharmony_ci	boolean scalar_only = bc->gfx_level == CAYMAN ? false : true;
595bf215546Sopenharmony_ci	int max_slots = bc->gfx_level == CAYMAN ? 4 : 5;
596bf215546Sopenharmony_ci	int max_checks = max_slots * 1000;
597bf215546Sopenharmony_ci
598bf215546Sopenharmony_ci	for (i = 0; i < max_slots; i++) {
599bf215546Sopenharmony_ci		if (slots[i]) {
600bf215546Sopenharmony_ci			if (slots[i]->bank_swizzle_force) {
601bf215546Sopenharmony_ci				slots[i]->bank_swizzle = slots[i]->bank_swizzle_force;
602bf215546Sopenharmony_ci			} else {
603bf215546Sopenharmony_ci				forced = 0;
604bf215546Sopenharmony_ci			}
605bf215546Sopenharmony_ci		}
606bf215546Sopenharmony_ci
607bf215546Sopenharmony_ci		if (i < 4 && slots[i])
608bf215546Sopenharmony_ci			scalar_only = false;
609bf215546Sopenharmony_ci	}
610bf215546Sopenharmony_ci	if (forced)
611bf215546Sopenharmony_ci		return 0;
612bf215546Sopenharmony_ci
613bf215546Sopenharmony_ci	/* Just check every possible combination of bank swizzle.
614bf215546Sopenharmony_ci	 * Not very efficent, but works on the first try in most of the cases. */
615bf215546Sopenharmony_ci	for (i = 0; i < 4; i++)
616bf215546Sopenharmony_ci		if (!slots[i] || !slots[i]->bank_swizzle_force || slots[i]->is_lds_idx_op)
617bf215546Sopenharmony_ci			bank_swizzle[i] = SQ_ALU_VEC_012;
618bf215546Sopenharmony_ci		else
619bf215546Sopenharmony_ci			bank_swizzle[i] = slots[i]->bank_swizzle;
620bf215546Sopenharmony_ci
621bf215546Sopenharmony_ci	bank_swizzle[4] = SQ_ALU_SCL_210;
622bf215546Sopenharmony_ci
623bf215546Sopenharmony_ci	while(bank_swizzle[4] <= SQ_ALU_SCL_221 && max_checks--) {
624bf215546Sopenharmony_ci		init_bank_swizzle(&bs);
625bf215546Sopenharmony_ci		if (scalar_only == false) {
626bf215546Sopenharmony_ci			for (i = 0; i < 4; i++) {
627bf215546Sopenharmony_ci				if (slots[i]) {
628bf215546Sopenharmony_ci					r = check_vector(bc, slots[i], &bs, bank_swizzle[i]);
629bf215546Sopenharmony_ci					if (r)
630bf215546Sopenharmony_ci						break;
631bf215546Sopenharmony_ci				}
632bf215546Sopenharmony_ci			}
633bf215546Sopenharmony_ci		} else
634bf215546Sopenharmony_ci			r = 0;
635bf215546Sopenharmony_ci
636bf215546Sopenharmony_ci		if (!r && max_slots == 5 && slots[4]) {
637bf215546Sopenharmony_ci			r = check_scalar(bc, slots[4], &bs, bank_swizzle[4]);
638bf215546Sopenharmony_ci		}
639bf215546Sopenharmony_ci		if (!r) {
640bf215546Sopenharmony_ci			for (i = 0; i < max_slots; i++) {
641bf215546Sopenharmony_ci				if (slots[i])
642bf215546Sopenharmony_ci					slots[i]->bank_swizzle = bank_swizzle[i];
643bf215546Sopenharmony_ci			}
644bf215546Sopenharmony_ci			return 0;
645bf215546Sopenharmony_ci		}
646bf215546Sopenharmony_ci
647bf215546Sopenharmony_ci		if (scalar_only) {
648bf215546Sopenharmony_ci			bank_swizzle[4]++;
649bf215546Sopenharmony_ci		} else {
650bf215546Sopenharmony_ci			for (i = 0; i < max_slots; i++) {
651bf215546Sopenharmony_ci				if (!slots[i] || (!slots[i]->bank_swizzle_force && !slots[i]->is_lds_idx_op)) {
652bf215546Sopenharmony_ci					bank_swizzle[i]++;
653bf215546Sopenharmony_ci					if (bank_swizzle[i] <= SQ_ALU_VEC_210)
654bf215546Sopenharmony_ci						break;
655bf215546Sopenharmony_ci					else if (i < max_slots - 1)
656bf215546Sopenharmony_ci						bank_swizzle[i] = SQ_ALU_VEC_012;
657bf215546Sopenharmony_ci					else
658bf215546Sopenharmony_ci						return -1;
659bf215546Sopenharmony_ci				}
660bf215546Sopenharmony_ci			}
661bf215546Sopenharmony_ci		}
662bf215546Sopenharmony_ci	}
663bf215546Sopenharmony_ci
664bf215546Sopenharmony_ci	/* Couldn't find a working swizzle. */
665bf215546Sopenharmony_ci	return -1;
666bf215546Sopenharmony_ci}
667bf215546Sopenharmony_ci
668bf215546Sopenharmony_cistatic int replace_gpr_with_pv_ps(struct r600_bytecode *bc,
669bf215546Sopenharmony_ci				  struct r600_bytecode_alu *slots[5], struct r600_bytecode_alu *alu_prev)
670bf215546Sopenharmony_ci{
671bf215546Sopenharmony_ci	struct r600_bytecode_alu *prev[5];
672bf215546Sopenharmony_ci	int gpr[5], chan[5];
673bf215546Sopenharmony_ci	int i, j, r, src, num_src;
674bf215546Sopenharmony_ci	int max_slots = bc->gfx_level == CAYMAN ? 4 : 5;
675bf215546Sopenharmony_ci
676bf215546Sopenharmony_ci	r = assign_alu_units(bc, alu_prev, prev);
677bf215546Sopenharmony_ci	if (r)
678bf215546Sopenharmony_ci		return r;
679bf215546Sopenharmony_ci
680bf215546Sopenharmony_ci	for (i = 0; i < max_slots; ++i) {
681bf215546Sopenharmony_ci		if (prev[i] && alu_writes(prev[i]) && !prev[i]->dst.rel) {
682bf215546Sopenharmony_ci
683bf215546Sopenharmony_ci			if (is_alu_64bit_inst(prev[i])) {
684bf215546Sopenharmony_ci				gpr[i] = -1;
685bf215546Sopenharmony_ci				continue;
686bf215546Sopenharmony_ci			}
687bf215546Sopenharmony_ci
688bf215546Sopenharmony_ci			gpr[i] = prev[i]->dst.sel;
689bf215546Sopenharmony_ci			/* cube writes more than PV.X */
690bf215546Sopenharmony_ci			if (is_alu_reduction_inst(bc, prev[i]))
691bf215546Sopenharmony_ci				chan[i] = 0;
692bf215546Sopenharmony_ci			else
693bf215546Sopenharmony_ci				chan[i] = prev[i]->dst.chan;
694bf215546Sopenharmony_ci		} else
695bf215546Sopenharmony_ci			gpr[i] = -1;
696bf215546Sopenharmony_ci	}
697bf215546Sopenharmony_ci
698bf215546Sopenharmony_ci	for (i = 0; i < max_slots; ++i) {
699bf215546Sopenharmony_ci		struct r600_bytecode_alu *alu = slots[i];
700bf215546Sopenharmony_ci		if (!alu)
701bf215546Sopenharmony_ci			continue;
702bf215546Sopenharmony_ci
703bf215546Sopenharmony_ci		if (is_alu_64bit_inst(alu))
704bf215546Sopenharmony_ci			continue;
705bf215546Sopenharmony_ci		num_src = r600_bytecode_get_num_operands(alu);
706bf215546Sopenharmony_ci		for (src = 0; src < num_src; ++src) {
707bf215546Sopenharmony_ci			if (!is_gpr(alu->src[src].sel) || alu->src[src].rel)
708bf215546Sopenharmony_ci				continue;
709bf215546Sopenharmony_ci
710bf215546Sopenharmony_ci			if (bc->gfx_level < CAYMAN) {
711bf215546Sopenharmony_ci				if (alu->src[src].sel == gpr[4] &&
712bf215546Sopenharmony_ci				    alu->src[src].chan == chan[4] &&
713bf215546Sopenharmony_ci				    alu_prev->pred_sel == alu->pred_sel) {
714bf215546Sopenharmony_ci					alu->src[src].sel = V_SQ_ALU_SRC_PS;
715bf215546Sopenharmony_ci					alu->src[src].chan = 0;
716bf215546Sopenharmony_ci					continue;
717bf215546Sopenharmony_ci				}
718bf215546Sopenharmony_ci			}
719bf215546Sopenharmony_ci
720bf215546Sopenharmony_ci			for (j = 0; j < 4; ++j) {
721bf215546Sopenharmony_ci				if (alu->src[src].sel == gpr[j] &&
722bf215546Sopenharmony_ci					alu->src[src].chan == j &&
723bf215546Sopenharmony_ci				      alu_prev->pred_sel == alu->pred_sel) {
724bf215546Sopenharmony_ci					alu->src[src].sel = V_SQ_ALU_SRC_PV;
725bf215546Sopenharmony_ci					alu->src[src].chan = chan[j];
726bf215546Sopenharmony_ci					break;
727bf215546Sopenharmony_ci				}
728bf215546Sopenharmony_ci			}
729bf215546Sopenharmony_ci		}
730bf215546Sopenharmony_ci	}
731bf215546Sopenharmony_ci
732bf215546Sopenharmony_ci	return 0;
733bf215546Sopenharmony_ci}
734bf215546Sopenharmony_ci
735bf215546Sopenharmony_civoid r600_bytecode_special_constants(uint32_t value, unsigned *sel)
736bf215546Sopenharmony_ci{
737bf215546Sopenharmony_ci	switch(value) {
738bf215546Sopenharmony_ci	case 0:
739bf215546Sopenharmony_ci		*sel = V_SQ_ALU_SRC_0;
740bf215546Sopenharmony_ci		break;
741bf215546Sopenharmony_ci	case 1:
742bf215546Sopenharmony_ci		*sel = V_SQ_ALU_SRC_1_INT;
743bf215546Sopenharmony_ci		break;
744bf215546Sopenharmony_ci	case -1:
745bf215546Sopenharmony_ci		*sel = V_SQ_ALU_SRC_M_1_INT;
746bf215546Sopenharmony_ci		break;
747bf215546Sopenharmony_ci	case 0x3F800000: /* 1.0f */
748bf215546Sopenharmony_ci		*sel = V_SQ_ALU_SRC_1;
749bf215546Sopenharmony_ci		break;
750bf215546Sopenharmony_ci	case 0x3F000000: /* 0.5f */
751bf215546Sopenharmony_ci		*sel = V_SQ_ALU_SRC_0_5;
752bf215546Sopenharmony_ci		break;
753bf215546Sopenharmony_ci	default:
754bf215546Sopenharmony_ci		*sel = V_SQ_ALU_SRC_LITERAL;
755bf215546Sopenharmony_ci		break;
756bf215546Sopenharmony_ci	}
757bf215546Sopenharmony_ci}
758bf215546Sopenharmony_ci
759bf215546Sopenharmony_ci/* compute how many literal are needed */
760bf215546Sopenharmony_cistatic int r600_bytecode_alu_nliterals(struct r600_bytecode_alu *alu,
761bf215546Sopenharmony_ci				 uint32_t literal[4], unsigned *nliteral)
762bf215546Sopenharmony_ci{
763bf215546Sopenharmony_ci	unsigned num_src = r600_bytecode_get_num_operands(alu);
764bf215546Sopenharmony_ci	unsigned i, j;
765bf215546Sopenharmony_ci
766bf215546Sopenharmony_ci	for (i = 0; i < num_src; ++i) {
767bf215546Sopenharmony_ci		if (alu->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
768bf215546Sopenharmony_ci			uint32_t value = alu->src[i].value;
769bf215546Sopenharmony_ci			unsigned found = 0;
770bf215546Sopenharmony_ci			for (j = 0; j < *nliteral; ++j) {
771bf215546Sopenharmony_ci				if (literal[j] == value) {
772bf215546Sopenharmony_ci					found = 1;
773bf215546Sopenharmony_ci					break;
774bf215546Sopenharmony_ci				}
775bf215546Sopenharmony_ci			}
776bf215546Sopenharmony_ci			if (!found) {
777bf215546Sopenharmony_ci				if (*nliteral >= 4)
778bf215546Sopenharmony_ci					return -EINVAL;
779bf215546Sopenharmony_ci				literal[(*nliteral)++] = value;
780bf215546Sopenharmony_ci			}
781bf215546Sopenharmony_ci		}
782bf215546Sopenharmony_ci	}
783bf215546Sopenharmony_ci	return 0;
784bf215546Sopenharmony_ci}
785bf215546Sopenharmony_ci
786bf215546Sopenharmony_cistatic void r600_bytecode_alu_adjust_literals(struct r600_bytecode_alu *alu,
787bf215546Sopenharmony_ci					      uint32_t literal[4], unsigned nliteral)
788bf215546Sopenharmony_ci{
789bf215546Sopenharmony_ci	unsigned num_src = r600_bytecode_get_num_operands(alu);
790bf215546Sopenharmony_ci	unsigned i, j;
791bf215546Sopenharmony_ci
792bf215546Sopenharmony_ci	for (i = 0; i < num_src; ++i) {
793bf215546Sopenharmony_ci		if (alu->src[i].sel == V_SQ_ALU_SRC_LITERAL) {
794bf215546Sopenharmony_ci			uint32_t value = alu->src[i].value;
795bf215546Sopenharmony_ci			for (j = 0; j < nliteral; ++j) {
796bf215546Sopenharmony_ci				if (literal[j] == value) {
797bf215546Sopenharmony_ci					alu->src[i].chan = j;
798bf215546Sopenharmony_ci					break;
799bf215546Sopenharmony_ci				}
800bf215546Sopenharmony_ci			}
801bf215546Sopenharmony_ci		}
802bf215546Sopenharmony_ci	}
803bf215546Sopenharmony_ci}
804bf215546Sopenharmony_ci
/* Try to merge the current instruction group (slots) into the previous
 * group (the one starting at alu_prev).
 *
 * The merged layout is built in result[].  Any condition that prevents the
 * merge makes the function return 0 before the instruction list is touched.
 * When all checks pass, the instructions are re-queued on bc->cf_last->alu
 * in result[] order, slots[] is overwritten with result[], and the clause
 * bookkeeping (ndw, last flag, group heads) is updated.
 *
 * Returns 0 whether or not a merge happened; a non-zero value is returned
 * only when assign_alu_units() fails. */
static int merge_inst_groups(struct r600_bytecode *bc, struct r600_bytecode_alu *slots[5],
			     struct r600_bytecode_alu *alu_prev)
{
	struct r600_bytecode_alu *prev[5];
	struct r600_bytecode_alu *result[5] = { NULL };

	/* bit 0: INTERP_X seen, bit 1: INTERP_Z seen; INTERP_LOAD_P0 sets both */
        uint8_t interp_xz = 0;

	/* literal[] collects the literals of both groups, prev_literal[] only
	 * those of the previous group (needed to undo its ndw contribution). */
	uint32_t literal[4], prev_literal[4];
	unsigned nliteral = 0, prev_nliteral = 0;

	int i, j, r, src, num_src;
	int num_once_inst = 0;
	int have_mova = 0, have_rel = 0;
	int max_slots = bc->gfx_level == CAYMAN ? 4 : 5;

	r = assign_alu_units(bc, alu_prev, prev);
	if (r)
		return r;

	/* First pass: refuse to merge predicated or once-only instructions,
	 * and groups that together set both interp_xz bits. */
	for (i = 0; i < max_slots; ++i) {
		if (prev[i]) {
		      if (prev[i]->pred_sel)
			      return 0;
		      if (is_alu_once_inst(prev[i]))
			      return 0;

                      if (prev[i]->op == ALU_OP1_INTERP_LOAD_P0)
                         interp_xz |= 3;
                      if (prev[i]->op == ALU_OP2_INTERP_X)
                         interp_xz |= 1;
                      if (prev[i]->op == ALU_OP2_INTERP_Z)
                         interp_xz |= 2;
		}
		if (slots[i]) {
			if (slots[i]->pred_sel)
				return 0;
			if (is_alu_once_inst(slots[i]))
				return 0;
                        if (slots[i]->op == ALU_OP1_INTERP_LOAD_P0)
                           interp_xz |= 3;
                        if (slots[i]->op == ALU_OP2_INTERP_X)
                           interp_xz |= 1;
                        if (slots[i]->op == ALU_OP2_INTERP_Z)
                           interp_xz |= 2;
		}
                if (interp_xz == 3)
                   return 0;
	}

	/* Second pass: build result[] slot by slot and check for hazards. */
	for (i = 0; i < max_slots; ++i) {
		struct r600_bytecode_alu *alu;

		/* a once-only instruction was counted in an earlier slot */
		if (num_once_inst > 0)
		   return 0;

		/* check number of literals */
		if (prev[i]) {
			if (r600_bytecode_alu_nliterals(prev[i], literal, &nliteral))
				return 0;
			if (r600_bytecode_alu_nliterals(prev[i], prev_literal, &prev_nliteral))
				return 0;
			/* MOVA and relative addressing must not end up in the
			 * same group. */
			if (is_alu_mova_inst(prev[i])) {
				if (have_rel)
					return 0;
				have_mova = 1;
			}

			if (alu_uses_rel(prev[i])) {
				if (have_mova) {
					return 0;
				}
				have_rel = 1;
			}
			if (alu_uses_lds(prev[i]))
				return 0;

			num_once_inst += is_alu_once_inst(prev[i]);
		}
		if (slots[i] && r600_bytecode_alu_nliterals(slots[i], literal, &nliteral))
			return 0;

		/* Let's check used slots. */
		if (prev[i] && !slots[i]) {
			/* only the previous group uses this slot */
			result[i] = prev[i];
			continue;
		} else if (prev[i] && slots[i]) {
			/* both groups want this slot: one of the two has to
			 * move to slot 4, which must be unused everywhere */
			if (max_slots == 5 && result[4] == NULL && prev[4] == NULL && slots[4] == NULL) {
				/* Trans unit is still free try to use it. */
				if (is_alu_any_unit_inst(bc, slots[i]) && !alu_uses_lds(slots[i])) {
					result[i] = prev[i];
					result[4] = slots[i];
				} else if (is_alu_any_unit_inst(bc, prev[i])) {
					/* both write the same destination
					 * register: do not swap them */
					if (slots[i]->dst.sel == prev[i]->dst.sel &&
					    alu_writes(slots[i]) &&
					    alu_writes(prev[i]))
						return 0;

					result[i] = slots[i];
					result[4] = prev[i];
				} else
					return 0;
			} else
				return 0;
		} else if(!slots[i]) {
			/* slot unused by both groups */
			continue;
		} else {
			/* only the current group uses this slot; bail out if
			 * it writes the same register and channel as the
			 * previous group's slot-4 instruction */
			if (max_slots == 5 && slots[i] && prev[4] &&
					slots[i]->dst.sel == prev[4]->dst.sel &&
					slots[i]->dst.chan == prev[4]->dst.chan &&
					alu_writes(slots[i]) &&
					alu_writes(prev[4]))
				return 0;

			result[i] = slots[i];
		}

		/* From here on slots[i] is non-NULL. */
		alu = slots[i];
		num_once_inst += is_alu_once_inst(alu);

		/* don't reschedule NOPs */
		if (is_nop_inst(alu))
			return 0;

		if (is_alu_mova_inst(alu)) {
			if (have_rel) {
				return 0;
			}
			have_mova = 1;
		}

		if (alu_uses_rel(alu)) {
			if (have_mova) {
				return 0;
			}
			have_rel = 1;
		}

		if (alu->op == ALU_OP0_SET_CF_IDX0 ||
			alu->op == ALU_OP0_SET_CF_IDX1)
			return 0; /* data hazard with MOVA */

		/* Let's check source gprs */
		num_src = r600_bytecode_get_num_operands(alu);
		for (src = 0; src < num_src; ++src) {

			/* Constants don't matter. */
			if (!is_gpr(alu->src[src].sel))
				continue;

			/* The current instruction must not read anything the
			 * previous group writes. */
			for (j = 0; j < max_slots; ++j) {
				if (!prev[j] || !alu_writes(prev[j]))
					continue;

				/* If it's relative then we can't determine which gpr is really used. */
				if (prev[j]->dst.chan == alu->src[src].chan &&
					(prev[j]->dst.sel == alu->src[src].sel ||
					prev[j]->dst.rel || alu->src[src].rel))
					return 0;
			}
		}
	}

	/* more than one PRED_ or KILL_ ? */
	if (num_once_inst > 1)
		return 0;

	/* check if the result can still be swizzled */
	r = check_and_set_bank_swizzle(bc, result);
	if (r)
		return 0;

	/* looks like everything worked out right, apply the changes */

	/* undo adding previous literals */
	bc->cf_last->ndw -= align(prev_nliteral, 2);

	/* sort instructions */
	for (i = 0; i < max_slots; ++i) {
		slots[i] = result[i];
		if (result[i]) {
			/* re-queue at the tail so the list ends up in slot order */
			list_del(&result[i]->list);
			result[i]->last = 0;
			list_addtail(&result[i]->list, &bc->cf_last->alu);
		}
	}

	/* determine new last instruction */
	list_entry(bc->cf_last->alu.prev, struct r600_bytecode_alu, list)->last = 1;

	/* determine new first instruction */
	for (i = 0; i < max_slots; ++i) {
		if (result[i]) {
			bc->cf_last->curr_bs_head = result[i];
			break;
		}
	}

	/* two groups became one: shift the recorded group heads back by one */
	bc->cf_last->prev_bs_head = bc->cf_last->prev2_bs_head;
	bc->cf_last->prev2_bs_head = NULL;

	return 0;
}
1008bf215546Sopenharmony_ci
1009bf215546Sopenharmony_ci/* we'll keep kcache sets sorted by bank & addr */
1010bf215546Sopenharmony_cistatic int r600_bytecode_alloc_kcache_line(struct r600_bytecode *bc,
1011bf215546Sopenharmony_ci		struct r600_bytecode_kcache *kcache,
1012bf215546Sopenharmony_ci		unsigned bank, unsigned line, unsigned index_mode)
1013bf215546Sopenharmony_ci{
1014bf215546Sopenharmony_ci	int i, kcache_banks = bc->gfx_level >= EVERGREEN ? 4 : 2;
1015bf215546Sopenharmony_ci
1016bf215546Sopenharmony_ci	for (i = 0; i < kcache_banks; i++) {
1017bf215546Sopenharmony_ci		if (kcache[i].mode) {
1018bf215546Sopenharmony_ci			int d;
1019bf215546Sopenharmony_ci
1020bf215546Sopenharmony_ci			if (kcache[i].bank < bank)
1021bf215546Sopenharmony_ci				continue;
1022bf215546Sopenharmony_ci
1023bf215546Sopenharmony_ci			if ((kcache[i].bank == bank && kcache[i].addr > line+1) ||
1024bf215546Sopenharmony_ci					kcache[i].bank > bank) {
1025bf215546Sopenharmony_ci				/* try to insert new line */
1026bf215546Sopenharmony_ci				if (kcache[kcache_banks-1].mode) {
1027bf215546Sopenharmony_ci					/* all sets are in use */
1028bf215546Sopenharmony_ci					return -ENOMEM;
1029bf215546Sopenharmony_ci				}
1030bf215546Sopenharmony_ci
1031bf215546Sopenharmony_ci				memmove(&kcache[i+1],&kcache[i], (kcache_banks-i-1)*sizeof(struct r600_bytecode_kcache));
1032bf215546Sopenharmony_ci				kcache[i].mode = V_SQ_CF_KCACHE_LOCK_1;
1033bf215546Sopenharmony_ci				kcache[i].bank = bank;
1034bf215546Sopenharmony_ci				kcache[i].addr = line;
1035bf215546Sopenharmony_ci				kcache[i].index_mode = index_mode;
1036bf215546Sopenharmony_ci				return 0;
1037bf215546Sopenharmony_ci			}
1038bf215546Sopenharmony_ci
1039bf215546Sopenharmony_ci			d = line - kcache[i].addr;
1040bf215546Sopenharmony_ci
1041bf215546Sopenharmony_ci			if (d == -1) {
1042bf215546Sopenharmony_ci				kcache[i].addr--;
1043bf215546Sopenharmony_ci				if (kcache[i].mode == V_SQ_CF_KCACHE_LOCK_2) {
1044bf215546Sopenharmony_ci					/* we are prepending the line to the current set,
1045bf215546Sopenharmony_ci					 * discarding the existing second line,
1046bf215546Sopenharmony_ci					 * so we'll have to insert line+2 after it */
1047bf215546Sopenharmony_ci					line += 2;
1048bf215546Sopenharmony_ci					continue;
1049bf215546Sopenharmony_ci				} else if (kcache[i].mode == V_SQ_CF_KCACHE_LOCK_1) {
1050bf215546Sopenharmony_ci					kcache[i].mode = V_SQ_CF_KCACHE_LOCK_2;
1051bf215546Sopenharmony_ci					return 0;
1052bf215546Sopenharmony_ci				} else {
1053bf215546Sopenharmony_ci					/* V_SQ_CF_KCACHE_LOCK_LOOP_INDEX is not supported */
1054bf215546Sopenharmony_ci					return -ENOMEM;
1055bf215546Sopenharmony_ci				}
1056bf215546Sopenharmony_ci			} else if (d == 1) {
1057bf215546Sopenharmony_ci				kcache[i].mode = V_SQ_CF_KCACHE_LOCK_2;
1058bf215546Sopenharmony_ci				return 0;
1059bf215546Sopenharmony_ci			} else if (d == 0)
1060bf215546Sopenharmony_ci				return 0;
1061bf215546Sopenharmony_ci		} else { /* free kcache set - use it */
1062bf215546Sopenharmony_ci			kcache[i].mode = V_SQ_CF_KCACHE_LOCK_1;
1063bf215546Sopenharmony_ci			kcache[i].bank = bank;
1064bf215546Sopenharmony_ci			kcache[i].addr = line;
1065bf215546Sopenharmony_ci			kcache[i].index_mode = index_mode;
1066bf215546Sopenharmony_ci			return 0;
1067bf215546Sopenharmony_ci		}
1068bf215546Sopenharmony_ci	}
1069bf215546Sopenharmony_ci	return -ENOMEM;
1070bf215546Sopenharmony_ci}
1071bf215546Sopenharmony_ci
1072bf215546Sopenharmony_cistatic int r600_bytecode_alloc_inst_kcache_lines(struct r600_bytecode *bc,
1073bf215546Sopenharmony_ci		struct r600_bytecode_kcache *kcache,
1074bf215546Sopenharmony_ci		struct r600_bytecode_alu *alu)
1075bf215546Sopenharmony_ci{
1076bf215546Sopenharmony_ci	int i, r;
1077bf215546Sopenharmony_ci
1078bf215546Sopenharmony_ci	for (i = 0; i < 3; i++) {
1079bf215546Sopenharmony_ci		unsigned bank, line, sel = alu->src[i].sel, index_mode;
1080bf215546Sopenharmony_ci
1081bf215546Sopenharmony_ci		if (sel < 512)
1082bf215546Sopenharmony_ci			continue;
1083bf215546Sopenharmony_ci
1084bf215546Sopenharmony_ci		bank = alu->src[i].kc_bank;
1085bf215546Sopenharmony_ci		assert(bank < R600_MAX_HW_CONST_BUFFERS);
1086bf215546Sopenharmony_ci		line = (sel-512)>>4;
1087bf215546Sopenharmony_ci		index_mode = alu->src[i].kc_rel ? 1 : 0; // V_SQ_CF_INDEX_0 / V_SQ_CF_INDEX_NONE
1088bf215546Sopenharmony_ci
1089bf215546Sopenharmony_ci		if ((r = r600_bytecode_alloc_kcache_line(bc, kcache, bank, line, index_mode)))
1090bf215546Sopenharmony_ci			return r;
1091bf215546Sopenharmony_ci	}
1092bf215546Sopenharmony_ci	return 0;
1093bf215546Sopenharmony_ci}
1094bf215546Sopenharmony_ci
1095bf215546Sopenharmony_cistatic int r600_bytecode_assign_kcache_banks(
1096bf215546Sopenharmony_ci		struct r600_bytecode_alu *alu,
1097bf215546Sopenharmony_ci		struct r600_bytecode_kcache * kcache)
1098bf215546Sopenharmony_ci{
1099bf215546Sopenharmony_ci	int i, j;
1100bf215546Sopenharmony_ci
1101bf215546Sopenharmony_ci	/* Alter the src operands to refer to the kcache. */
1102bf215546Sopenharmony_ci	for (i = 0; i < 3; ++i) {
1103bf215546Sopenharmony_ci		static const unsigned int base[] = {128, 160, 256, 288};
1104bf215546Sopenharmony_ci		unsigned int line, sel = alu->src[i].sel, found = 0;
1105bf215546Sopenharmony_ci
1106bf215546Sopenharmony_ci		if (sel < 512)
1107bf215546Sopenharmony_ci			continue;
1108bf215546Sopenharmony_ci
1109bf215546Sopenharmony_ci		sel -= 512;
1110bf215546Sopenharmony_ci		line = sel>>4;
1111bf215546Sopenharmony_ci
1112bf215546Sopenharmony_ci		for (j = 0; j < 4 && !found; ++j) {
1113bf215546Sopenharmony_ci			switch (kcache[j].mode) {
1114bf215546Sopenharmony_ci			case V_SQ_CF_KCACHE_NOP:
1115bf215546Sopenharmony_ci			case V_SQ_CF_KCACHE_LOCK_LOOP_INDEX:
1116bf215546Sopenharmony_ci				R600_ERR("unexpected kcache line mode\n");
1117bf215546Sopenharmony_ci				return -ENOMEM;
1118bf215546Sopenharmony_ci			default:
1119bf215546Sopenharmony_ci				if (kcache[j].bank == alu->src[i].kc_bank &&
1120bf215546Sopenharmony_ci						kcache[j].addr <= line &&
1121bf215546Sopenharmony_ci						line < kcache[j].addr + kcache[j].mode) {
1122bf215546Sopenharmony_ci					alu->src[i].sel = sel - (kcache[j].addr<<4);
1123bf215546Sopenharmony_ci					alu->src[i].sel += base[j];
1124bf215546Sopenharmony_ci					found=1;
1125bf215546Sopenharmony_ci			    }
1126bf215546Sopenharmony_ci			}
1127bf215546Sopenharmony_ci		}
1128bf215546Sopenharmony_ci	}
1129bf215546Sopenharmony_ci	return 0;
1130bf215546Sopenharmony_ci}
1131bf215546Sopenharmony_ci
1132bf215546Sopenharmony_cistatic int r600_bytecode_alloc_kcache_lines(struct r600_bytecode *bc,
1133bf215546Sopenharmony_ci		struct r600_bytecode_alu *alu,
1134bf215546Sopenharmony_ci		unsigned type)
1135bf215546Sopenharmony_ci{
1136bf215546Sopenharmony_ci	struct r600_bytecode_kcache kcache_sets[4];
1137bf215546Sopenharmony_ci	struct r600_bytecode_kcache *kcache = kcache_sets;
1138bf215546Sopenharmony_ci	int r;
1139bf215546Sopenharmony_ci
1140bf215546Sopenharmony_ci	memcpy(kcache, bc->cf_last->kcache, 4 * sizeof(struct r600_bytecode_kcache));
1141bf215546Sopenharmony_ci
1142bf215546Sopenharmony_ci	if ((r = r600_bytecode_alloc_inst_kcache_lines(bc, kcache, alu))) {
1143bf215546Sopenharmony_ci		/* can't alloc, need to start new clause */
1144bf215546Sopenharmony_ci
1145bf215546Sopenharmony_ci		/* Make sure the CF ends with an "last" instruction when
1146bf215546Sopenharmony_ci		 * we split an ALU group because of a new CF */
1147bf215546Sopenharmony_ci		if (!list_is_empty(&bc->cf_last->alu))  {
1148bf215546Sopenharmony_ci			struct r600_bytecode_alu *last_submitted =
1149bf215546Sopenharmony_ci				list_last_entry(&bc->cf_last->alu, struct r600_bytecode_alu, list);
1150bf215546Sopenharmony_ci				last_submitted->last = 1;
1151bf215546Sopenharmony_ci		}
1152bf215546Sopenharmony_ci
1153bf215546Sopenharmony_ci		if ((r = r600_bytecode_add_cf(bc))) {
1154bf215546Sopenharmony_ci			return r;
1155bf215546Sopenharmony_ci		}
1156bf215546Sopenharmony_ci		bc->cf_last->op = type;
1157bf215546Sopenharmony_ci
1158bf215546Sopenharmony_ci		/* retry with the new clause */
1159bf215546Sopenharmony_ci		kcache = bc->cf_last->kcache;
1160bf215546Sopenharmony_ci		if ((r = r600_bytecode_alloc_inst_kcache_lines(bc, kcache, alu))) {
1161bf215546Sopenharmony_ci			/* can't alloc again- should never happen */
1162bf215546Sopenharmony_ci			return r;
1163bf215546Sopenharmony_ci		}
1164bf215546Sopenharmony_ci	} else {
1165bf215546Sopenharmony_ci		/* update kcache sets */
1166bf215546Sopenharmony_ci		memcpy(bc->cf_last->kcache, kcache, 4 * sizeof(struct r600_bytecode_kcache));
1167bf215546Sopenharmony_ci	}
1168bf215546Sopenharmony_ci
1169bf215546Sopenharmony_ci	/* if we actually used more than 2 kcache sets, or have relative indexing - use ALU_EXTENDED on eg+ */
1170bf215546Sopenharmony_ci	if (kcache[2].mode != V_SQ_CF_KCACHE_NOP ||
1171bf215546Sopenharmony_ci		kcache[0].index_mode || kcache[1].index_mode || kcache[2].index_mode || kcache[3].index_mode) {
1172bf215546Sopenharmony_ci		if (bc->gfx_level < EVERGREEN)
1173bf215546Sopenharmony_ci			return -ENOMEM;
1174bf215546Sopenharmony_ci		bc->cf_last->eg_alu_extended = 1;
1175bf215546Sopenharmony_ci	}
1176bf215546Sopenharmony_ci
1177bf215546Sopenharmony_ci	return 0;
1178bf215546Sopenharmony_ci}
1179bf215546Sopenharmony_ci
1180bf215546Sopenharmony_cistatic int insert_nop_r6xx(struct r600_bytecode *bc, int max_slots)
1181bf215546Sopenharmony_ci{
1182bf215546Sopenharmony_ci	struct r600_bytecode_alu alu;
1183bf215546Sopenharmony_ci	int r, i;
1184bf215546Sopenharmony_ci
1185bf215546Sopenharmony_ci	for (i = 0; i < max_slots; i++) {
1186bf215546Sopenharmony_ci		memset(&alu, 0, sizeof(alu));
1187bf215546Sopenharmony_ci		alu.op = ALU_OP0_NOP;
1188bf215546Sopenharmony_ci		alu.src[0].chan = i & 3;
1189bf215546Sopenharmony_ci		alu.dst.chan = i & 3;
1190bf215546Sopenharmony_ci		alu.last = (i == max_slots - 1);
1191bf215546Sopenharmony_ci		r = r600_bytecode_add_alu(bc, &alu);
1192bf215546Sopenharmony_ci		if (r)
1193bf215546Sopenharmony_ci			return r;
1194bf215546Sopenharmony_ci	}
1195bf215546Sopenharmony_ci	return 0;
1196bf215546Sopenharmony_ci}
1197bf215546Sopenharmony_ci
1198bf215546Sopenharmony_ci/* load AR register from gpr (bc->ar_reg) with MOVA_INT */
1199bf215546Sopenharmony_cistatic int load_ar_r6xx(struct r600_bytecode *bc, bool for_src)
1200bf215546Sopenharmony_ci{
1201bf215546Sopenharmony_ci	struct r600_bytecode_alu alu;
1202bf215546Sopenharmony_ci	int r;
1203bf215546Sopenharmony_ci
1204bf215546Sopenharmony_ci	if (bc->ar_loaded)
1205bf215546Sopenharmony_ci		return 0;
1206bf215546Sopenharmony_ci
1207bf215546Sopenharmony_ci	/* hack to avoid making MOVA the last instruction in the clause */
1208bf215546Sopenharmony_ci	if ((bc->cf_last->ndw>>1) >= 110)
1209bf215546Sopenharmony_ci		bc->force_add_cf = 1;
1210bf215546Sopenharmony_ci   else if (for_src) {
1211bf215546Sopenharmony_ci      insert_nop_r6xx(bc, 4);
1212bf215546Sopenharmony_ci      bc->nalu_groups++;
1213bf215546Sopenharmony_ci   }
1214bf215546Sopenharmony_ci
1215bf215546Sopenharmony_ci	memset(&alu, 0, sizeof(alu));
1216bf215546Sopenharmony_ci	alu.op = ALU_OP1_MOVA_GPR_INT;
1217bf215546Sopenharmony_ci	alu.src[0].sel = bc->ar_reg;
1218bf215546Sopenharmony_ci	alu.src[0].chan = bc->ar_chan;
1219bf215546Sopenharmony_ci	alu.last = 1;
1220bf215546Sopenharmony_ci	alu.index_mode = INDEX_MODE_LOOP;
1221bf215546Sopenharmony_ci	r = r600_bytecode_add_alu(bc, &alu);
1222bf215546Sopenharmony_ci	if (r)
1223bf215546Sopenharmony_ci		return r;
1224bf215546Sopenharmony_ci
1225bf215546Sopenharmony_ci	/* no requirement to set uses waterfall on MOVA_GPR_INT */
1226bf215546Sopenharmony_ci	bc->ar_loaded = 1;
1227bf215546Sopenharmony_ci	return 0;
1228bf215546Sopenharmony_ci}
1229bf215546Sopenharmony_ci
1230bf215546Sopenharmony_ci/* load AR register from gpr (bc->ar_reg) with MOVA_INT */
1231bf215546Sopenharmony_ciint r600_load_ar(struct r600_bytecode *bc, bool for_src)
1232bf215546Sopenharmony_ci{
1233bf215546Sopenharmony_ci	struct r600_bytecode_alu alu;
1234bf215546Sopenharmony_ci	int r;
1235bf215546Sopenharmony_ci
1236bf215546Sopenharmony_ci	if (bc->ar_handling)
1237bf215546Sopenharmony_ci		return load_ar_r6xx(bc, for_src);
1238bf215546Sopenharmony_ci
1239bf215546Sopenharmony_ci	if (bc->ar_loaded)
1240bf215546Sopenharmony_ci		return 0;
1241bf215546Sopenharmony_ci
1242bf215546Sopenharmony_ci	/* hack to avoid making MOVA the last instruction in the clause */
1243bf215546Sopenharmony_ci	if ((bc->cf_last->ndw>>1) >= 110)
1244bf215546Sopenharmony_ci		bc->force_add_cf = 1;
1245bf215546Sopenharmony_ci
1246bf215546Sopenharmony_ci	memset(&alu, 0, sizeof(alu));
1247bf215546Sopenharmony_ci	alu.op = ALU_OP1_MOVA_INT;
1248bf215546Sopenharmony_ci	alu.src[0].sel = bc->ar_reg;
1249bf215546Sopenharmony_ci	alu.src[0].chan = bc->ar_chan;
1250bf215546Sopenharmony_ci	alu.last = 1;
1251bf215546Sopenharmony_ci	r = r600_bytecode_add_alu(bc, &alu);
1252bf215546Sopenharmony_ci	if (r)
1253bf215546Sopenharmony_ci		return r;
1254bf215546Sopenharmony_ci
1255bf215546Sopenharmony_ci	bc->cf_last->r6xx_uses_waterfall = 1;
1256bf215546Sopenharmony_ci	bc->ar_loaded = 1;
1257bf215546Sopenharmony_ci	return 0;
1258bf215546Sopenharmony_ci}
1259bf215546Sopenharmony_ci
1260bf215546Sopenharmony_ciint r600_bytecode_add_alu_type(struct r600_bytecode *bc,
1261bf215546Sopenharmony_ci		const struct r600_bytecode_alu *alu, unsigned type)
1262bf215546Sopenharmony_ci{
1263bf215546Sopenharmony_ci	struct r600_bytecode_alu *nalu = r600_bytecode_alu();
1264bf215546Sopenharmony_ci	struct r600_bytecode_alu *lalu;
1265bf215546Sopenharmony_ci	int i, r;
1266bf215546Sopenharmony_ci
1267bf215546Sopenharmony_ci	if (!nalu)
1268bf215546Sopenharmony_ci		return -ENOMEM;
1269bf215546Sopenharmony_ci	memcpy(nalu, alu, sizeof(struct r600_bytecode_alu));
1270bf215546Sopenharmony_ci
1271bf215546Sopenharmony_ci	if (alu->is_op3) {
1272bf215546Sopenharmony_ci		/* will fail later since alu does not support it. */
1273bf215546Sopenharmony_ci		assert(!alu->src[0].abs && !alu->src[1].abs && !alu->src[2].abs);
1274bf215546Sopenharmony_ci	}
1275bf215546Sopenharmony_ci
1276bf215546Sopenharmony_ci	if (bc->cf_last != NULL && bc->cf_last->op != type) {
1277bf215546Sopenharmony_ci		/* check if we could add it anyway */
1278bf215546Sopenharmony_ci		if ((bc->cf_last->op == CF_OP_ALU && type == CF_OP_ALU_PUSH_BEFORE) ||
1279bf215546Sopenharmony_ci		 	(bc->cf_last->op == CF_OP_ALU_PUSH_BEFORE && type == CF_OP_ALU)) {
1280bf215546Sopenharmony_ci		 	LIST_FOR_EACH_ENTRY(lalu, &bc->cf_last->alu, list) {
1281bf215546Sopenharmony_ci		 		if (lalu->execute_mask) {
1282bf215546Sopenharmony_ci					bc->force_add_cf = 1;
1283bf215546Sopenharmony_ci					break;
1284bf215546Sopenharmony_ci				}
1285bf215546Sopenharmony_ci		 		type = CF_OP_ALU_PUSH_BEFORE;
1286bf215546Sopenharmony_ci			}
1287bf215546Sopenharmony_ci		} else
1288bf215546Sopenharmony_ci			bc->force_add_cf = 1;
1289bf215546Sopenharmony_ci	}
1290bf215546Sopenharmony_ci
1291bf215546Sopenharmony_ci	/* cf can contains only alu or only vtx or only tex */
1292bf215546Sopenharmony_ci	if (bc->cf_last == NULL || bc->force_add_cf) {
1293bf215546Sopenharmony_ci               if (bc->cf_last && bc->cf_last->curr_bs_head)
1294bf215546Sopenharmony_ci                  bc->cf_last->curr_bs_head->last = 1;
1295bf215546Sopenharmony_ci		r = r600_bytecode_add_cf(bc);
1296bf215546Sopenharmony_ci		if (r) {
1297bf215546Sopenharmony_ci			free(nalu);
1298bf215546Sopenharmony_ci			return r;
1299bf215546Sopenharmony_ci		}
1300bf215546Sopenharmony_ci	}
1301bf215546Sopenharmony_ci	bc->cf_last->op = type;
1302bf215546Sopenharmony_ci
1303bf215546Sopenharmony_ci	/* Load index register if required */
1304bf215546Sopenharmony_ci	if (bc->gfx_level >= EVERGREEN) {
1305bf215546Sopenharmony_ci		for (i = 0; i < 3; i++)
1306bf215546Sopenharmony_ci			if (nalu->src[i].kc_bank &&  nalu->src[i].kc_rel)
1307bf215546Sopenharmony_ci				egcm_load_index_reg(bc, 0, true);
1308bf215546Sopenharmony_ci	}
1309bf215546Sopenharmony_ci
1310bf215546Sopenharmony_ci	/* Check AR usage and load it if required */
1311bf215546Sopenharmony_ci	for (i = 0; i < 3; i++)
1312bf215546Sopenharmony_ci		if (nalu->src[i].rel && !bc->ar_loaded)
1313bf215546Sopenharmony_ci			r600_load_ar(bc, true);
1314bf215546Sopenharmony_ci
1315bf215546Sopenharmony_ci	if (nalu->dst.rel && !bc->ar_loaded)
1316bf215546Sopenharmony_ci		r600_load_ar(bc, false);
1317bf215546Sopenharmony_ci
1318bf215546Sopenharmony_ci	/* Setup the kcache for this ALU instruction. This will start a new
1319bf215546Sopenharmony_ci	 * ALU clause if needed. */
1320bf215546Sopenharmony_ci	if ((r = r600_bytecode_alloc_kcache_lines(bc, nalu, type))) {
1321bf215546Sopenharmony_ci		free(nalu);
1322bf215546Sopenharmony_ci		return r;
1323bf215546Sopenharmony_ci	}
1324bf215546Sopenharmony_ci
1325bf215546Sopenharmony_ci	if (!bc->cf_last->curr_bs_head) {
1326bf215546Sopenharmony_ci		bc->cf_last->curr_bs_head = nalu;
1327bf215546Sopenharmony_ci	}
1328bf215546Sopenharmony_ci	/* number of gpr == the last gpr used in any alu */
1329bf215546Sopenharmony_ci	for (i = 0; i < 3; i++) {
1330bf215546Sopenharmony_ci		if (nalu->src[i].sel >= bc->ngpr && nalu->src[i].sel < 128) {
1331bf215546Sopenharmony_ci			bc->ngpr = nalu->src[i].sel + 1;
1332bf215546Sopenharmony_ci		}
1333bf215546Sopenharmony_ci		if (nalu->src[i].sel == V_SQ_ALU_SRC_LITERAL)
1334bf215546Sopenharmony_ci			r600_bytecode_special_constants(nalu->src[i].value,
1335bf215546Sopenharmony_ci				&nalu->src[i].sel);
1336bf215546Sopenharmony_ci	}
1337bf215546Sopenharmony_ci	if (nalu->dst.write && nalu->dst.sel >= bc->ngpr) {
1338bf215546Sopenharmony_ci		bc->ngpr = nalu->dst.sel + 1;
1339bf215546Sopenharmony_ci	}
1340bf215546Sopenharmony_ci	list_addtail(&nalu->list, &bc->cf_last->alu);
1341bf215546Sopenharmony_ci	/* each alu use 2 dwords */
1342bf215546Sopenharmony_ci	bc->cf_last->ndw += 2;
1343bf215546Sopenharmony_ci	bc->ndw += 2;
1344bf215546Sopenharmony_ci
1345bf215546Sopenharmony_ci	/* process cur ALU instructions for bank swizzle */
1346bf215546Sopenharmony_ci	if (nalu->last) {
1347bf215546Sopenharmony_ci		uint32_t literal[4];
1348bf215546Sopenharmony_ci		unsigned nliteral;
1349bf215546Sopenharmony_ci		struct r600_bytecode_alu *slots[5];
1350bf215546Sopenharmony_ci		int max_slots = bc->gfx_level == CAYMAN ? 4 : 5;
1351bf215546Sopenharmony_ci		r = assign_alu_units(bc, bc->cf_last->curr_bs_head, slots);
1352bf215546Sopenharmony_ci		if (r)
1353bf215546Sopenharmony_ci			return r;
1354bf215546Sopenharmony_ci
1355bf215546Sopenharmony_ci		if (bc->cf_last->prev_bs_head) {
1356bf215546Sopenharmony_ci         struct r600_bytecode_alu *cur_prev_head = bc->cf_last->prev_bs_head;
1357bf215546Sopenharmony_ci			r = merge_inst_groups(bc, slots, cur_prev_head);
1358bf215546Sopenharmony_ci			if (r)
1359bf215546Sopenharmony_ci				return r;
1360bf215546Sopenharmony_ci         if (cur_prev_head != bc->cf_last->prev_bs_head)
1361bf215546Sopenharmony_ci            bc->nalu_groups--;
1362bf215546Sopenharmony_ci		}
1363bf215546Sopenharmony_ci
1364bf215546Sopenharmony_ci		if (bc->cf_last->prev_bs_head) {
1365bf215546Sopenharmony_ci			r = replace_gpr_with_pv_ps(bc, slots, bc->cf_last->prev_bs_head);
1366bf215546Sopenharmony_ci			if (r)
1367bf215546Sopenharmony_ci				return r;
1368bf215546Sopenharmony_ci		}
1369bf215546Sopenharmony_ci
1370bf215546Sopenharmony_ci		r = check_and_set_bank_swizzle(bc, slots);
1371bf215546Sopenharmony_ci		if (r)
1372bf215546Sopenharmony_ci			return r;
1373bf215546Sopenharmony_ci
1374bf215546Sopenharmony_ci		for (i = 0, nliteral = 0; i < max_slots; i++) {
1375bf215546Sopenharmony_ci			if (slots[i]) {
1376bf215546Sopenharmony_ci				r = r600_bytecode_alu_nliterals(slots[i], literal, &nliteral);
1377bf215546Sopenharmony_ci				if (r)
1378bf215546Sopenharmony_ci					return r;
1379bf215546Sopenharmony_ci			}
1380bf215546Sopenharmony_ci		}
1381bf215546Sopenharmony_ci		bc->cf_last->ndw += align(nliteral, 2);
1382bf215546Sopenharmony_ci
1383bf215546Sopenharmony_ci		/* at most 128 slots, one add alu can add 5 slots + 4 constants(2 slots)
1384bf215546Sopenharmony_ci		 * worst case */
1385bf215546Sopenharmony_ci		if ((bc->cf_last->ndw >> 1) >= 120) {
1386bf215546Sopenharmony_ci			bc->force_add_cf = 1;
1387bf215546Sopenharmony_ci		}
1388bf215546Sopenharmony_ci
1389bf215546Sopenharmony_ci		bc->cf_last->prev2_bs_head = bc->cf_last->prev_bs_head;
1390bf215546Sopenharmony_ci		bc->cf_last->prev_bs_head = bc->cf_last->curr_bs_head;
1391bf215546Sopenharmony_ci		bc->cf_last->curr_bs_head = NULL;
1392bf215546Sopenharmony_ci
1393bf215546Sopenharmony_ci		bc->nalu_groups++;
1394bf215546Sopenharmony_ci
1395bf215546Sopenharmony_ci		if (bc->r6xx_nop_after_rel_dst) {
1396bf215546Sopenharmony_ci			for (int i = 0; i < max_slots; ++i) {
1397bf215546Sopenharmony_ci				if (slots[i] && slots[i]->dst.rel) {
1398bf215546Sopenharmony_ci					insert_nop_r6xx(bc, max_slots);
1399bf215546Sopenharmony_ci					bc->nalu_groups++;
1400bf215546Sopenharmony_ci					break;
1401bf215546Sopenharmony_ci				}
1402bf215546Sopenharmony_ci			}
1403bf215546Sopenharmony_ci		}
1404bf215546Sopenharmony_ci	}
1405bf215546Sopenharmony_ci
1406bf215546Sopenharmony_ci	/* Might need to insert spill write ops after current clause */
1407bf215546Sopenharmony_ci	if (nalu->last && bc->n_pending_outputs) {
1408bf215546Sopenharmony_ci		while (bc->n_pending_outputs) {
1409bf215546Sopenharmony_ci			r = r600_bytecode_add_output(bc, &bc->pending_outputs[--bc->n_pending_outputs]);
1410bf215546Sopenharmony_ci			if (r)
1411bf215546Sopenharmony_ci				return r;
1412bf215546Sopenharmony_ci		}
1413bf215546Sopenharmony_ci	}
1414bf215546Sopenharmony_ci
1415bf215546Sopenharmony_ci	return 0;
1416bf215546Sopenharmony_ci}
1417bf215546Sopenharmony_ci
1418bf215546Sopenharmony_ciint r600_bytecode_add_alu(struct r600_bytecode *bc, const struct r600_bytecode_alu *alu)
1419bf215546Sopenharmony_ci{
1420bf215546Sopenharmony_ci	return r600_bytecode_add_alu_type(bc, alu, CF_OP_ALU);
1421bf215546Sopenharmony_ci}
1422bf215546Sopenharmony_ci
1423bf215546Sopenharmony_cistatic unsigned r600_bytecode_num_tex_and_vtx_instructions(const struct r600_bytecode *bc)
1424bf215546Sopenharmony_ci{
1425bf215546Sopenharmony_ci	switch (bc->gfx_level) {
1426bf215546Sopenharmony_ci	case R600:
1427bf215546Sopenharmony_ci		return 8;
1428bf215546Sopenharmony_ci
1429bf215546Sopenharmony_ci	case R700:
1430bf215546Sopenharmony_ci	case EVERGREEN:
1431bf215546Sopenharmony_ci	case CAYMAN:
1432bf215546Sopenharmony_ci		return 16;
1433bf215546Sopenharmony_ci
1434bf215546Sopenharmony_ci	default:
1435bf215546Sopenharmony_ci		R600_ERR("Unknown gfx level %d.\n", bc->gfx_level);
1436bf215546Sopenharmony_ci		return 8;
1437bf215546Sopenharmony_ci	}
1438bf215546Sopenharmony_ci}
1439bf215546Sopenharmony_ci
1440bf215546Sopenharmony_cistatic inline boolean last_inst_was_not_vtx_fetch(struct r600_bytecode *bc)
1441bf215546Sopenharmony_ci{
1442bf215546Sopenharmony_ci	return !((r600_isa_cf(bc->cf_last->op)->flags & CF_FETCH) &&
1443bf215546Sopenharmony_ci		 bc->cf_last->op != CF_OP_GDS &&
1444bf215546Sopenharmony_ci		 (bc->gfx_level == CAYMAN ||
1445bf215546Sopenharmony_ci		  bc->cf_last->op != CF_OP_TEX));
1446bf215546Sopenharmony_ci}
1447bf215546Sopenharmony_ci
1448bf215546Sopenharmony_cistatic int r600_bytecode_add_vtx_internal(struct r600_bytecode *bc, const struct r600_bytecode_vtx *vtx,
1449bf215546Sopenharmony_ci					  bool use_tc)
1450bf215546Sopenharmony_ci{
1451bf215546Sopenharmony_ci	struct r600_bytecode_vtx *nvtx = r600_bytecode_vtx();
1452bf215546Sopenharmony_ci	int r;
1453bf215546Sopenharmony_ci
1454bf215546Sopenharmony_ci	if (!nvtx)
1455bf215546Sopenharmony_ci		return -ENOMEM;
1456bf215546Sopenharmony_ci	memcpy(nvtx, vtx, sizeof(struct r600_bytecode_vtx));
1457bf215546Sopenharmony_ci
1458bf215546Sopenharmony_ci	/* Load index register if required */
1459bf215546Sopenharmony_ci	if (bc->gfx_level >= EVERGREEN) {
1460bf215546Sopenharmony_ci		if (vtx->buffer_index_mode)
1461bf215546Sopenharmony_ci			egcm_load_index_reg(bc, vtx->buffer_index_mode - 1, false);
1462bf215546Sopenharmony_ci	}
1463bf215546Sopenharmony_ci
1464bf215546Sopenharmony_ci	/* cf can contains only alu or only vtx or only tex */
1465bf215546Sopenharmony_ci	if (bc->cf_last == NULL ||
1466bf215546Sopenharmony_ci	    last_inst_was_not_vtx_fetch(bc) ||
1467bf215546Sopenharmony_ci	    bc->force_add_cf) {
1468bf215546Sopenharmony_ci		r = r600_bytecode_add_cf(bc);
1469bf215546Sopenharmony_ci		if (r) {
1470bf215546Sopenharmony_ci			free(nvtx);
1471bf215546Sopenharmony_ci			return r;
1472bf215546Sopenharmony_ci		}
1473bf215546Sopenharmony_ci		switch (bc->gfx_level) {
1474bf215546Sopenharmony_ci		case R600:
1475bf215546Sopenharmony_ci		case R700:
1476bf215546Sopenharmony_ci			bc->cf_last->op = CF_OP_VTX;
1477bf215546Sopenharmony_ci			break;
1478bf215546Sopenharmony_ci		case EVERGREEN:
1479bf215546Sopenharmony_ci			if (use_tc)
1480bf215546Sopenharmony_ci				bc->cf_last->op = CF_OP_TEX;
1481bf215546Sopenharmony_ci			else
1482bf215546Sopenharmony_ci				bc->cf_last->op = CF_OP_VTX;
1483bf215546Sopenharmony_ci			break;
1484bf215546Sopenharmony_ci		case CAYMAN:
1485bf215546Sopenharmony_ci			bc->cf_last->op = CF_OP_TEX;
1486bf215546Sopenharmony_ci			break;
1487bf215546Sopenharmony_ci		default:
1488bf215546Sopenharmony_ci			R600_ERR("Unknown gfx level %d.\n", bc->gfx_level);
1489bf215546Sopenharmony_ci			free(nvtx);
1490bf215546Sopenharmony_ci			return -EINVAL;
1491bf215546Sopenharmony_ci		}
1492bf215546Sopenharmony_ci	}
1493bf215546Sopenharmony_ci	list_addtail(&nvtx->list, &bc->cf_last->vtx);
1494bf215546Sopenharmony_ci	/* each fetch use 4 dwords */
1495bf215546Sopenharmony_ci	bc->cf_last->ndw += 4;
1496bf215546Sopenharmony_ci	bc->ndw += 4;
1497bf215546Sopenharmony_ci	if ((bc->cf_last->ndw / 4) >= r600_bytecode_num_tex_and_vtx_instructions(bc))
1498bf215546Sopenharmony_ci		bc->force_add_cf = 1;
1499bf215546Sopenharmony_ci
1500bf215546Sopenharmony_ci	bc->ngpr = MAX2(bc->ngpr, vtx->src_gpr + 1);
1501bf215546Sopenharmony_ci	bc->ngpr = MAX2(bc->ngpr, vtx->dst_gpr + 1);
1502bf215546Sopenharmony_ci
1503bf215546Sopenharmony_ci	return 0;
1504bf215546Sopenharmony_ci}
1505bf215546Sopenharmony_ci
/*
 * Append a vertex fetch with use_tc disabled; see
 * r600_bytecode_add_vtx_internal() for the return values.
 */
int r600_bytecode_add_vtx(struct r600_bytecode *bc, const struct r600_bytecode_vtx *vtx)
{
	const bool use_tc = false;

	return r600_bytecode_add_vtx_internal(bc, vtx, use_tc);
}
1510bf215546Sopenharmony_ci
/*
 * Append a vertex fetch with use_tc enabled; see
 * r600_bytecode_add_vtx_internal() for the return values.
 */
int r600_bytecode_add_vtx_tc(struct r600_bytecode *bc, const struct r600_bytecode_vtx *vtx)
{
	const bool use_tc = true;

	return r600_bytecode_add_vtx_internal(bc, vtx, use_tc);
}
1515bf215546Sopenharmony_ci
1516bf215546Sopenharmony_ciint r600_bytecode_add_tex(struct r600_bytecode *bc, const struct r600_bytecode_tex *tex)
1517bf215546Sopenharmony_ci{
1518bf215546Sopenharmony_ci	struct r600_bytecode_tex *ntex = r600_bytecode_tex();
1519bf215546Sopenharmony_ci	int r;
1520bf215546Sopenharmony_ci
1521bf215546Sopenharmony_ci	if (!ntex)
1522bf215546Sopenharmony_ci		return -ENOMEM;
1523bf215546Sopenharmony_ci	memcpy(ntex, tex, sizeof(struct r600_bytecode_tex));
1524bf215546Sopenharmony_ci
1525bf215546Sopenharmony_ci	/* Load index register if required */
1526bf215546Sopenharmony_ci	if (bc->gfx_level >= EVERGREEN) {
1527bf215546Sopenharmony_ci		if (tex->sampler_index_mode || tex->resource_index_mode)
1528bf215546Sopenharmony_ci			egcm_load_index_reg(bc, 1, false);
1529bf215546Sopenharmony_ci	}
1530bf215546Sopenharmony_ci
1531bf215546Sopenharmony_ci	/* we can't fetch data und use it as texture lookup address in the same TEX clause */
1532bf215546Sopenharmony_ci	if (bc->cf_last != NULL &&
1533bf215546Sopenharmony_ci		bc->cf_last->op == CF_OP_TEX) {
1534bf215546Sopenharmony_ci		struct r600_bytecode_tex *ttex;
1535bf215546Sopenharmony_ci		LIST_FOR_EACH_ENTRY(ttex, &bc->cf_last->tex, list) {
1536bf215546Sopenharmony_ci			if (ttex->dst_gpr == ntex->src_gpr &&
1537bf215546Sopenharmony_ci                            (ttex->dst_sel_x < 4 || ttex->dst_sel_y < 4 ||
1538bf215546Sopenharmony_ci                             ttex->dst_sel_z < 4 || ttex->dst_sel_w < 4)) {
1539bf215546Sopenharmony_ci				bc->force_add_cf = 1;
1540bf215546Sopenharmony_ci				break;
1541bf215546Sopenharmony_ci			}
1542bf215546Sopenharmony_ci		}
1543bf215546Sopenharmony_ci		/* vtx instrs get inserted after tex, so make sure we aren't moving the tex
1544bf215546Sopenharmony_ci		 * before (say) the instr fetching the texcoord.
1545bf215546Sopenharmony_ci		 */
1546bf215546Sopenharmony_ci		if (!list_is_empty(&bc->cf_last->vtx))
1547bf215546Sopenharmony_ci			bc->force_add_cf = 1;
1548bf215546Sopenharmony_ci
1549bf215546Sopenharmony_ci		/* slight hack to make gradients always go into same cf */
1550bf215546Sopenharmony_ci		if (ntex->op == FETCH_OP_SET_GRADIENTS_H)
1551bf215546Sopenharmony_ci			bc->force_add_cf = 1;
1552bf215546Sopenharmony_ci	}
1553bf215546Sopenharmony_ci
1554bf215546Sopenharmony_ci	/* cf can contains only alu or only vtx or only tex */
1555bf215546Sopenharmony_ci	if (bc->cf_last == NULL ||
1556bf215546Sopenharmony_ci		bc->cf_last->op != CF_OP_TEX ||
1557bf215546Sopenharmony_ci	        bc->force_add_cf) {
1558bf215546Sopenharmony_ci		r = r600_bytecode_add_cf(bc);
1559bf215546Sopenharmony_ci		if (r) {
1560bf215546Sopenharmony_ci			free(ntex);
1561bf215546Sopenharmony_ci			return r;
1562bf215546Sopenharmony_ci		}
1563bf215546Sopenharmony_ci		bc->cf_last->op = CF_OP_TEX;
1564bf215546Sopenharmony_ci	}
1565bf215546Sopenharmony_ci	if (ntex->src_gpr >= bc->ngpr) {
1566bf215546Sopenharmony_ci		bc->ngpr = ntex->src_gpr + 1;
1567bf215546Sopenharmony_ci	}
1568bf215546Sopenharmony_ci	if (ntex->dst_gpr >= bc->ngpr) {
1569bf215546Sopenharmony_ci		bc->ngpr = ntex->dst_gpr + 1;
1570bf215546Sopenharmony_ci	}
1571bf215546Sopenharmony_ci	list_addtail(&ntex->list, &bc->cf_last->tex);
1572bf215546Sopenharmony_ci	/* each texture fetch use 4 dwords */
1573bf215546Sopenharmony_ci	bc->cf_last->ndw += 4;
1574bf215546Sopenharmony_ci	bc->ndw += 4;
1575bf215546Sopenharmony_ci	if ((bc->cf_last->ndw / 4) >= r600_bytecode_num_tex_and_vtx_instructions(bc))
1576bf215546Sopenharmony_ci		bc->force_add_cf = 1;
1577bf215546Sopenharmony_ci	return 0;
1578bf215546Sopenharmony_ci}
1579bf215546Sopenharmony_ci
1580bf215546Sopenharmony_ciint r600_bytecode_add_gds(struct r600_bytecode *bc, const struct r600_bytecode_gds *gds)
1581bf215546Sopenharmony_ci{
1582bf215546Sopenharmony_ci	struct r600_bytecode_gds *ngds = r600_bytecode_gds();
1583bf215546Sopenharmony_ci	int r;
1584bf215546Sopenharmony_ci
1585bf215546Sopenharmony_ci	if (ngds == NULL)
1586bf215546Sopenharmony_ci		return -ENOMEM;
1587bf215546Sopenharmony_ci	memcpy(ngds, gds, sizeof(struct r600_bytecode_gds));
1588bf215546Sopenharmony_ci
1589bf215546Sopenharmony_ci	if (bc->gfx_level >= EVERGREEN) {
1590bf215546Sopenharmony_ci		if (gds->uav_index_mode)
1591bf215546Sopenharmony_ci			egcm_load_index_reg(bc, gds->uav_index_mode - 1, false);
1592bf215546Sopenharmony_ci	}
1593bf215546Sopenharmony_ci
1594bf215546Sopenharmony_ci	if (bc->cf_last == NULL ||
1595bf215546Sopenharmony_ci	    bc->cf_last->op != CF_OP_GDS ||
1596bf215546Sopenharmony_ci	    bc->force_add_cf) {
1597bf215546Sopenharmony_ci		r = r600_bytecode_add_cf(bc);
1598bf215546Sopenharmony_ci		if (r) {
1599bf215546Sopenharmony_ci			free(ngds);
1600bf215546Sopenharmony_ci			return r;
1601bf215546Sopenharmony_ci		}
1602bf215546Sopenharmony_ci		bc->cf_last->op = CF_OP_GDS;
1603bf215546Sopenharmony_ci	}
1604bf215546Sopenharmony_ci
1605bf215546Sopenharmony_ci	list_addtail(&ngds->list, &bc->cf_last->gds);
1606bf215546Sopenharmony_ci	bc->cf_last->ndw += 4; /* each GDS uses 4 dwords */
1607bf215546Sopenharmony_ci	if ((bc->cf_last->ndw / 4) >= r600_bytecode_num_tex_and_vtx_instructions(bc))
1608bf215546Sopenharmony_ci		bc->force_add_cf = 1;
1609bf215546Sopenharmony_ci	return 0;
1610bf215546Sopenharmony_ci}
1611bf215546Sopenharmony_ci
1612bf215546Sopenharmony_ciint r600_bytecode_add_cfinst(struct r600_bytecode *bc, unsigned op)
1613bf215546Sopenharmony_ci{
1614bf215546Sopenharmony_ci	int r;
1615bf215546Sopenharmony_ci
1616bf215546Sopenharmony_ci	/* Emit WAIT_ACK before control flow to ensure pending writes are always acked. */
1617bf215546Sopenharmony_ci	if (op != CF_OP_WAIT_ACK && op != CF_OP_MEM_SCRATCH)
1618bf215546Sopenharmony_ci		r600_bytecode_wait_acks(bc);
1619bf215546Sopenharmony_ci
1620bf215546Sopenharmony_ci	r = r600_bytecode_add_cf(bc);
1621bf215546Sopenharmony_ci	if (r)
1622bf215546Sopenharmony_ci		return r;
1623bf215546Sopenharmony_ci
1624bf215546Sopenharmony_ci	bc->cf_last->cond = V_SQ_CF_COND_ACTIVE;
1625bf215546Sopenharmony_ci	bc->cf_last->op = op;
1626bf215546Sopenharmony_ci	return 0;
1627bf215546Sopenharmony_ci}
1628bf215546Sopenharmony_ci
1629bf215546Sopenharmony_ciint cm_bytecode_add_cf_end(struct r600_bytecode *bc)
1630bf215546Sopenharmony_ci{
1631bf215546Sopenharmony_ci	return r600_bytecode_add_cfinst(bc, CF_OP_CF_END);
1632bf215546Sopenharmony_ci}
1633bf215546Sopenharmony_ci
1634bf215546Sopenharmony_ci/* common to all 3 families */
1635bf215546Sopenharmony_cistatic int r600_bytecode_vtx_build(struct r600_bytecode *bc, struct r600_bytecode_vtx *vtx, unsigned id)
1636bf215546Sopenharmony_ci{
1637bf215546Sopenharmony_ci	if (r600_isa_fetch(vtx->op)->flags & FF_MEM)
1638bf215546Sopenharmony_ci		return r700_bytecode_fetch_mem_build(bc, vtx, id);
1639bf215546Sopenharmony_ci	bc->bytecode[id] = S_SQ_VTX_WORD0_VTX_INST(r600_isa_fetch_opcode(bc->isa->hw_class, vtx->op)) |
1640bf215546Sopenharmony_ci			S_SQ_VTX_WORD0_BUFFER_ID(vtx->buffer_id) |
1641bf215546Sopenharmony_ci			S_SQ_VTX_WORD0_FETCH_TYPE(vtx->fetch_type) |
1642bf215546Sopenharmony_ci			S_SQ_VTX_WORD0_SRC_GPR(vtx->src_gpr) |
1643bf215546Sopenharmony_ci			S_SQ_VTX_WORD0_SRC_SEL_X(vtx->src_sel_x);
1644bf215546Sopenharmony_ci	if (bc->gfx_level < CAYMAN)
1645bf215546Sopenharmony_ci		bc->bytecode[id] |= S_SQ_VTX_WORD0_MEGA_FETCH_COUNT(vtx->mega_fetch_count);
1646bf215546Sopenharmony_ci	id++;
1647bf215546Sopenharmony_ci	bc->bytecode[id++] = S_SQ_VTX_WORD1_DST_SEL_X(vtx->dst_sel_x) |
1648bf215546Sopenharmony_ci				S_SQ_VTX_WORD1_DST_SEL_Y(vtx->dst_sel_y) |
1649bf215546Sopenharmony_ci				S_SQ_VTX_WORD1_DST_SEL_Z(vtx->dst_sel_z) |
1650bf215546Sopenharmony_ci				S_SQ_VTX_WORD1_DST_SEL_W(vtx->dst_sel_w) |
1651bf215546Sopenharmony_ci				S_SQ_VTX_WORD1_USE_CONST_FIELDS(vtx->use_const_fields) |
1652bf215546Sopenharmony_ci				S_SQ_VTX_WORD1_DATA_FORMAT(vtx->data_format) |
1653bf215546Sopenharmony_ci				S_SQ_VTX_WORD1_NUM_FORMAT_ALL(vtx->num_format_all) |
1654bf215546Sopenharmony_ci				S_SQ_VTX_WORD1_FORMAT_COMP_ALL(vtx->format_comp_all) |
1655bf215546Sopenharmony_ci				S_SQ_VTX_WORD1_SRF_MODE_ALL(vtx->srf_mode_all) |
1656bf215546Sopenharmony_ci				S_SQ_VTX_WORD1_GPR_DST_GPR(vtx->dst_gpr);
1657bf215546Sopenharmony_ci	bc->bytecode[id] = S_SQ_VTX_WORD2_OFFSET(vtx->offset)|
1658bf215546Sopenharmony_ci				S_SQ_VTX_WORD2_ENDIAN_SWAP(vtx->endian);
1659bf215546Sopenharmony_ci	if (bc->gfx_level >= EVERGREEN)
1660bf215546Sopenharmony_ci		bc->bytecode[id] |= ((vtx->buffer_index_mode & 0x3) << 21); // S_SQ_VTX_WORD2_BIM(vtx->buffer_index_mode);
1661bf215546Sopenharmony_ci	if (bc->gfx_level < CAYMAN)
1662bf215546Sopenharmony_ci		bc->bytecode[id] |= S_SQ_VTX_WORD2_MEGA_FETCH(1);
1663bf215546Sopenharmony_ci	id++;
1664bf215546Sopenharmony_ci	bc->bytecode[id++] = 0;
1665bf215546Sopenharmony_ci	return 0;
1666bf215546Sopenharmony_ci}
1667bf215546Sopenharmony_ci
1668bf215546Sopenharmony_ci/* common to all 3 families */
1669bf215546Sopenharmony_cistatic int r600_bytecode_tex_build(struct r600_bytecode *bc, struct r600_bytecode_tex *tex, unsigned id)
1670bf215546Sopenharmony_ci{
1671bf215546Sopenharmony_ci	bc->bytecode[id] = S_SQ_TEX_WORD0_TEX_INST(
1672bf215546Sopenharmony_ci					r600_isa_fetch_opcode(bc->isa->hw_class, tex->op)) |
1673bf215546Sopenharmony_ci			    EG_S_SQ_TEX_WORD0_INST_MOD(tex->inst_mod) |
1674bf215546Sopenharmony_ci				S_SQ_TEX_WORD0_RESOURCE_ID(tex->resource_id) |
1675bf215546Sopenharmony_ci				S_SQ_TEX_WORD0_SRC_GPR(tex->src_gpr) |
1676bf215546Sopenharmony_ci				S_SQ_TEX_WORD0_SRC_REL(tex->src_rel);
1677bf215546Sopenharmony_ci	if (bc->gfx_level >= EVERGREEN)
1678bf215546Sopenharmony_ci		bc->bytecode[id] |= ((tex->sampler_index_mode & 0x3) << 27) | // S_SQ_TEX_WORD0_SIM(tex->sampler_index_mode);
1679bf215546Sopenharmony_ci				((tex->resource_index_mode & 0x3) << 25); // S_SQ_TEX_WORD0_RIM(tex->resource_index_mode)
1680bf215546Sopenharmony_ci	id++;
1681bf215546Sopenharmony_ci	bc->bytecode[id++] = S_SQ_TEX_WORD1_DST_GPR(tex->dst_gpr) |
1682bf215546Sopenharmony_ci				S_SQ_TEX_WORD1_DST_REL(tex->dst_rel) |
1683bf215546Sopenharmony_ci				S_SQ_TEX_WORD1_DST_SEL_X(tex->dst_sel_x) |
1684bf215546Sopenharmony_ci				S_SQ_TEX_WORD1_DST_SEL_Y(tex->dst_sel_y) |
1685bf215546Sopenharmony_ci				S_SQ_TEX_WORD1_DST_SEL_Z(tex->dst_sel_z) |
1686bf215546Sopenharmony_ci				S_SQ_TEX_WORD1_DST_SEL_W(tex->dst_sel_w) |
1687bf215546Sopenharmony_ci				S_SQ_TEX_WORD1_LOD_BIAS(tex->lod_bias) |
1688bf215546Sopenharmony_ci				S_SQ_TEX_WORD1_COORD_TYPE_X(tex->coord_type_x) |
1689bf215546Sopenharmony_ci				S_SQ_TEX_WORD1_COORD_TYPE_Y(tex->coord_type_y) |
1690bf215546Sopenharmony_ci				S_SQ_TEX_WORD1_COORD_TYPE_Z(tex->coord_type_z) |
1691bf215546Sopenharmony_ci				S_SQ_TEX_WORD1_COORD_TYPE_W(tex->coord_type_w);
1692bf215546Sopenharmony_ci	bc->bytecode[id++] = S_SQ_TEX_WORD2_OFFSET_X(tex->offset_x) |
1693bf215546Sopenharmony_ci				S_SQ_TEX_WORD2_OFFSET_Y(tex->offset_y) |
1694bf215546Sopenharmony_ci				S_SQ_TEX_WORD2_OFFSET_Z(tex->offset_z) |
1695bf215546Sopenharmony_ci				S_SQ_TEX_WORD2_SAMPLER_ID(tex->sampler_id) |
1696bf215546Sopenharmony_ci				S_SQ_TEX_WORD2_SRC_SEL_X(tex->src_sel_x) |
1697bf215546Sopenharmony_ci				S_SQ_TEX_WORD2_SRC_SEL_Y(tex->src_sel_y) |
1698bf215546Sopenharmony_ci				S_SQ_TEX_WORD2_SRC_SEL_Z(tex->src_sel_z) |
1699bf215546Sopenharmony_ci				S_SQ_TEX_WORD2_SRC_SEL_W(tex->src_sel_w);
1700bf215546Sopenharmony_ci	bc->bytecode[id++] = 0;
1701bf215546Sopenharmony_ci	return 0;
1702bf215546Sopenharmony_ci}
1703bf215546Sopenharmony_ci
1704bf215546Sopenharmony_ci/* r600 only, r700/eg bits in r700_asm.c */
1705bf215546Sopenharmony_cistatic int r600_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, unsigned id)
1706bf215546Sopenharmony_ci{
1707bf215546Sopenharmony_ci	unsigned opcode = r600_isa_alu_opcode(bc->isa->hw_class, alu->op);
1708bf215546Sopenharmony_ci
1709bf215546Sopenharmony_ci	/* don't replace gpr by pv or ps for destination register */
1710bf215546Sopenharmony_ci	bc->bytecode[id++] = S_SQ_ALU_WORD0_SRC0_SEL(alu->src[0].sel) |
1711bf215546Sopenharmony_ci				S_SQ_ALU_WORD0_SRC0_REL(alu->src[0].rel) |
1712bf215546Sopenharmony_ci				S_SQ_ALU_WORD0_SRC0_CHAN(alu->src[0].chan) |
1713bf215546Sopenharmony_ci				S_SQ_ALU_WORD0_SRC0_NEG(alu->src[0].neg) |
1714bf215546Sopenharmony_ci				S_SQ_ALU_WORD0_SRC1_SEL(alu->src[1].sel) |
1715bf215546Sopenharmony_ci				S_SQ_ALU_WORD0_SRC1_REL(alu->src[1].rel) |
1716bf215546Sopenharmony_ci				S_SQ_ALU_WORD0_SRC1_CHAN(alu->src[1].chan) |
1717bf215546Sopenharmony_ci				S_SQ_ALU_WORD0_SRC1_NEG(alu->src[1].neg) |
1718bf215546Sopenharmony_ci				S_SQ_ALU_WORD0_INDEX_MODE(alu->index_mode) |
1719bf215546Sopenharmony_ci				S_SQ_ALU_WORD0_PRED_SEL(alu->pred_sel) |
1720bf215546Sopenharmony_ci				S_SQ_ALU_WORD0_LAST(alu->last);
1721bf215546Sopenharmony_ci
1722bf215546Sopenharmony_ci	if (alu->is_op3) {
1723bf215546Sopenharmony_ci		assert(!alu->src[0].abs && !alu->src[1].abs && !alu->src[2].abs);
1724bf215546Sopenharmony_ci		bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) |
1725bf215546Sopenharmony_ci					S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) |
1726bf215546Sopenharmony_ci					S_SQ_ALU_WORD1_DST_REL(alu->dst.rel) |
1727bf215546Sopenharmony_ci					S_SQ_ALU_WORD1_CLAMP(alu->dst.clamp) |
1728bf215546Sopenharmony_ci					S_SQ_ALU_WORD1_OP3_SRC2_SEL(alu->src[2].sel) |
1729bf215546Sopenharmony_ci					S_SQ_ALU_WORD1_OP3_SRC2_REL(alu->src[2].rel) |
1730bf215546Sopenharmony_ci					S_SQ_ALU_WORD1_OP3_SRC2_CHAN(alu->src[2].chan) |
1731bf215546Sopenharmony_ci					S_SQ_ALU_WORD1_OP3_SRC2_NEG(alu->src[2].neg) |
1732bf215546Sopenharmony_ci					S_SQ_ALU_WORD1_OP3_ALU_INST(opcode) |
1733bf215546Sopenharmony_ci					S_SQ_ALU_WORD1_BANK_SWIZZLE(alu->bank_swizzle);
1734bf215546Sopenharmony_ci	} else {
1735bf215546Sopenharmony_ci		bc->bytecode[id++] = S_SQ_ALU_WORD1_DST_GPR(alu->dst.sel) |
1736bf215546Sopenharmony_ci					S_SQ_ALU_WORD1_DST_CHAN(alu->dst.chan) |
1737bf215546Sopenharmony_ci					S_SQ_ALU_WORD1_DST_REL(alu->dst.rel) |
1738bf215546Sopenharmony_ci					S_SQ_ALU_WORD1_CLAMP(alu->dst.clamp) |
1739bf215546Sopenharmony_ci					S_SQ_ALU_WORD1_OP2_SRC0_ABS(alu->src[0].abs) |
1740bf215546Sopenharmony_ci					S_SQ_ALU_WORD1_OP2_SRC1_ABS(alu->src[1].abs) |
1741bf215546Sopenharmony_ci					S_SQ_ALU_WORD1_OP2_WRITE_MASK(alu->dst.write) |
1742bf215546Sopenharmony_ci					S_SQ_ALU_WORD1_OP2_OMOD(alu->omod) |
1743bf215546Sopenharmony_ci					S_SQ_ALU_WORD1_OP2_ALU_INST(opcode) |
1744bf215546Sopenharmony_ci					S_SQ_ALU_WORD1_BANK_SWIZZLE(alu->bank_swizzle) |
1745bf215546Sopenharmony_ci					S_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(alu->execute_mask) |
1746bf215546Sopenharmony_ci					S_SQ_ALU_WORD1_OP2_UPDATE_PRED(alu->update_pred);
1747bf215546Sopenharmony_ci	}
1748bf215546Sopenharmony_ci	return 0;
1749bf215546Sopenharmony_ci}
1750bf215546Sopenharmony_ci
1751bf215546Sopenharmony_cistatic void r600_bytecode_cf_vtx_build(uint32_t *bytecode, const struct r600_bytecode_cf *cf)
1752bf215546Sopenharmony_ci{
1753bf215546Sopenharmony_ci	*bytecode++ = S_SQ_CF_WORD0_ADDR(cf->addr >> 1);
1754bf215546Sopenharmony_ci	*bytecode++ = S_SQ_CF_WORD1_CF_INST(r600_isa_cf_opcode(ISA_CC_R600, cf->op)) |
1755bf215546Sopenharmony_ci			S_SQ_CF_WORD1_BARRIER(1) |
1756bf215546Sopenharmony_ci			S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1)|
1757bf215546Sopenharmony_ci			S_SQ_CF_WORD1_END_OF_PROGRAM(cf->end_of_program);
1758bf215546Sopenharmony_ci}
1759bf215546Sopenharmony_ci
1760bf215546Sopenharmony_ci/* common for r600/r700 - eg in eg_asm.c */
1761bf215546Sopenharmony_cistatic int r600_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf)
1762bf215546Sopenharmony_ci{
1763bf215546Sopenharmony_ci	unsigned id = cf->id;
1764bf215546Sopenharmony_ci	const struct cf_op_info *cfop = r600_isa_cf(cf->op);
1765bf215546Sopenharmony_ci	unsigned opcode = r600_isa_cf_opcode(bc->isa->hw_class, cf->op);
1766bf215546Sopenharmony_ci
1767bf215546Sopenharmony_ci
1768bf215546Sopenharmony_ci	if (cf->op == CF_NATIVE) {
1769bf215546Sopenharmony_ci		bc->bytecode[id++] = cf->isa[0];
1770bf215546Sopenharmony_ci		bc->bytecode[id++] = cf->isa[1];
1771bf215546Sopenharmony_ci	} else if (cfop->flags & CF_ALU) {
1772bf215546Sopenharmony_ci		bc->bytecode[id++] = S_SQ_CF_ALU_WORD0_ADDR(cf->addr >> 1) |
1773bf215546Sopenharmony_ci			S_SQ_CF_ALU_WORD0_KCACHE_MODE0(cf->kcache[0].mode) |
1774bf215546Sopenharmony_ci			S_SQ_CF_ALU_WORD0_KCACHE_BANK0(cf->kcache[0].bank) |
1775bf215546Sopenharmony_ci			S_SQ_CF_ALU_WORD0_KCACHE_BANK1(cf->kcache[1].bank);
1776bf215546Sopenharmony_ci
1777bf215546Sopenharmony_ci		bc->bytecode[id++] = S_SQ_CF_ALU_WORD1_CF_INST(opcode) |
1778bf215546Sopenharmony_ci			S_SQ_CF_ALU_WORD1_KCACHE_MODE1(cf->kcache[1].mode) |
1779bf215546Sopenharmony_ci			S_SQ_CF_ALU_WORD1_KCACHE_ADDR0(cf->kcache[0].addr) |
1780bf215546Sopenharmony_ci			S_SQ_CF_ALU_WORD1_KCACHE_ADDR1(cf->kcache[1].addr) |
1781bf215546Sopenharmony_ci					S_SQ_CF_ALU_WORD1_BARRIER(1) |
1782bf215546Sopenharmony_ci					S_SQ_CF_ALU_WORD1_USES_WATERFALL(bc->gfx_level == R600 ? cf->r6xx_uses_waterfall : 0) |
1783bf215546Sopenharmony_ci					S_SQ_CF_ALU_WORD1_COUNT((cf->ndw / 2) - 1);
1784bf215546Sopenharmony_ci	} else if (cfop->flags & CF_FETCH) {
1785bf215546Sopenharmony_ci		if (bc->gfx_level == R700)
1786bf215546Sopenharmony_ci			r700_bytecode_cf_vtx_build(&bc->bytecode[id], cf);
1787bf215546Sopenharmony_ci		else
1788bf215546Sopenharmony_ci			r600_bytecode_cf_vtx_build(&bc->bytecode[id], cf);
1789bf215546Sopenharmony_ci	} else if (cfop->flags & CF_EXP) {
1790bf215546Sopenharmony_ci		bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(cf->output.gpr) |
1791bf215546Sopenharmony_ci			S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) |
1792bf215546Sopenharmony_ci			S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) |
1793bf215546Sopenharmony_ci			S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type) |
1794bf215546Sopenharmony_ci			S_SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR(cf->output.index_gpr);
1795bf215546Sopenharmony_ci		bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) |
1796bf215546Sopenharmony_ci			S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(cf->output.swizzle_x) |
1797bf215546Sopenharmony_ci			S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(cf->output.swizzle_y) |
1798bf215546Sopenharmony_ci			S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(cf->output.swizzle_z) |
1799bf215546Sopenharmony_ci			S_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(cf->output.swizzle_w) |
1800bf215546Sopenharmony_ci			S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->barrier) |
1801bf215546Sopenharmony_ci			S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(opcode) |
1802bf215546Sopenharmony_ci			S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->end_of_program);
1803bf215546Sopenharmony_ci	} else if (cfop->flags & CF_MEM) {
1804bf215546Sopenharmony_ci		bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(cf->output.gpr) |
1805bf215546Sopenharmony_ci			S_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(cf->output.elem_size) |
1806bf215546Sopenharmony_ci			S_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(cf->output.array_base) |
1807bf215546Sopenharmony_ci			S_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(cf->output.type) |
1808bf215546Sopenharmony_ci			S_SQ_CF_ALLOC_EXPORT_WORD0_INDEX_GPR(cf->output.index_gpr);
1809bf215546Sopenharmony_ci		bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(cf->output.burst_count - 1) |
1810bf215546Sopenharmony_ci			S_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(cf->barrier) |
1811bf215546Sopenharmony_ci			S_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(opcode) |
1812bf215546Sopenharmony_ci			S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->end_of_program) |
1813bf215546Sopenharmony_ci			S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(cf->output.array_size) |
1814bf215546Sopenharmony_ci			S_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(cf->output.comp_mask);
1815bf215546Sopenharmony_ci	} else {
1816bf215546Sopenharmony_ci		bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->cf_addr >> 1);
1817bf215546Sopenharmony_ci		bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(opcode) |
1818bf215546Sopenharmony_ci					S_SQ_CF_WORD1_BARRIER(1) |
1819bf215546Sopenharmony_ci			                S_SQ_CF_WORD1_COND(cf->cond) |
1820bf215546Sopenharmony_ci			                S_SQ_CF_WORD1_POP_COUNT(cf->pop_count) |
1821bf215546Sopenharmony_ci					S_SQ_CF_WORD1_END_OF_PROGRAM(cf->end_of_program);
1822bf215546Sopenharmony_ci	}
1823bf215546Sopenharmony_ci	return 0;
1824bf215546Sopenharmony_ci}
1825bf215546Sopenharmony_ci
1826bf215546Sopenharmony_ciint r600_bytecode_build(struct r600_bytecode *bc)
1827bf215546Sopenharmony_ci{
1828bf215546Sopenharmony_ci	struct r600_bytecode_cf *cf;
1829bf215546Sopenharmony_ci	struct r600_bytecode_alu *alu;
1830bf215546Sopenharmony_ci	struct r600_bytecode_vtx *vtx;
1831bf215546Sopenharmony_ci	struct r600_bytecode_tex *tex;
1832bf215546Sopenharmony_ci	struct r600_bytecode_gds *gds;
1833bf215546Sopenharmony_ci	uint32_t literal[4];
1834bf215546Sopenharmony_ci	unsigned nliteral;
1835bf215546Sopenharmony_ci	unsigned addr;
1836bf215546Sopenharmony_ci	int i, r;
1837bf215546Sopenharmony_ci
1838bf215546Sopenharmony_ci	if (!bc->nstack) { // If not 0, Stack_size already provided by llvm
1839bf215546Sopenharmony_ci		if (bc->stack.max_entries)
1840bf215546Sopenharmony_ci			bc->nstack = bc->stack.max_entries;
1841bf215546Sopenharmony_ci		else if (bc->type == PIPE_SHADER_VERTEX ||
1842bf215546Sopenharmony_ci			 bc->type == PIPE_SHADER_TESS_EVAL ||
1843bf215546Sopenharmony_ci			 bc->type == PIPE_SHADER_TESS_CTRL)
1844bf215546Sopenharmony_ci			bc->nstack = 1;
1845bf215546Sopenharmony_ci	}
1846bf215546Sopenharmony_ci
1847bf215546Sopenharmony_ci	/* first path compute addr of each CF block */
1848bf215546Sopenharmony_ci	/* addr start after all the CF instructions */
1849bf215546Sopenharmony_ci	addr = bc->cf_last->id + 2;
1850bf215546Sopenharmony_ci	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
1851bf215546Sopenharmony_ci		if (r600_isa_cf(cf->op)->flags & CF_FETCH) {
1852bf215546Sopenharmony_ci			addr += 3;
1853bf215546Sopenharmony_ci			addr &= 0xFFFFFFFCUL;
1854bf215546Sopenharmony_ci		}
1855bf215546Sopenharmony_ci		cf->addr = addr;
1856bf215546Sopenharmony_ci		addr += cf->ndw;
1857bf215546Sopenharmony_ci		bc->ndw = cf->addr + cf->ndw;
1858bf215546Sopenharmony_ci	}
1859bf215546Sopenharmony_ci	free(bc->bytecode);
1860bf215546Sopenharmony_ci	bc->bytecode = calloc(4, bc->ndw);
1861bf215546Sopenharmony_ci	if (bc->bytecode == NULL)
1862bf215546Sopenharmony_ci		return -ENOMEM;
1863bf215546Sopenharmony_ci	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
1864bf215546Sopenharmony_ci		const struct cf_op_info *cfop = r600_isa_cf(cf->op);
1865bf215546Sopenharmony_ci		addr = cf->addr;
1866bf215546Sopenharmony_ci		if (bc->gfx_level >= EVERGREEN)
1867bf215546Sopenharmony_ci			r = eg_bytecode_cf_build(bc, cf);
1868bf215546Sopenharmony_ci		else
1869bf215546Sopenharmony_ci			r = r600_bytecode_cf_build(bc, cf);
1870bf215546Sopenharmony_ci		if (r)
1871bf215546Sopenharmony_ci			return r;
1872bf215546Sopenharmony_ci		if (cfop->flags & CF_ALU) {
1873bf215546Sopenharmony_ci			nliteral = 0;
1874bf215546Sopenharmony_ci			memset(literal, 0, sizeof(literal));
1875bf215546Sopenharmony_ci			LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
1876bf215546Sopenharmony_ci				r = r600_bytecode_alu_nliterals(alu, literal, &nliteral);
1877bf215546Sopenharmony_ci				if (r)
1878bf215546Sopenharmony_ci					return r;
1879bf215546Sopenharmony_ci				r600_bytecode_alu_adjust_literals(alu, literal, nliteral);
1880bf215546Sopenharmony_ci				r600_bytecode_assign_kcache_banks(alu, cf->kcache);
1881bf215546Sopenharmony_ci
1882bf215546Sopenharmony_ci				switch(bc->gfx_level) {
1883bf215546Sopenharmony_ci				case R600:
1884bf215546Sopenharmony_ci					r = r600_bytecode_alu_build(bc, alu, addr);
1885bf215546Sopenharmony_ci					break;
1886bf215546Sopenharmony_ci				case R700:
1887bf215546Sopenharmony_ci					r = r700_bytecode_alu_build(bc, alu, addr);
1888bf215546Sopenharmony_ci					break;
1889bf215546Sopenharmony_ci				case EVERGREEN:
1890bf215546Sopenharmony_ci				case CAYMAN:
1891bf215546Sopenharmony_ci					r = eg_bytecode_alu_build(bc, alu, addr);
1892bf215546Sopenharmony_ci					break;
1893bf215546Sopenharmony_ci				default:
1894bf215546Sopenharmony_ci					R600_ERR("unknown gfx level %d.\n", bc->gfx_level);
1895bf215546Sopenharmony_ci					return -EINVAL;
1896bf215546Sopenharmony_ci				}
1897bf215546Sopenharmony_ci				if (r)
1898bf215546Sopenharmony_ci					return r;
1899bf215546Sopenharmony_ci				addr += 2;
1900bf215546Sopenharmony_ci				if (alu->last) {
1901bf215546Sopenharmony_ci					for (i = 0; i < align(nliteral, 2); ++i) {
1902bf215546Sopenharmony_ci						bc->bytecode[addr++] = literal[i];
1903bf215546Sopenharmony_ci					}
1904bf215546Sopenharmony_ci					nliteral = 0;
1905bf215546Sopenharmony_ci					memset(literal, 0, sizeof(literal));
1906bf215546Sopenharmony_ci				}
1907bf215546Sopenharmony_ci			}
1908bf215546Sopenharmony_ci		} else if (cf->op == CF_OP_VTX) {
1909bf215546Sopenharmony_ci			LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
1910bf215546Sopenharmony_ci				r = r600_bytecode_vtx_build(bc, vtx, addr);
1911bf215546Sopenharmony_ci				if (r)
1912bf215546Sopenharmony_ci					return r;
1913bf215546Sopenharmony_ci				addr += 4;
1914bf215546Sopenharmony_ci			}
1915bf215546Sopenharmony_ci		} else if (cf->op == CF_OP_GDS) {
1916bf215546Sopenharmony_ci			assert(bc->gfx_level >= EVERGREEN);
1917bf215546Sopenharmony_ci			LIST_FOR_EACH_ENTRY(gds, &cf->gds, list) {
1918bf215546Sopenharmony_ci				r = eg_bytecode_gds_build(bc, gds, addr);
1919bf215546Sopenharmony_ci				if (r)
1920bf215546Sopenharmony_ci					return r;
1921bf215546Sopenharmony_ci				addr += 4;
1922bf215546Sopenharmony_ci			}
1923bf215546Sopenharmony_ci		} else if (cf->op == CF_OP_TEX) {
1924bf215546Sopenharmony_ci			LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
1925bf215546Sopenharmony_ci				assert(bc->gfx_level >= EVERGREEN);
1926bf215546Sopenharmony_ci				r = r600_bytecode_vtx_build(bc, vtx, addr);
1927bf215546Sopenharmony_ci				if (r)
1928bf215546Sopenharmony_ci					return r;
1929bf215546Sopenharmony_ci				addr += 4;
1930bf215546Sopenharmony_ci			}
1931bf215546Sopenharmony_ci			LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) {
1932bf215546Sopenharmony_ci				r = r600_bytecode_tex_build(bc, tex, addr);
1933bf215546Sopenharmony_ci				if (r)
1934bf215546Sopenharmony_ci					return r;
1935bf215546Sopenharmony_ci				addr += 4;
1936bf215546Sopenharmony_ci			}
1937bf215546Sopenharmony_ci		}
1938bf215546Sopenharmony_ci	}
1939bf215546Sopenharmony_ci	return 0;
1940bf215546Sopenharmony_ci}
1941bf215546Sopenharmony_ci
1942bf215546Sopenharmony_civoid r600_bytecode_clear(struct r600_bytecode *bc)
1943bf215546Sopenharmony_ci{
1944bf215546Sopenharmony_ci	struct r600_bytecode_cf *cf = NULL, *next_cf;
1945bf215546Sopenharmony_ci
1946bf215546Sopenharmony_ci	free(bc->bytecode);
1947bf215546Sopenharmony_ci	bc->bytecode = NULL;
1948bf215546Sopenharmony_ci
1949bf215546Sopenharmony_ci	LIST_FOR_EACH_ENTRY_SAFE(cf, next_cf, &bc->cf, list) {
1950bf215546Sopenharmony_ci		struct r600_bytecode_alu *alu = NULL, *next_alu;
1951bf215546Sopenharmony_ci		struct r600_bytecode_tex *tex = NULL, *next_tex;
1952bf215546Sopenharmony_ci		struct r600_bytecode_tex *vtx = NULL, *next_vtx;
1953bf215546Sopenharmony_ci		struct r600_bytecode_gds *gds = NULL, *next_gds;
1954bf215546Sopenharmony_ci
1955bf215546Sopenharmony_ci		LIST_FOR_EACH_ENTRY_SAFE(alu, next_alu, &cf->alu, list) {
1956bf215546Sopenharmony_ci			free(alu);
1957bf215546Sopenharmony_ci		}
1958bf215546Sopenharmony_ci
1959bf215546Sopenharmony_ci		list_inithead(&cf->alu);
1960bf215546Sopenharmony_ci
1961bf215546Sopenharmony_ci		LIST_FOR_EACH_ENTRY_SAFE(tex, next_tex, &cf->tex, list) {
1962bf215546Sopenharmony_ci			free(tex);
1963bf215546Sopenharmony_ci		}
1964bf215546Sopenharmony_ci
1965bf215546Sopenharmony_ci		list_inithead(&cf->tex);
1966bf215546Sopenharmony_ci
1967bf215546Sopenharmony_ci		LIST_FOR_EACH_ENTRY_SAFE(vtx, next_vtx, &cf->vtx, list) {
1968bf215546Sopenharmony_ci			free(vtx);
1969bf215546Sopenharmony_ci		}
1970bf215546Sopenharmony_ci
1971bf215546Sopenharmony_ci		list_inithead(&cf->vtx);
1972bf215546Sopenharmony_ci
1973bf215546Sopenharmony_ci		LIST_FOR_EACH_ENTRY_SAFE(gds, next_gds, &cf->gds, list) {
1974bf215546Sopenharmony_ci			free(gds);
1975bf215546Sopenharmony_ci		}
1976bf215546Sopenharmony_ci
1977bf215546Sopenharmony_ci		list_inithead(&cf->gds);
1978bf215546Sopenharmony_ci
1979bf215546Sopenharmony_ci		free(cf);
1980bf215546Sopenharmony_ci	}
1981bf215546Sopenharmony_ci
1982bf215546Sopenharmony_ci	list_inithead(&cf->list);
1983bf215546Sopenharmony_ci}
1984bf215546Sopenharmony_ci
/*
 * Write the one-character mnemonic of a swizzle selector to stderr.
 *
 * Selectors 0-3 print 'x', 'y', 'z', 'w'; 4 and 5 print '0' and '1';
 * 7 prints '_'.  Selector 6 and anything above 7 are rejected by the
 * assertion.  Returns the value returned by fprintf().
 */
static int print_swizzle(unsigned swz)
{
	static const char mnemonics[] = "xyzw01?_";

	assert(swz != 6 && swz < 8);

	return fprintf(stderr, "%c", mnemonics[swz]);
}
1991bf215546Sopenharmony_ci
/*
 * Write a register/constant index to stderr, with optional relative
 * addressing decoration.
 *
 * sel           - index to print.
 * rel           - non-zero when the access is relative; forces brackets.
 * index_mode    - selects the suffix for relative accesses: 0 or 6 append
 *                 "+AR", 4 appends "+AL", other values append nothing.
 *                 Values >= 5 with sel < 128 also add a leading "G".
 * need_brackets - non-zero to bracket the index even when not relative.
 *
 * Returns the sum of the fprintf() return values.
 */
static int print_sel(unsigned sel, unsigned rel, unsigned index_mode,
		unsigned need_brackets)
{
	const int bracketed = rel || need_brackets;
	int written = 0;

	if (rel && index_mode >= 5 && sel < 128)
		written += fprintf(stderr, "G");

	if (bracketed)
		written += fprintf(stderr, "[");

	written += fprintf(stderr, "%d", sel);

	if (rel) {
		switch (index_mode) {
		case 0:
		case 6:
			written += fprintf(stderr, "+AR");
			break;
		case 4:
			written += fprintf(stderr, "+AL");
			break;
		default:
			break;
		}
	}

	if (bracketed)
		written += fprintf(stderr, "]");

	return written;
}
2013bf215546Sopenharmony_ci
2014bf215546Sopenharmony_cistatic int print_dst(struct r600_bytecode_alu *alu)
2015bf215546Sopenharmony_ci{
2016bf215546Sopenharmony_ci	int o = 0;
2017bf215546Sopenharmony_ci	unsigned sel = alu->dst.sel;
2018bf215546Sopenharmony_ci	char reg_char = 'R';
2019bf215546Sopenharmony_ci	if (sel > 128 - 4) { /* clause temporary gpr */
2020bf215546Sopenharmony_ci		sel -= 128 - 4;
2021bf215546Sopenharmony_ci		reg_char = 'T';
2022bf215546Sopenharmony_ci	}
2023bf215546Sopenharmony_ci
2024bf215546Sopenharmony_ci	if (alu_writes(alu)) {
2025bf215546Sopenharmony_ci		o += fprintf(stderr, "%c", reg_char);
2026bf215546Sopenharmony_ci		o += print_sel(alu->dst.sel, alu->dst.rel, alu->index_mode, 0);
2027bf215546Sopenharmony_ci	} else {
2028bf215546Sopenharmony_ci		o += fprintf(stderr, "__");
2029bf215546Sopenharmony_ci	}
2030bf215546Sopenharmony_ci	o += fprintf(stderr, ".");
2031bf215546Sopenharmony_ci	o += print_swizzle(alu->dst.chan);
2032bf215546Sopenharmony_ci	return o;
2033bf215546Sopenharmony_ci}
2034bf215546Sopenharmony_ci
2035bf215546Sopenharmony_cistatic int print_src(struct r600_bytecode_alu *alu, unsigned idx)
2036bf215546Sopenharmony_ci{
2037bf215546Sopenharmony_ci	int o = 0;
2038bf215546Sopenharmony_ci	struct r600_bytecode_alu_src *src = &alu->src[idx];
2039bf215546Sopenharmony_ci	unsigned sel = src->sel, need_sel = 1, need_chan = 1, need_brackets = 0;
2040bf215546Sopenharmony_ci
2041bf215546Sopenharmony_ci	if (src->neg)
2042bf215546Sopenharmony_ci		o += fprintf(stderr,"-");
2043bf215546Sopenharmony_ci	if (src->abs)
2044bf215546Sopenharmony_ci		o += fprintf(stderr,"|");
2045bf215546Sopenharmony_ci
2046bf215546Sopenharmony_ci	if (sel < 128 - 4) {
2047bf215546Sopenharmony_ci		o += fprintf(stderr, "R");
2048bf215546Sopenharmony_ci	} else if (sel < 128) {
2049bf215546Sopenharmony_ci		o += fprintf(stderr, "T");
2050bf215546Sopenharmony_ci		sel -= 128 - 4;
2051bf215546Sopenharmony_ci	} else if (sel < 160) {
2052bf215546Sopenharmony_ci		o += fprintf(stderr, "KC0");
2053bf215546Sopenharmony_ci		need_brackets = 1;
2054bf215546Sopenharmony_ci		sel -= 128;
2055bf215546Sopenharmony_ci	} else if (sel < 192) {
2056bf215546Sopenharmony_ci		o += fprintf(stderr, "KC1");
2057bf215546Sopenharmony_ci		need_brackets = 1;
2058bf215546Sopenharmony_ci		sel -= 160;
2059bf215546Sopenharmony_ci	} else if (sel >= 512) {
2060bf215546Sopenharmony_ci		o += fprintf(stderr, "C%d", src->kc_bank);
2061bf215546Sopenharmony_ci		need_brackets = 1;
2062bf215546Sopenharmony_ci		sel -= 512;
2063bf215546Sopenharmony_ci	} else if (sel >= 448) {
2064bf215546Sopenharmony_ci		o += fprintf(stderr, "Param");
2065bf215546Sopenharmony_ci		sel -= 448;
2066bf215546Sopenharmony_ci		need_chan = 0;
2067bf215546Sopenharmony_ci	} else if (sel >= 288) {
2068bf215546Sopenharmony_ci		o += fprintf(stderr, "KC3");
2069bf215546Sopenharmony_ci		need_brackets = 1;
2070bf215546Sopenharmony_ci		sel -= 288;
2071bf215546Sopenharmony_ci	} else if (sel >= 256) {
2072bf215546Sopenharmony_ci		o += fprintf(stderr, "KC2");
2073bf215546Sopenharmony_ci		need_brackets = 1;
2074bf215546Sopenharmony_ci		sel -= 256;
2075bf215546Sopenharmony_ci	} else {
2076bf215546Sopenharmony_ci		need_sel = 0;
2077bf215546Sopenharmony_ci		need_chan = 0;
2078bf215546Sopenharmony_ci		switch (sel) {
2079bf215546Sopenharmony_ci		case EG_V_SQ_ALU_SRC_LDS_DIRECT_A:
2080bf215546Sopenharmony_ci			o += fprintf(stderr, "LDS_A[0x%08X]", src->value);
2081bf215546Sopenharmony_ci			break;
2082bf215546Sopenharmony_ci		case EG_V_SQ_ALU_SRC_LDS_DIRECT_B:
2083bf215546Sopenharmony_ci			o += fprintf(stderr, "LDS_B[0x%08X]", src->value);
2084bf215546Sopenharmony_ci			break;
2085bf215546Sopenharmony_ci		case EG_V_SQ_ALU_SRC_LDS_OQ_A:
2086bf215546Sopenharmony_ci			o += fprintf(stderr, "LDS_OQ_A");
2087bf215546Sopenharmony_ci			need_chan = 1;
2088bf215546Sopenharmony_ci			break;
2089bf215546Sopenharmony_ci		case EG_V_SQ_ALU_SRC_LDS_OQ_B:
2090bf215546Sopenharmony_ci			o += fprintf(stderr, "LDS_OQ_B");
2091bf215546Sopenharmony_ci			need_chan = 1;
2092bf215546Sopenharmony_ci			break;
2093bf215546Sopenharmony_ci		case EG_V_SQ_ALU_SRC_LDS_OQ_A_POP:
2094bf215546Sopenharmony_ci			o += fprintf(stderr, "LDS_OQ_A_POP");
2095bf215546Sopenharmony_ci			need_chan = 1;
2096bf215546Sopenharmony_ci			break;
2097bf215546Sopenharmony_ci		case EG_V_SQ_ALU_SRC_LDS_OQ_B_POP:
2098bf215546Sopenharmony_ci			o += fprintf(stderr, "LDS_OQ_B_POP");
2099bf215546Sopenharmony_ci			need_chan = 1;
2100bf215546Sopenharmony_ci			break;
2101bf215546Sopenharmony_ci		case EG_V_SQ_ALU_SRC_TIME_LO:
2102bf215546Sopenharmony_ci			o += fprintf(stderr, "TIME_LO");
2103bf215546Sopenharmony_ci			break;
2104bf215546Sopenharmony_ci		case EG_V_SQ_ALU_SRC_TIME_HI:
2105bf215546Sopenharmony_ci			o += fprintf(stderr, "TIME_HI");
2106bf215546Sopenharmony_ci			break;
2107bf215546Sopenharmony_ci		case EG_V_SQ_ALU_SRC_SE_ID:
2108bf215546Sopenharmony_ci			o += fprintf(stderr, "SE_ID");
2109bf215546Sopenharmony_ci			break;
2110bf215546Sopenharmony_ci		case EG_V_SQ_ALU_SRC_SIMD_ID:
2111bf215546Sopenharmony_ci			o += fprintf(stderr, "SIMD_ID");
2112bf215546Sopenharmony_ci			break;
2113bf215546Sopenharmony_ci		case EG_V_SQ_ALU_SRC_HW_WAVE_ID:
2114bf215546Sopenharmony_ci			o += fprintf(stderr, "HW_WAVE_ID");
2115bf215546Sopenharmony_ci			break;
2116bf215546Sopenharmony_ci		case V_SQ_ALU_SRC_PS:
2117bf215546Sopenharmony_ci			o += fprintf(stderr, "PS");
2118bf215546Sopenharmony_ci			break;
2119bf215546Sopenharmony_ci		case V_SQ_ALU_SRC_PV:
2120bf215546Sopenharmony_ci			o += fprintf(stderr, "PV");
2121bf215546Sopenharmony_ci			need_chan = 1;
2122bf215546Sopenharmony_ci			break;
2123bf215546Sopenharmony_ci		case V_SQ_ALU_SRC_LITERAL:
2124bf215546Sopenharmony_ci			o += fprintf(stderr, "[0x%08X %f]", src->value, u_bitcast_u2f(src->value));
2125bf215546Sopenharmony_ci			break;
2126bf215546Sopenharmony_ci		case V_SQ_ALU_SRC_0_5:
2127bf215546Sopenharmony_ci			o += fprintf(stderr, "0.5");
2128bf215546Sopenharmony_ci			break;
2129bf215546Sopenharmony_ci		case V_SQ_ALU_SRC_M_1_INT:
2130bf215546Sopenharmony_ci			o += fprintf(stderr, "-1");
2131bf215546Sopenharmony_ci			break;
2132bf215546Sopenharmony_ci		case V_SQ_ALU_SRC_1_INT:
2133bf215546Sopenharmony_ci			o += fprintf(stderr, "1");
2134bf215546Sopenharmony_ci			break;
2135bf215546Sopenharmony_ci		case V_SQ_ALU_SRC_1:
2136bf215546Sopenharmony_ci			o += fprintf(stderr, "1.0");
2137bf215546Sopenharmony_ci			break;
2138bf215546Sopenharmony_ci		case V_SQ_ALU_SRC_0:
2139bf215546Sopenharmony_ci			o += fprintf(stderr, "0");
2140bf215546Sopenharmony_ci			break;
2141bf215546Sopenharmony_ci		default:
2142bf215546Sopenharmony_ci			o += fprintf(stderr, "??IMM_%d", sel);
2143bf215546Sopenharmony_ci			break;
2144bf215546Sopenharmony_ci		}
2145bf215546Sopenharmony_ci	}
2146bf215546Sopenharmony_ci
2147bf215546Sopenharmony_ci	if (need_sel)
2148bf215546Sopenharmony_ci		o += print_sel(sel, src->rel, alu->index_mode, need_brackets);
2149bf215546Sopenharmony_ci
2150bf215546Sopenharmony_ci	if (need_chan) {
2151bf215546Sopenharmony_ci		o += fprintf(stderr, ".");
2152bf215546Sopenharmony_ci		o += print_swizzle(src->chan);
2153bf215546Sopenharmony_ci	}
2154bf215546Sopenharmony_ci
2155bf215546Sopenharmony_ci	if (src->abs)
2156bf215546Sopenharmony_ci		o += fprintf(stderr,"|");
2157bf215546Sopenharmony_ci
2158bf215546Sopenharmony_ci	return o;
2159bf215546Sopenharmony_ci}
2160bf215546Sopenharmony_ci
/*
 * Pad the current stderr line with spaces from column p up to column c.
 *
 * p - number of characters already written on the line.
 * c - target column.
 *
 * Nothing is written when p >= c.  Returns the sum of the fprintf()
 * return values, i.e. the number of spaces written on success.
 */
static int print_indent(int p, int c)
{
	int padded = 0;
	int col;

	for (col = p; col < c; ++col)
		padded += fprintf(stderr, " ");

	return padded;
}
2168bf215546Sopenharmony_ci
2169bf215546Sopenharmony_civoid r600_bytecode_disasm(struct r600_bytecode *bc)
2170bf215546Sopenharmony_ci{
2171bf215546Sopenharmony_ci	const char *index_mode[] = {"CF_INDEX_NONE", "CF_INDEX_0", "CF_INDEX_1"};
2172bf215546Sopenharmony_ci	static int index = 0;
2173bf215546Sopenharmony_ci	struct r600_bytecode_cf *cf = NULL;
2174bf215546Sopenharmony_ci	struct r600_bytecode_alu *alu = NULL;
2175bf215546Sopenharmony_ci	struct r600_bytecode_vtx *vtx = NULL;
2176bf215546Sopenharmony_ci	struct r600_bytecode_tex *tex = NULL;
2177bf215546Sopenharmony_ci	struct r600_bytecode_gds *gds = NULL;
2178bf215546Sopenharmony_ci
2179bf215546Sopenharmony_ci	unsigned i, id, ngr = 0, last;
2180bf215546Sopenharmony_ci	uint32_t literal[4];
2181bf215546Sopenharmony_ci	unsigned nliteral;
2182bf215546Sopenharmony_ci	char chip = '6';
2183bf215546Sopenharmony_ci
2184bf215546Sopenharmony_ci	switch (bc->gfx_level) {
2185bf215546Sopenharmony_ci	case R700:
2186bf215546Sopenharmony_ci		chip = '7';
2187bf215546Sopenharmony_ci		break;
2188bf215546Sopenharmony_ci	case EVERGREEN:
2189bf215546Sopenharmony_ci		chip = 'E';
2190bf215546Sopenharmony_ci		break;
2191bf215546Sopenharmony_ci	case CAYMAN:
2192bf215546Sopenharmony_ci		chip = 'C';
2193bf215546Sopenharmony_ci		break;
2194bf215546Sopenharmony_ci	case R600:
2195bf215546Sopenharmony_ci	default:
2196bf215546Sopenharmony_ci		chip = '6';
2197bf215546Sopenharmony_ci		break;
2198bf215546Sopenharmony_ci	}
2199bf215546Sopenharmony_ci	fprintf(stderr, "bytecode %d dw -- %d gprs -- %d nstack -------------\n",
2200bf215546Sopenharmony_ci	        bc->ndw, bc->ngpr, bc->nstack);
2201bf215546Sopenharmony_ci	fprintf(stderr, "shader %d -- %c\n", index++, chip);
2202bf215546Sopenharmony_ci
2203bf215546Sopenharmony_ci	LIST_FOR_EACH_ENTRY(cf, &bc->cf, list) {
2204bf215546Sopenharmony_ci		id = cf->id;
2205bf215546Sopenharmony_ci		if (cf->op == CF_NATIVE) {
2206bf215546Sopenharmony_ci			fprintf(stderr, "%04d %08X %08X CF_NATIVE\n", id, bc->bytecode[id],
2207bf215546Sopenharmony_ci					bc->bytecode[id + 1]);
2208bf215546Sopenharmony_ci		} else {
2209bf215546Sopenharmony_ci			const struct cf_op_info *cfop = r600_isa_cf(cf->op);
2210bf215546Sopenharmony_ci			if (cfop->flags & CF_ALU) {
2211bf215546Sopenharmony_ci				if (cf->eg_alu_extended) {
2212bf215546Sopenharmony_ci					fprintf(stderr, "%04d %08X %08X  %s\n", id, bc->bytecode[id],
2213bf215546Sopenharmony_ci							bc->bytecode[id + 1], "ALU_EXT");
2214bf215546Sopenharmony_ci					id += 2;
2215bf215546Sopenharmony_ci				}
2216bf215546Sopenharmony_ci				fprintf(stderr, "%04d %08X %08X  %s ", id, bc->bytecode[id],
2217bf215546Sopenharmony_ci						bc->bytecode[id + 1], cfop->name);
2218bf215546Sopenharmony_ci				fprintf(stderr, "%d @%d ", cf->ndw / 2, cf->addr);
2219bf215546Sopenharmony_ci				for (i = 0; i < 4; ++i) {
2220bf215546Sopenharmony_ci					if (cf->kcache[i].mode) {
2221bf215546Sopenharmony_ci						int c_start = (cf->kcache[i].addr << 4);
2222bf215546Sopenharmony_ci						int c_end = c_start + (cf->kcache[i].mode << 4);
2223bf215546Sopenharmony_ci						fprintf(stderr, "KC%d[CB%d:%d-%d%s%s] ",
2224bf215546Sopenharmony_ci						        i, cf->kcache[i].bank, c_start, c_end,
2225bf215546Sopenharmony_ci						        cf->kcache[i].index_mode ? " " : "",
2226bf215546Sopenharmony_ci						        cf->kcache[i].index_mode ? index_mode[cf->kcache[i].index_mode] : "");
2227bf215546Sopenharmony_ci					}
2228bf215546Sopenharmony_ci				}
2229bf215546Sopenharmony_ci				fprintf(stderr, "\n");
2230bf215546Sopenharmony_ci			} else if (cfop->flags & CF_FETCH) {
2231bf215546Sopenharmony_ci				fprintf(stderr, "%04d %08X %08X  %s ", id, bc->bytecode[id],
2232bf215546Sopenharmony_ci						bc->bytecode[id + 1], cfop->name);
2233bf215546Sopenharmony_ci				fprintf(stderr, "%d @%d ", cf->ndw / 4, cf->addr);
2234bf215546Sopenharmony_ci				if (cf->vpm)
2235bf215546Sopenharmony_ci					fprintf(stderr, "VPM ");
2236bf215546Sopenharmony_ci				if (cf->end_of_program)
2237bf215546Sopenharmony_ci					fprintf(stderr, "EOP ");
2238bf215546Sopenharmony_ci				fprintf(stderr, "\n");
2239bf215546Sopenharmony_ci
2240bf215546Sopenharmony_ci			} else if (cfop->flags & CF_EXP) {
2241bf215546Sopenharmony_ci				int o = 0;
2242bf215546Sopenharmony_ci				const char *exp_type[] = {"PIXEL", "POS  ", "PARAM"};
2243bf215546Sopenharmony_ci				o += fprintf(stderr, "%04d %08X %08X  %s ", id, bc->bytecode[id],
2244bf215546Sopenharmony_ci						bc->bytecode[id + 1], cfop->name);
2245bf215546Sopenharmony_ci				o += print_indent(o, 43);
2246bf215546Sopenharmony_ci				o += fprintf(stderr, "%s ", exp_type[cf->output.type]);
2247bf215546Sopenharmony_ci				if (cf->output.burst_count > 1) {
2248bf215546Sopenharmony_ci					o += fprintf(stderr, "%d-%d ", cf->output.array_base,
2249bf215546Sopenharmony_ci							cf->output.array_base + cf->output.burst_count - 1);
2250bf215546Sopenharmony_ci
2251bf215546Sopenharmony_ci					o += print_indent(o, 55);
2252bf215546Sopenharmony_ci					o += fprintf(stderr, "R%d-%d.", cf->output.gpr,
2253bf215546Sopenharmony_ci							cf->output.gpr + cf->output.burst_count - 1);
2254bf215546Sopenharmony_ci				} else {
2255bf215546Sopenharmony_ci					o += fprintf(stderr, "%d ", cf->output.array_base);
2256bf215546Sopenharmony_ci					o += print_indent(o, 55);
2257bf215546Sopenharmony_ci					o += fprintf(stderr, "R%d.", cf->output.gpr);
2258bf215546Sopenharmony_ci				}
2259bf215546Sopenharmony_ci
2260bf215546Sopenharmony_ci				o += print_swizzle(cf->output.swizzle_x);
2261bf215546Sopenharmony_ci				o += print_swizzle(cf->output.swizzle_y);
2262bf215546Sopenharmony_ci				o += print_swizzle(cf->output.swizzle_z);
2263bf215546Sopenharmony_ci				o += print_swizzle(cf->output.swizzle_w);
2264bf215546Sopenharmony_ci
2265bf215546Sopenharmony_ci				print_indent(o, 67);
2266bf215546Sopenharmony_ci
2267bf215546Sopenharmony_ci				fprintf(stderr, " ES:%X ", cf->output.elem_size);
2268bf215546Sopenharmony_ci				if (cf->mark)
2269bf215546Sopenharmony_ci					fprintf(stderr, "MARK ");
2270bf215546Sopenharmony_ci				if (!cf->barrier)
2271bf215546Sopenharmony_ci					fprintf(stderr, "NO_BARRIER ");
2272bf215546Sopenharmony_ci				if (cf->end_of_program)
2273bf215546Sopenharmony_ci					fprintf(stderr, "EOP ");
2274bf215546Sopenharmony_ci				fprintf(stderr, "\n");
2275bf215546Sopenharmony_ci			} else if (r600_isa_cf(cf->op)->flags & CF_MEM) {
2276bf215546Sopenharmony_ci				int o = 0;
2277bf215546Sopenharmony_ci				const char *exp_type[] = {"WRITE", "WRITE_IND", "WRITE_ACK",
2278bf215546Sopenharmony_ci						"WRITE_IND_ACK"};
2279bf215546Sopenharmony_ci				o += fprintf(stderr, "%04d %08X %08X  %s ", id,
2280bf215546Sopenharmony_ci						bc->bytecode[id], bc->bytecode[id + 1], cfop->name);
2281bf215546Sopenharmony_ci				o += print_indent(o, 43);
2282bf215546Sopenharmony_ci				o += fprintf(stderr, "%s ", exp_type[cf->output.type]);
2283bf215546Sopenharmony_ci
2284bf215546Sopenharmony_ci				if (r600_isa_cf(cf->op)->flags & CF_RAT) {
2285bf215546Sopenharmony_ci					o += fprintf(stderr, "RAT%d", cf->rat.id);
2286bf215546Sopenharmony_ci					if (cf->rat.index_mode) {
2287bf215546Sopenharmony_ci						o += fprintf(stderr, "[IDX%d]", cf->rat.index_mode - 1);
2288bf215546Sopenharmony_ci					}
2289bf215546Sopenharmony_ci					o += fprintf(stderr, " INST: %d ", cf->rat.inst);
2290bf215546Sopenharmony_ci				}
2291bf215546Sopenharmony_ci
2292bf215546Sopenharmony_ci				if (cf->output.burst_count > 1) {
2293bf215546Sopenharmony_ci					o += fprintf(stderr, "%d-%d ", cf->output.array_base,
2294bf215546Sopenharmony_ci							cf->output.array_base + cf->output.burst_count - 1);
2295bf215546Sopenharmony_ci					o += print_indent(o, 55);
2296bf215546Sopenharmony_ci					o += fprintf(stderr, "R%d-%d.", cf->output.gpr,
2297bf215546Sopenharmony_ci							cf->output.gpr + cf->output.burst_count - 1);
2298bf215546Sopenharmony_ci				} else {
2299bf215546Sopenharmony_ci					o += fprintf(stderr, "%d ", cf->output.array_base);
2300bf215546Sopenharmony_ci					o += print_indent(o, 55);
2301bf215546Sopenharmony_ci					o += fprintf(stderr, "R%d.", cf->output.gpr);
2302bf215546Sopenharmony_ci				}
2303bf215546Sopenharmony_ci				for (i = 0; i < 4; ++i) {
2304bf215546Sopenharmony_ci					if (cf->output.comp_mask & (1 << i))
2305bf215546Sopenharmony_ci						o += print_swizzle(i);
2306bf215546Sopenharmony_ci					else
2307bf215546Sopenharmony_ci						o += print_swizzle(7);
2308bf215546Sopenharmony_ci				}
2309bf215546Sopenharmony_ci
2310bf215546Sopenharmony_ci				if (cf->output.type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE_IND ||
2311bf215546Sopenharmony_ci				    cf->output.type == V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_READ_IND)
2312bf215546Sopenharmony_ci					o += fprintf(stderr, " R%d", cf->output.index_gpr);
2313bf215546Sopenharmony_ci
2314bf215546Sopenharmony_ci				o += print_indent(o, 67);
2315bf215546Sopenharmony_ci
2316bf215546Sopenharmony_ci				fprintf(stderr, " ES:%i ", cf->output.elem_size);
2317bf215546Sopenharmony_ci				if (cf->output.array_size != 0xFFF)
2318bf215546Sopenharmony_ci					fprintf(stderr, "AS:%i ", cf->output.array_size);
2319bf215546Sopenharmony_ci				if (cf->mark)
2320bf215546Sopenharmony_ci					fprintf(stderr, "MARK ");
2321bf215546Sopenharmony_ci				if (!cf->barrier)
2322bf215546Sopenharmony_ci					fprintf(stderr, "NO_BARRIER ");
2323bf215546Sopenharmony_ci				if (cf->end_of_program)
2324bf215546Sopenharmony_ci					fprintf(stderr, "EOP ");
2325bf215546Sopenharmony_ci
2326bf215546Sopenharmony_ci				if (cf->output.mark)
2327bf215546Sopenharmony_ci					fprintf(stderr, "MARK ");
2328bf215546Sopenharmony_ci
2329bf215546Sopenharmony_ci				fprintf(stderr, "\n");
2330bf215546Sopenharmony_ci			} else {
2331bf215546Sopenharmony_ci				fprintf(stderr, "%04d %08X %08X  %s ", id, bc->bytecode[id],
2332bf215546Sopenharmony_ci						bc->bytecode[id + 1], cfop->name);
2333bf215546Sopenharmony_ci				fprintf(stderr, "@%d ", cf->cf_addr);
2334bf215546Sopenharmony_ci				if (cf->cond)
2335bf215546Sopenharmony_ci					fprintf(stderr, "CND:%X ", cf->cond);
2336bf215546Sopenharmony_ci				if (cf->pop_count)
2337bf215546Sopenharmony_ci					fprintf(stderr, "POP:%X ", cf->pop_count);
2338bf215546Sopenharmony_ci				if (cf->count && (cfop->flags & CF_EMIT))
2339bf215546Sopenharmony_ci					fprintf(stderr, "STREAM%d ", cf->count);
2340bf215546Sopenharmony_ci				if (cf->vpm)
2341bf215546Sopenharmony_ci					fprintf(stderr, "VPM ");
2342bf215546Sopenharmony_ci				if (cf->end_of_program)
2343bf215546Sopenharmony_ci					fprintf(stderr, "EOP ");
2344bf215546Sopenharmony_ci				fprintf(stderr, "\n");
2345bf215546Sopenharmony_ci			}
2346bf215546Sopenharmony_ci		}
2347bf215546Sopenharmony_ci
2348bf215546Sopenharmony_ci		id = cf->addr;
2349bf215546Sopenharmony_ci		nliteral = 0;
2350bf215546Sopenharmony_ci		last = 1;
2351bf215546Sopenharmony_ci		LIST_FOR_EACH_ENTRY(alu, &cf->alu, list) {
2352bf215546Sopenharmony_ci			const char *omod_str[] = {"","*2","*4","/2"};
2353bf215546Sopenharmony_ci			const struct alu_op_info *aop = r600_isa_alu(alu->op);
2354bf215546Sopenharmony_ci			int o = 0;
2355bf215546Sopenharmony_ci
2356bf215546Sopenharmony_ci			r600_bytecode_alu_nliterals(alu, literal, &nliteral);
2357bf215546Sopenharmony_ci			o += fprintf(stderr, " %04d %08X %08X  ", id, bc->bytecode[id], bc->bytecode[id+1]);
2358bf215546Sopenharmony_ci			if (last)
2359bf215546Sopenharmony_ci				o += fprintf(stderr, "%4d ", ++ngr);
2360bf215546Sopenharmony_ci			else
2361bf215546Sopenharmony_ci				o += fprintf(stderr, "     ");
2362bf215546Sopenharmony_ci			o += fprintf(stderr, "%c%c %c ", alu->execute_mask ? 'M':' ',
2363bf215546Sopenharmony_ci					alu->update_pred ? 'P':' ',
2364bf215546Sopenharmony_ci					alu->pred_sel ? alu->pred_sel==2 ? '0':'1':' ');
2365bf215546Sopenharmony_ci
2366bf215546Sopenharmony_ci			o += fprintf(stderr, "%s%s%s ", aop->name,
2367bf215546Sopenharmony_ci					omod_str[alu->omod], alu->dst.clamp ? "_sat":"");
2368bf215546Sopenharmony_ci
2369bf215546Sopenharmony_ci			o += print_indent(o,60);
2370bf215546Sopenharmony_ci			o += print_dst(alu);
2371bf215546Sopenharmony_ci			for (i = 0; i < aop->src_count; ++i) {
2372bf215546Sopenharmony_ci				o += fprintf(stderr, i == 0 ? ",  ": ", ");
2373bf215546Sopenharmony_ci				o += print_src(alu, i);
2374bf215546Sopenharmony_ci			}
2375bf215546Sopenharmony_ci
2376bf215546Sopenharmony_ci			if (alu->bank_swizzle) {
2377bf215546Sopenharmony_ci				o += print_indent(o,75);
2378bf215546Sopenharmony_ci				o += fprintf(stderr, "  BS:%d", alu->bank_swizzle);
2379bf215546Sopenharmony_ci			}
2380bf215546Sopenharmony_ci
2381bf215546Sopenharmony_ci			fprintf(stderr, "\n");
2382bf215546Sopenharmony_ci			id += 2;
2383bf215546Sopenharmony_ci
2384bf215546Sopenharmony_ci			if (alu->last) {
2385bf215546Sopenharmony_ci				for (i = 0; i < nliteral; i++, id++) {
2386bf215546Sopenharmony_ci					float *f = (float*)(bc->bytecode + id);
2387bf215546Sopenharmony_ci					o = fprintf(stderr, " %04d %08X", id, bc->bytecode[id]);
2388bf215546Sopenharmony_ci					print_indent(o, 60);
2389bf215546Sopenharmony_ci					fprintf(stderr, " %f (%d)\n", *f, *(bc->bytecode + id));
2390bf215546Sopenharmony_ci				}
2391bf215546Sopenharmony_ci				id += nliteral & 1;
2392bf215546Sopenharmony_ci				nliteral = 0;
2393bf215546Sopenharmony_ci			}
2394bf215546Sopenharmony_ci			last = alu->last;
2395bf215546Sopenharmony_ci		}
2396bf215546Sopenharmony_ci
2397bf215546Sopenharmony_ci		LIST_FOR_EACH_ENTRY(tex, &cf->tex, list) {
2398bf215546Sopenharmony_ci			int o = 0;
2399bf215546Sopenharmony_ci			o += fprintf(stderr, " %04d %08X %08X %08X   ", id, bc->bytecode[id],
2400bf215546Sopenharmony_ci					bc->bytecode[id + 1], bc->bytecode[id + 2]);
2401bf215546Sopenharmony_ci
2402bf215546Sopenharmony_ci			o += fprintf(stderr, "%s ", r600_isa_fetch(tex->op)->name);
2403bf215546Sopenharmony_ci
2404bf215546Sopenharmony_ci			o += print_indent(o, 50);
2405bf215546Sopenharmony_ci
2406bf215546Sopenharmony_ci			o += fprintf(stderr, "R%d.", tex->dst_gpr);
2407bf215546Sopenharmony_ci			o += print_swizzle(tex->dst_sel_x);
2408bf215546Sopenharmony_ci			o += print_swizzle(tex->dst_sel_y);
2409bf215546Sopenharmony_ci			o += print_swizzle(tex->dst_sel_z);
2410bf215546Sopenharmony_ci			o += print_swizzle(tex->dst_sel_w);
2411bf215546Sopenharmony_ci
2412bf215546Sopenharmony_ci			o += fprintf(stderr, ", R%d.", tex->src_gpr);
2413bf215546Sopenharmony_ci			o += print_swizzle(tex->src_sel_x);
2414bf215546Sopenharmony_ci			o += print_swizzle(tex->src_sel_y);
2415bf215546Sopenharmony_ci			o += print_swizzle(tex->src_sel_z);
2416bf215546Sopenharmony_ci			o += print_swizzle(tex->src_sel_w);
2417bf215546Sopenharmony_ci
2418bf215546Sopenharmony_ci			o += fprintf(stderr, ",  RID:%d", tex->resource_id);
2419bf215546Sopenharmony_ci			o += fprintf(stderr, ", SID:%d  ", tex->sampler_id);
2420bf215546Sopenharmony_ci
2421bf215546Sopenharmony_ci			if (tex->sampler_index_mode)
2422bf215546Sopenharmony_ci				fprintf(stderr, "SQ_%s ", index_mode[tex->sampler_index_mode]);
2423bf215546Sopenharmony_ci
2424bf215546Sopenharmony_ci			if (tex->lod_bias)
2425bf215546Sopenharmony_ci				fprintf(stderr, "LB:%d ", tex->lod_bias);
2426bf215546Sopenharmony_ci
2427bf215546Sopenharmony_ci			fprintf(stderr, "CT:%c%c%c%c ",
2428bf215546Sopenharmony_ci					tex->coord_type_x ? 'N' : 'U',
2429bf215546Sopenharmony_ci					tex->coord_type_y ? 'N' : 'U',
2430bf215546Sopenharmony_ci					tex->coord_type_z ? 'N' : 'U',
2431bf215546Sopenharmony_ci					tex->coord_type_w ? 'N' : 'U');
2432bf215546Sopenharmony_ci
2433bf215546Sopenharmony_ci			if (tex->offset_x)
2434bf215546Sopenharmony_ci				fprintf(stderr, "OX:%d ", tex->offset_x);
2435bf215546Sopenharmony_ci			if (tex->offset_y)
2436bf215546Sopenharmony_ci				fprintf(stderr, "OY:%d ", tex->offset_y);
2437bf215546Sopenharmony_ci			if (tex->offset_z)
2438bf215546Sopenharmony_ci				fprintf(stderr, "OZ:%d ", tex->offset_z);
2439bf215546Sopenharmony_ci
2440bf215546Sopenharmony_ci			id += 4;
2441bf215546Sopenharmony_ci			fprintf(stderr, "\n");
2442bf215546Sopenharmony_ci		}
2443bf215546Sopenharmony_ci
2444bf215546Sopenharmony_ci		LIST_FOR_EACH_ENTRY(vtx, &cf->vtx, list) {
2445bf215546Sopenharmony_ci			int o = 0;
2446bf215546Sopenharmony_ci			const char * fetch_type[] = {"VERTEX", "INSTANCE", ""};
2447bf215546Sopenharmony_ci			o += fprintf(stderr, " %04d %08X %08X %08X   ", id, bc->bytecode[id],
2448bf215546Sopenharmony_ci					bc->bytecode[id + 1], bc->bytecode[id + 2]);
2449bf215546Sopenharmony_ci
2450bf215546Sopenharmony_ci			o += fprintf(stderr, "%s ", r600_isa_fetch(vtx->op)->name);
2451bf215546Sopenharmony_ci
2452bf215546Sopenharmony_ci			o += print_indent(o, 50);
2453bf215546Sopenharmony_ci
2454bf215546Sopenharmony_ci			o += fprintf(stderr, "R%d.", vtx->dst_gpr);
2455bf215546Sopenharmony_ci			o += print_swizzle(vtx->dst_sel_x);
2456bf215546Sopenharmony_ci			o += print_swizzle(vtx->dst_sel_y);
2457bf215546Sopenharmony_ci			o += print_swizzle(vtx->dst_sel_z);
2458bf215546Sopenharmony_ci			o += print_swizzle(vtx->dst_sel_w);
2459bf215546Sopenharmony_ci
2460bf215546Sopenharmony_ci			o += fprintf(stderr, ", R%d.", vtx->src_gpr);
2461bf215546Sopenharmony_ci			o += print_swizzle(vtx->src_sel_x);
2462bf215546Sopenharmony_ci			if (r600_isa_fetch(vtx->op)->flags & FF_MEM)
2463bf215546Sopenharmony_ci				o += print_swizzle(vtx->src_sel_y);
2464bf215546Sopenharmony_ci
2465bf215546Sopenharmony_ci			if (vtx->offset)
2466bf215546Sopenharmony_ci				fprintf(stderr, " +%db", vtx->offset);
2467bf215546Sopenharmony_ci
2468bf215546Sopenharmony_ci			o += print_indent(o, 55);
2469bf215546Sopenharmony_ci
2470bf215546Sopenharmony_ci			fprintf(stderr, ",  RID:%d ", vtx->buffer_id);
2471bf215546Sopenharmony_ci
2472bf215546Sopenharmony_ci			fprintf(stderr, "%s ", fetch_type[vtx->fetch_type]);
2473bf215546Sopenharmony_ci
2474bf215546Sopenharmony_ci			if (bc->gfx_level < CAYMAN && vtx->mega_fetch_count)
2475bf215546Sopenharmony_ci				fprintf(stderr, "MFC:%d ", vtx->mega_fetch_count);
2476bf215546Sopenharmony_ci
2477bf215546Sopenharmony_ci			if (bc->gfx_level >= EVERGREEN && vtx->buffer_index_mode)
2478bf215546Sopenharmony_ci				fprintf(stderr, "SQ_%s ", index_mode[vtx->buffer_index_mode]);
2479bf215546Sopenharmony_ci
2480bf215546Sopenharmony_ci			if (r600_isa_fetch(vtx->op)->flags & FF_MEM) {
2481bf215546Sopenharmony_ci				if (vtx->uncached)
2482bf215546Sopenharmony_ci					fprintf(stderr, "UNCACHED ");
2483bf215546Sopenharmony_ci				if (vtx->indexed)
2484bf215546Sopenharmony_ci					fprintf(stderr, "INDEXED:%d ", vtx->indexed);
2485bf215546Sopenharmony_ci
2486bf215546Sopenharmony_ci				fprintf(stderr, "ELEM_SIZE:%d ", vtx->elem_size);
2487bf215546Sopenharmony_ci				if (vtx->burst_count)
2488bf215546Sopenharmony_ci					fprintf(stderr, "BURST_COUNT:%d ", vtx->burst_count);
2489bf215546Sopenharmony_ci				fprintf(stderr, "ARRAY_BASE:%d ", vtx->array_base);
2490bf215546Sopenharmony_ci				fprintf(stderr, "ARRAY_SIZE:%d ", vtx->array_size);
2491bf215546Sopenharmony_ci			}
2492bf215546Sopenharmony_ci
2493bf215546Sopenharmony_ci			fprintf(stderr, "UCF:%d ", vtx->use_const_fields);
2494bf215546Sopenharmony_ci			fprintf(stderr, "FMT(DTA:%d ", vtx->data_format);
2495bf215546Sopenharmony_ci			fprintf(stderr, "NUM:%d ", vtx->num_format_all);
2496bf215546Sopenharmony_ci			fprintf(stderr, "COMP:%d ", vtx->format_comp_all);
2497bf215546Sopenharmony_ci			fprintf(stderr, "MODE:%d)\n", vtx->srf_mode_all);
2498bf215546Sopenharmony_ci
2499bf215546Sopenharmony_ci			id += 4;
2500bf215546Sopenharmony_ci		}
2501bf215546Sopenharmony_ci
2502bf215546Sopenharmony_ci		LIST_FOR_EACH_ENTRY(gds, &cf->gds, list) {
2503bf215546Sopenharmony_ci			int o = 0;
2504bf215546Sopenharmony_ci			o += fprintf(stderr, " %04d %08X %08X %08X   ", id, bc->bytecode[id],
2505bf215546Sopenharmony_ci					bc->bytecode[id + 1], bc->bytecode[id + 2]);
2506bf215546Sopenharmony_ci
2507bf215546Sopenharmony_ci			o += fprintf(stderr, "%s ", r600_isa_fetch(gds->op)->name);
2508bf215546Sopenharmony_ci
2509bf215546Sopenharmony_ci			if (gds->op != FETCH_OP_TF_WRITE) {
2510bf215546Sopenharmony_ci				o += fprintf(stderr, "R%d.", gds->dst_gpr);
2511bf215546Sopenharmony_ci				o += print_swizzle(gds->dst_sel_x);
2512bf215546Sopenharmony_ci				o += print_swizzle(gds->dst_sel_y);
2513bf215546Sopenharmony_ci				o += print_swizzle(gds->dst_sel_z);
2514bf215546Sopenharmony_ci				o += print_swizzle(gds->dst_sel_w);
2515bf215546Sopenharmony_ci			}
2516bf215546Sopenharmony_ci
2517bf215546Sopenharmony_ci			o += fprintf(stderr, ", R%d.", gds->src_gpr);
2518bf215546Sopenharmony_ci			o += print_swizzle(gds->src_sel_x);
2519bf215546Sopenharmony_ci			o += print_swizzle(gds->src_sel_y);
2520bf215546Sopenharmony_ci			o += print_swizzle(gds->src_sel_z);
2521bf215546Sopenharmony_ci
2522bf215546Sopenharmony_ci			if (gds->op != FETCH_OP_TF_WRITE) {
2523bf215546Sopenharmony_ci				o += fprintf(stderr, ", R%d.", gds->src_gpr2);
2524bf215546Sopenharmony_ci			}
2525bf215546Sopenharmony_ci			if (gds->alloc_consume) {
2526bf215546Sopenharmony_ci				o += fprintf(stderr, " UAV: %d", gds->uav_id);
2527bf215546Sopenharmony_ci				if (gds->uav_index_mode)
2528bf215546Sopenharmony_ci					o += fprintf(stderr, "[%s]", index_mode[gds->uav_index_mode]);
2529bf215546Sopenharmony_ci			}
2530bf215546Sopenharmony_ci			fprintf(stderr, "\n");
2531bf215546Sopenharmony_ci			id += 4;
2532bf215546Sopenharmony_ci		}
2533bf215546Sopenharmony_ci	}
2534bf215546Sopenharmony_ci
2535bf215546Sopenharmony_ci	fprintf(stderr, "--------------------------------------\n");
2536bf215546Sopenharmony_ci}
2537bf215546Sopenharmony_ci
2538bf215546Sopenharmony_civoid r600_vertex_data_type(enum pipe_format pformat,
2539bf215546Sopenharmony_ci				  unsigned *format,
2540bf215546Sopenharmony_ci				  unsigned *num_format, unsigned *format_comp, unsigned *endian)
2541bf215546Sopenharmony_ci{
2542bf215546Sopenharmony_ci	const struct util_format_description *desc;
2543bf215546Sopenharmony_ci	unsigned i;
2544bf215546Sopenharmony_ci
2545bf215546Sopenharmony_ci	*format = 0;
2546bf215546Sopenharmony_ci	*num_format = 0;
2547bf215546Sopenharmony_ci	*format_comp = 0;
2548bf215546Sopenharmony_ci	*endian = ENDIAN_NONE;
2549bf215546Sopenharmony_ci
2550bf215546Sopenharmony_ci	if (pformat == PIPE_FORMAT_R11G11B10_FLOAT) {
2551bf215546Sopenharmony_ci		*format = FMT_10_11_11_FLOAT;
2552bf215546Sopenharmony_ci		*endian = r600_endian_swap(32);
2553bf215546Sopenharmony_ci		return;
2554bf215546Sopenharmony_ci	}
2555bf215546Sopenharmony_ci
2556bf215546Sopenharmony_ci	if (pformat == PIPE_FORMAT_B5G6R5_UNORM) {
2557bf215546Sopenharmony_ci		*format = FMT_5_6_5;
2558bf215546Sopenharmony_ci		*endian = r600_endian_swap(16);
2559bf215546Sopenharmony_ci		return;
2560bf215546Sopenharmony_ci	}
2561bf215546Sopenharmony_ci
2562bf215546Sopenharmony_ci	if (pformat == PIPE_FORMAT_B5G5R5A1_UNORM) {
2563bf215546Sopenharmony_ci		*format = FMT_1_5_5_5;
2564bf215546Sopenharmony_ci		*endian = r600_endian_swap(16);
2565bf215546Sopenharmony_ci		return;
2566bf215546Sopenharmony_ci	}
2567bf215546Sopenharmony_ci
2568bf215546Sopenharmony_ci	if (pformat == PIPE_FORMAT_A1B5G5R5_UNORM) {
2569bf215546Sopenharmony_ci		*format = FMT_5_5_5_1;
2570bf215546Sopenharmony_ci		return;
2571bf215546Sopenharmony_ci	}
2572bf215546Sopenharmony_ci
2573bf215546Sopenharmony_ci	desc = util_format_description(pformat);
2574bf215546Sopenharmony_ci	if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) {
2575bf215546Sopenharmony_ci		goto out_unknown;
2576bf215546Sopenharmony_ci	}
2577bf215546Sopenharmony_ci
2578bf215546Sopenharmony_ci	/* Find the first non-VOID channel. */
2579bf215546Sopenharmony_ci	for (i = 0; i < 4; i++) {
2580bf215546Sopenharmony_ci		if (desc->channel[i].type != UTIL_FORMAT_TYPE_VOID) {
2581bf215546Sopenharmony_ci			break;
2582bf215546Sopenharmony_ci		}
2583bf215546Sopenharmony_ci	}
2584bf215546Sopenharmony_ci
2585bf215546Sopenharmony_ci	*endian = r600_endian_swap(desc->channel[i].size);
2586bf215546Sopenharmony_ci
2587bf215546Sopenharmony_ci	switch (desc->channel[i].type) {
2588bf215546Sopenharmony_ci	/* Half-floats, floats, ints */
2589bf215546Sopenharmony_ci	case UTIL_FORMAT_TYPE_FLOAT:
2590bf215546Sopenharmony_ci		switch (desc->channel[i].size) {
2591bf215546Sopenharmony_ci		case 16:
2592bf215546Sopenharmony_ci			switch (desc->nr_channels) {
2593bf215546Sopenharmony_ci			case 1:
2594bf215546Sopenharmony_ci				*format = FMT_16_FLOAT;
2595bf215546Sopenharmony_ci				break;
2596bf215546Sopenharmony_ci			case 2:
2597bf215546Sopenharmony_ci				*format = FMT_16_16_FLOAT;
2598bf215546Sopenharmony_ci				break;
2599bf215546Sopenharmony_ci			case 3:
2600bf215546Sopenharmony_ci			case 4:
2601bf215546Sopenharmony_ci				*format = FMT_16_16_16_16_FLOAT;
2602bf215546Sopenharmony_ci				break;
2603bf215546Sopenharmony_ci			}
2604bf215546Sopenharmony_ci			break;
2605bf215546Sopenharmony_ci		case 32:
2606bf215546Sopenharmony_ci			switch (desc->nr_channels) {
2607bf215546Sopenharmony_ci			case 1:
2608bf215546Sopenharmony_ci				*format = FMT_32_FLOAT;
2609bf215546Sopenharmony_ci				break;
2610bf215546Sopenharmony_ci			case 2:
2611bf215546Sopenharmony_ci				*format = FMT_32_32_FLOAT;
2612bf215546Sopenharmony_ci				break;
2613bf215546Sopenharmony_ci			case 3:
2614bf215546Sopenharmony_ci				*format = FMT_32_32_32_FLOAT;
2615bf215546Sopenharmony_ci				break;
2616bf215546Sopenharmony_ci			case 4:
2617bf215546Sopenharmony_ci				*format = FMT_32_32_32_32_FLOAT;
2618bf215546Sopenharmony_ci				break;
2619bf215546Sopenharmony_ci			}
2620bf215546Sopenharmony_ci			break;
2621bf215546Sopenharmony_ci		default:
2622bf215546Sopenharmony_ci			goto out_unknown;
2623bf215546Sopenharmony_ci		}
2624bf215546Sopenharmony_ci		break;
2625bf215546Sopenharmony_ci		/* Unsigned ints */
2626bf215546Sopenharmony_ci	case UTIL_FORMAT_TYPE_UNSIGNED:
2627bf215546Sopenharmony_ci		/* Signed ints */
2628bf215546Sopenharmony_ci	case UTIL_FORMAT_TYPE_SIGNED:
2629bf215546Sopenharmony_ci		switch (desc->channel[i].size) {
2630bf215546Sopenharmony_ci		case 4:
2631bf215546Sopenharmony_ci			switch (desc->nr_channels) {
2632bf215546Sopenharmony_ci			case 2:
2633bf215546Sopenharmony_ci				*format = FMT_4_4;
2634bf215546Sopenharmony_ci				break;
2635bf215546Sopenharmony_ci			case 4:
2636bf215546Sopenharmony_ci				*format = FMT_4_4_4_4;
2637bf215546Sopenharmony_ci				break;
2638bf215546Sopenharmony_ci			}
2639bf215546Sopenharmony_ci			break;
2640bf215546Sopenharmony_ci		case 8:
2641bf215546Sopenharmony_ci			switch (desc->nr_channels) {
2642bf215546Sopenharmony_ci			case 1:
2643bf215546Sopenharmony_ci				*format = FMT_8;
2644bf215546Sopenharmony_ci				break;
2645bf215546Sopenharmony_ci			case 2:
2646bf215546Sopenharmony_ci				*format = FMT_8_8;
2647bf215546Sopenharmony_ci				break;
2648bf215546Sopenharmony_ci			case 3:
2649bf215546Sopenharmony_ci			case 4:
2650bf215546Sopenharmony_ci				*format = FMT_8_8_8_8;
2651bf215546Sopenharmony_ci				break;
2652bf215546Sopenharmony_ci			}
2653bf215546Sopenharmony_ci			break;
2654bf215546Sopenharmony_ci		case 10:
2655bf215546Sopenharmony_ci			if (desc->nr_channels != 4)
2656bf215546Sopenharmony_ci				goto out_unknown;
2657bf215546Sopenharmony_ci
2658bf215546Sopenharmony_ci			*format = FMT_2_10_10_10;
2659bf215546Sopenharmony_ci			break;
2660bf215546Sopenharmony_ci		case 16:
2661bf215546Sopenharmony_ci			switch (desc->nr_channels) {
2662bf215546Sopenharmony_ci			case 1:
2663bf215546Sopenharmony_ci				*format = FMT_16;
2664bf215546Sopenharmony_ci				break;
2665bf215546Sopenharmony_ci			case 2:
2666bf215546Sopenharmony_ci				*format = FMT_16_16;
2667bf215546Sopenharmony_ci				break;
2668bf215546Sopenharmony_ci			case 3:
2669bf215546Sopenharmony_ci			case 4:
2670bf215546Sopenharmony_ci				*format = FMT_16_16_16_16;
2671bf215546Sopenharmony_ci				break;
2672bf215546Sopenharmony_ci			}
2673bf215546Sopenharmony_ci			break;
2674bf215546Sopenharmony_ci		case 32:
2675bf215546Sopenharmony_ci			switch (desc->nr_channels) {
2676bf215546Sopenharmony_ci			case 1:
2677bf215546Sopenharmony_ci				*format = FMT_32;
2678bf215546Sopenharmony_ci				break;
2679bf215546Sopenharmony_ci			case 2:
2680bf215546Sopenharmony_ci				*format = FMT_32_32;
2681bf215546Sopenharmony_ci				break;
2682bf215546Sopenharmony_ci			case 3:
2683bf215546Sopenharmony_ci				*format = FMT_32_32_32;
2684bf215546Sopenharmony_ci				break;
2685bf215546Sopenharmony_ci			case 4:
2686bf215546Sopenharmony_ci				*format = FMT_32_32_32_32;
2687bf215546Sopenharmony_ci				break;
2688bf215546Sopenharmony_ci			}
2689bf215546Sopenharmony_ci			break;
2690bf215546Sopenharmony_ci		default:
2691bf215546Sopenharmony_ci			goto out_unknown;
2692bf215546Sopenharmony_ci		}
2693bf215546Sopenharmony_ci		break;
2694bf215546Sopenharmony_ci	default:
2695bf215546Sopenharmony_ci		goto out_unknown;
2696bf215546Sopenharmony_ci	}
2697bf215546Sopenharmony_ci
2698bf215546Sopenharmony_ci	if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
2699bf215546Sopenharmony_ci		*format_comp = 1;
2700bf215546Sopenharmony_ci	}
2701bf215546Sopenharmony_ci
2702bf215546Sopenharmony_ci	*num_format = 0;
2703bf215546Sopenharmony_ci	if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED ||
2704bf215546Sopenharmony_ci	    desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
2705bf215546Sopenharmony_ci		if (!desc->channel[i].normalized) {
2706bf215546Sopenharmony_ci			if (desc->channel[i].pure_integer)
2707bf215546Sopenharmony_ci				*num_format = 1;
2708bf215546Sopenharmony_ci			else
2709bf215546Sopenharmony_ci				*num_format = 2;
2710bf215546Sopenharmony_ci		}
2711bf215546Sopenharmony_ci	}
2712bf215546Sopenharmony_ci	return;
2713bf215546Sopenharmony_ciout_unknown:
2714bf215546Sopenharmony_ci	R600_ERR("unsupported vertex format %s\n", util_format_name(pformat));
2715bf215546Sopenharmony_ci}
2716bf215546Sopenharmony_ci
2717bf215546Sopenharmony_civoid *r600_create_vertex_fetch_shader(struct pipe_context *ctx,
2718bf215546Sopenharmony_ci				      unsigned count,
2719bf215546Sopenharmony_ci				      const struct pipe_vertex_element *elements)
2720bf215546Sopenharmony_ci{
2721bf215546Sopenharmony_ci	struct r600_context *rctx = (struct r600_context *)ctx;
2722bf215546Sopenharmony_ci	struct r600_bytecode bc;
2723bf215546Sopenharmony_ci	struct r600_bytecode_vtx vtx;
2724bf215546Sopenharmony_ci	const struct util_format_description *desc;
2725bf215546Sopenharmony_ci	unsigned fetch_resource_start = rctx->b.gfx_level >= EVERGREEN ? 0 : 160;
2726bf215546Sopenharmony_ci	unsigned format, num_format, format_comp, endian;
2727bf215546Sopenharmony_ci	uint32_t *bytecode;
2728bf215546Sopenharmony_ci	int i, j, r, fs_size;
2729bf215546Sopenharmony_ci	struct r600_fetch_shader *shader;
2730bf215546Sopenharmony_ci	unsigned no_sb = rctx->screen->b.debug_flags & DBG_NO_SB ||
2731bf215546Sopenharmony_ci                         (rctx->screen->b.debug_flags & DBG_NIR);
2732bf215546Sopenharmony_ci	unsigned sb_disasm = !no_sb || (rctx->screen->b.debug_flags & DBG_SB_DISASM);
2733bf215546Sopenharmony_ci
2734bf215546Sopenharmony_ci	assert(count < 32);
2735bf215546Sopenharmony_ci
2736bf215546Sopenharmony_ci	memset(&bc, 0, sizeof(bc));
2737bf215546Sopenharmony_ci	r600_bytecode_init(&bc, rctx->b.gfx_level, rctx->b.family,
2738bf215546Sopenharmony_ci			   rctx->screen->has_compressed_msaa_texturing);
2739bf215546Sopenharmony_ci
2740bf215546Sopenharmony_ci	bc.isa = rctx->isa;
2741bf215546Sopenharmony_ci
2742bf215546Sopenharmony_ci	for (i = 0; i < count; i++) {
2743bf215546Sopenharmony_ci		if (elements[i].instance_divisor > 1) {
2744bf215546Sopenharmony_ci			if (rctx->b.gfx_level == CAYMAN) {
2745bf215546Sopenharmony_ci				for (j = 0; j < 4; j++) {
2746bf215546Sopenharmony_ci					struct r600_bytecode_alu alu;
2747bf215546Sopenharmony_ci					memset(&alu, 0, sizeof(alu));
2748bf215546Sopenharmony_ci					alu.op = ALU_OP2_MULHI_UINT;
2749bf215546Sopenharmony_ci					alu.src[0].sel = 0;
2750bf215546Sopenharmony_ci					alu.src[0].chan = 3;
2751bf215546Sopenharmony_ci					alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
2752bf215546Sopenharmony_ci					alu.src[1].value = (1ll << 32) / elements[i].instance_divisor + 1;
2753bf215546Sopenharmony_ci					alu.dst.sel = i + 1;
2754bf215546Sopenharmony_ci					alu.dst.chan = j;
2755bf215546Sopenharmony_ci					alu.dst.write = j == 3;
2756bf215546Sopenharmony_ci					alu.last = j == 3;
2757bf215546Sopenharmony_ci					if ((r = r600_bytecode_add_alu(&bc, &alu))) {
2758bf215546Sopenharmony_ci						r600_bytecode_clear(&bc);
2759bf215546Sopenharmony_ci						return NULL;
2760bf215546Sopenharmony_ci					}
2761bf215546Sopenharmony_ci				}
2762bf215546Sopenharmony_ci			} else {
2763bf215546Sopenharmony_ci				struct r600_bytecode_alu alu;
2764bf215546Sopenharmony_ci				memset(&alu, 0, sizeof(alu));
2765bf215546Sopenharmony_ci				alu.op = ALU_OP2_MULHI_UINT;
2766bf215546Sopenharmony_ci				alu.src[0].sel = 0;
2767bf215546Sopenharmony_ci				alu.src[0].chan = 3;
2768bf215546Sopenharmony_ci				alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
2769bf215546Sopenharmony_ci				alu.src[1].value = (1ll << 32) / elements[i].instance_divisor + 1;
2770bf215546Sopenharmony_ci				alu.dst.sel = i + 1;
2771bf215546Sopenharmony_ci				alu.dst.chan = 3;
2772bf215546Sopenharmony_ci				alu.dst.write = 1;
2773bf215546Sopenharmony_ci				alu.last = 1;
2774bf215546Sopenharmony_ci				if ((r = r600_bytecode_add_alu(&bc, &alu))) {
2775bf215546Sopenharmony_ci					r600_bytecode_clear(&bc);
2776bf215546Sopenharmony_ci					return NULL;
2777bf215546Sopenharmony_ci				}
2778bf215546Sopenharmony_ci			}
2779bf215546Sopenharmony_ci		}
2780bf215546Sopenharmony_ci	}
2781bf215546Sopenharmony_ci
2782bf215546Sopenharmony_ci	for (i = 0; i < count; i++) {
2783bf215546Sopenharmony_ci		r600_vertex_data_type(elements[i].src_format,
2784bf215546Sopenharmony_ci				      &format, &num_format, &format_comp, &endian);
2785bf215546Sopenharmony_ci
2786bf215546Sopenharmony_ci		desc = util_format_description(elements[i].src_format);
2787bf215546Sopenharmony_ci
2788bf215546Sopenharmony_ci		if (elements[i].src_offset > 65535) {
2789bf215546Sopenharmony_ci			r600_bytecode_clear(&bc);
2790bf215546Sopenharmony_ci			R600_ERR("too big src_offset: %u\n", elements[i].src_offset);
2791bf215546Sopenharmony_ci			return NULL;
2792bf215546Sopenharmony_ci		}
2793bf215546Sopenharmony_ci
2794bf215546Sopenharmony_ci		memset(&vtx, 0, sizeof(vtx));
2795bf215546Sopenharmony_ci		vtx.buffer_id = elements[i].vertex_buffer_index + fetch_resource_start;
2796bf215546Sopenharmony_ci		vtx.fetch_type = elements[i].instance_divisor ? SQ_VTX_FETCH_INSTANCE_DATA : SQ_VTX_FETCH_VERTEX_DATA;
2797bf215546Sopenharmony_ci		vtx.src_gpr = elements[i].instance_divisor > 1 ? i + 1 : 0;
2798bf215546Sopenharmony_ci		vtx.src_sel_x = elements[i].instance_divisor ? 3 : 0;
2799bf215546Sopenharmony_ci		vtx.mega_fetch_count = 0x1F;
2800bf215546Sopenharmony_ci		vtx.dst_gpr = i + 1;
2801bf215546Sopenharmony_ci		vtx.dst_sel_x = desc->swizzle[0];
2802bf215546Sopenharmony_ci		vtx.dst_sel_y = desc->swizzle[1];
2803bf215546Sopenharmony_ci		vtx.dst_sel_z = desc->swizzle[2];
2804bf215546Sopenharmony_ci		vtx.dst_sel_w = desc->swizzle[3];
2805bf215546Sopenharmony_ci		vtx.data_format = format;
2806bf215546Sopenharmony_ci		vtx.num_format_all = num_format;
2807bf215546Sopenharmony_ci		vtx.format_comp_all = format_comp;
2808bf215546Sopenharmony_ci		vtx.offset = elements[i].src_offset;
2809bf215546Sopenharmony_ci		vtx.endian = endian;
2810bf215546Sopenharmony_ci
2811bf215546Sopenharmony_ci		if ((r = r600_bytecode_add_vtx(&bc, &vtx))) {
2812bf215546Sopenharmony_ci			r600_bytecode_clear(&bc);
2813bf215546Sopenharmony_ci			return NULL;
2814bf215546Sopenharmony_ci		}
2815bf215546Sopenharmony_ci	}
2816bf215546Sopenharmony_ci
2817bf215546Sopenharmony_ci	r600_bytecode_add_cfinst(&bc, CF_OP_RET);
2818bf215546Sopenharmony_ci
2819bf215546Sopenharmony_ci	if ((r = r600_bytecode_build(&bc))) {
2820bf215546Sopenharmony_ci		r600_bytecode_clear(&bc);
2821bf215546Sopenharmony_ci		return NULL;
2822bf215546Sopenharmony_ci	}
2823bf215546Sopenharmony_ci
2824bf215546Sopenharmony_ci	if (rctx->screen->b.debug_flags & DBG_FS) {
2825bf215546Sopenharmony_ci		fprintf(stderr, "--------------------------------------------------------------\n");
2826bf215546Sopenharmony_ci		fprintf(stderr, "Vertex elements state:\n");
2827bf215546Sopenharmony_ci		for (i = 0; i < count; i++) {
2828bf215546Sopenharmony_ci			fprintf(stderr, "   ");
2829bf215546Sopenharmony_ci			util_dump_vertex_element(stderr, elements+i);
2830bf215546Sopenharmony_ci			fprintf(stderr, "\n");
2831bf215546Sopenharmony_ci		}
2832bf215546Sopenharmony_ci
2833bf215546Sopenharmony_ci		if (!sb_disasm) {
2834bf215546Sopenharmony_ci			r600_bytecode_disasm(&bc);
2835bf215546Sopenharmony_ci
2836bf215546Sopenharmony_ci			fprintf(stderr, "______________________________________________________________\n");
2837bf215546Sopenharmony_ci		} else {
2838bf215546Sopenharmony_ci			r600_sb_bytecode_process(rctx, &bc, NULL, 1 /*dump*/, 0 /*optimize*/);
2839bf215546Sopenharmony_ci		}
2840bf215546Sopenharmony_ci	}
2841bf215546Sopenharmony_ci
2842bf215546Sopenharmony_ci	fs_size = bc.ndw*4;
2843bf215546Sopenharmony_ci
2844bf215546Sopenharmony_ci	/* Allocate the CSO. */
2845bf215546Sopenharmony_ci	shader = CALLOC_STRUCT(r600_fetch_shader);
2846bf215546Sopenharmony_ci	if (!shader) {
2847bf215546Sopenharmony_ci		r600_bytecode_clear(&bc);
2848bf215546Sopenharmony_ci		return NULL;
2849bf215546Sopenharmony_ci	}
2850bf215546Sopenharmony_ci
2851bf215546Sopenharmony_ci	u_suballocator_alloc(&rctx->allocator_fetch_shader, fs_size, 256,
2852bf215546Sopenharmony_ci			     &shader->offset,
2853bf215546Sopenharmony_ci			     (struct pipe_resource**)&shader->buffer);
2854bf215546Sopenharmony_ci	if (!shader->buffer) {
2855bf215546Sopenharmony_ci		r600_bytecode_clear(&bc);
2856bf215546Sopenharmony_ci		FREE(shader);
2857bf215546Sopenharmony_ci		return NULL;
2858bf215546Sopenharmony_ci	}
2859bf215546Sopenharmony_ci
2860bf215546Sopenharmony_ci	bytecode = r600_buffer_map_sync_with_rings
2861bf215546Sopenharmony_ci		(&rctx->b, shader->buffer,
2862bf215546Sopenharmony_ci		PIPE_MAP_WRITE | PIPE_MAP_UNSYNCHRONIZED | RADEON_MAP_TEMPORARY);
2863bf215546Sopenharmony_ci	bytecode += shader->offset / 4;
2864bf215546Sopenharmony_ci
2865bf215546Sopenharmony_ci	if (R600_BIG_ENDIAN) {
2866bf215546Sopenharmony_ci		for (i = 0; i < fs_size / 4; ++i) {
2867bf215546Sopenharmony_ci			bytecode[i] = util_cpu_to_le32(bc.bytecode[i]);
2868bf215546Sopenharmony_ci		}
2869bf215546Sopenharmony_ci	} else {
2870bf215546Sopenharmony_ci		memcpy(bytecode, bc.bytecode, fs_size);
2871bf215546Sopenharmony_ci	}
2872bf215546Sopenharmony_ci	rctx->b.ws->buffer_unmap(rctx->b.ws, shader->buffer->buf);
2873bf215546Sopenharmony_ci
2874bf215546Sopenharmony_ci	r600_bytecode_clear(&bc);
2875bf215546Sopenharmony_ci	return shader;
2876bf215546Sopenharmony_ci}
2877bf215546Sopenharmony_ci
2878bf215546Sopenharmony_civoid r600_bytecode_alu_read(struct r600_bytecode *bc,
2879bf215546Sopenharmony_ci		struct r600_bytecode_alu *alu, uint32_t word0, uint32_t word1)
2880bf215546Sopenharmony_ci{
2881bf215546Sopenharmony_ci	/* WORD0 */
2882bf215546Sopenharmony_ci	alu->src[0].sel = G_SQ_ALU_WORD0_SRC0_SEL(word0);
2883bf215546Sopenharmony_ci	alu->src[0].rel = G_SQ_ALU_WORD0_SRC0_REL(word0);
2884bf215546Sopenharmony_ci	alu->src[0].chan = G_SQ_ALU_WORD0_SRC0_CHAN(word0);
2885bf215546Sopenharmony_ci	alu->src[0].neg = G_SQ_ALU_WORD0_SRC0_NEG(word0);
2886bf215546Sopenharmony_ci	alu->src[1].sel = G_SQ_ALU_WORD0_SRC1_SEL(word0);
2887bf215546Sopenharmony_ci	alu->src[1].rel = G_SQ_ALU_WORD0_SRC1_REL(word0);
2888bf215546Sopenharmony_ci	alu->src[1].chan = G_SQ_ALU_WORD0_SRC1_CHAN(word0);
2889bf215546Sopenharmony_ci	alu->src[1].neg = G_SQ_ALU_WORD0_SRC1_NEG(word0);
2890bf215546Sopenharmony_ci	alu->index_mode = G_SQ_ALU_WORD0_INDEX_MODE(word0);
2891bf215546Sopenharmony_ci	alu->pred_sel = G_SQ_ALU_WORD0_PRED_SEL(word0);
2892bf215546Sopenharmony_ci	alu->last = G_SQ_ALU_WORD0_LAST(word0);
2893bf215546Sopenharmony_ci
2894bf215546Sopenharmony_ci	/* WORD1 */
2895bf215546Sopenharmony_ci	alu->bank_swizzle = G_SQ_ALU_WORD1_BANK_SWIZZLE(word1);
2896bf215546Sopenharmony_ci	if (alu->bank_swizzle)
2897bf215546Sopenharmony_ci		alu->bank_swizzle_force = alu->bank_swizzle;
2898bf215546Sopenharmony_ci	alu->dst.sel = G_SQ_ALU_WORD1_DST_GPR(word1);
2899bf215546Sopenharmony_ci	alu->dst.rel = G_SQ_ALU_WORD1_DST_REL(word1);
2900bf215546Sopenharmony_ci	alu->dst.chan = G_SQ_ALU_WORD1_DST_CHAN(word1);
2901bf215546Sopenharmony_ci	alu->dst.clamp = G_SQ_ALU_WORD1_CLAMP(word1);
2902bf215546Sopenharmony_ci	if (G_SQ_ALU_WORD1_ENCODING(word1)) /*ALU_DWORD1_OP3*/
2903bf215546Sopenharmony_ci	{
2904bf215546Sopenharmony_ci		alu->is_op3 = 1;
2905bf215546Sopenharmony_ci		alu->src[2].sel = G_SQ_ALU_WORD1_OP3_SRC2_SEL(word1);
2906bf215546Sopenharmony_ci		alu->src[2].rel = G_SQ_ALU_WORD1_OP3_SRC2_REL(word1);
2907bf215546Sopenharmony_ci		alu->src[2].chan = G_SQ_ALU_WORD1_OP3_SRC2_CHAN(word1);
2908bf215546Sopenharmony_ci		alu->src[2].neg = G_SQ_ALU_WORD1_OP3_SRC2_NEG(word1);
2909bf215546Sopenharmony_ci		alu->op = r600_isa_alu_by_opcode(bc->isa,
2910bf215546Sopenharmony_ci				G_SQ_ALU_WORD1_OP3_ALU_INST(word1), /* is_op3 = */ 1);
2911bf215546Sopenharmony_ci
2912bf215546Sopenharmony_ci	}
2913bf215546Sopenharmony_ci	else /*ALU_DWORD1_OP2*/
2914bf215546Sopenharmony_ci	{
2915bf215546Sopenharmony_ci		alu->src[0].abs = G_SQ_ALU_WORD1_OP2_SRC0_ABS(word1);
2916bf215546Sopenharmony_ci		alu->src[1].abs = G_SQ_ALU_WORD1_OP2_SRC1_ABS(word1);
2917bf215546Sopenharmony_ci		alu->op = r600_isa_alu_by_opcode(bc->isa,
2918bf215546Sopenharmony_ci				G_SQ_ALU_WORD1_OP2_ALU_INST(word1), /* is_op3 = */ 0);
2919bf215546Sopenharmony_ci		alu->omod = G_SQ_ALU_WORD1_OP2_OMOD(word1);
2920bf215546Sopenharmony_ci		alu->dst.write = G_SQ_ALU_WORD1_OP2_WRITE_MASK(word1);
2921bf215546Sopenharmony_ci		alu->update_pred = G_SQ_ALU_WORD1_OP2_UPDATE_PRED(word1);
2922bf215546Sopenharmony_ci		alu->execute_mask =
2923bf215546Sopenharmony_ci			G_SQ_ALU_WORD1_OP2_UPDATE_EXECUTE_MASK(word1);
2924bf215546Sopenharmony_ci	}
2925bf215546Sopenharmony_ci}
2926bf215546Sopenharmony_ci
2927bf215546Sopenharmony_ci#if 0
2928bf215546Sopenharmony_civoid r600_bytecode_export_read(struct r600_bytecode *bc,
2929bf215546Sopenharmony_ci		struct r600_bytecode_output *output, uint32_t word0, uint32_t word1)
2930bf215546Sopenharmony_ci{
2931bf215546Sopenharmony_ci	output->array_base = G_SQ_CF_ALLOC_EXPORT_WORD0_ARRAY_BASE(word0);
2932bf215546Sopenharmony_ci	output->type = G_SQ_CF_ALLOC_EXPORT_WORD0_TYPE(word0);
2933bf215546Sopenharmony_ci	output->gpr = G_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(word0);
2934bf215546Sopenharmony_ci	output->elem_size = G_SQ_CF_ALLOC_EXPORT_WORD0_ELEM_SIZE(word0);
2935bf215546Sopenharmony_ci
2936bf215546Sopenharmony_ci	output->swizzle_x = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_X(word1);
2937bf215546Sopenharmony_ci	output->swizzle_y = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Y(word1);
2938bf215546Sopenharmony_ci	output->swizzle_z = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_Z(word1);
2939bf215546Sopenharmony_ci	output->swizzle_w = G_SQ_CF_ALLOC_EXPORT_WORD1_SWIZ_SEL_W(word1);
2940bf215546Sopenharmony_ci	output->burst_count = G_SQ_CF_ALLOC_EXPORT_WORD1_BURST_COUNT(word1);
2941bf215546Sopenharmony_ci	output->end_of_program = G_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(word1);
2942bf215546Sopenharmony_ci    output->op = r600_isa_cf_by_opcode(bc->isa,
2943bf215546Sopenharmony_ci			G_SQ_CF_ALLOC_EXPORT_WORD1_CF_INST(word1), 0);
2944bf215546Sopenharmony_ci	output->barrier = G_SQ_CF_ALLOC_EXPORT_WORD1_BARRIER(word1);
2945bf215546Sopenharmony_ci	output->array_size = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_ARRAY_SIZE(word1);
2946bf215546Sopenharmony_ci	output->comp_mask = G_SQ_CF_ALLOC_EXPORT_WORD1_BUF_COMP_MASK(word1);
2947bf215546Sopenharmony_ci}
2948bf215546Sopenharmony_ci#endif
2949