1bf215546Sopenharmony_ci/* Author(s):
2bf215546Sopenharmony_ci *   Connor Abbott
3bf215546Sopenharmony_ci *   Alyssa Rosenzweig
4bf215546Sopenharmony_ci *
5bf215546Sopenharmony_ci * Copyright (c) 2013 Connor Abbott (connor@abbott.cx)
6bf215546Sopenharmony_ci * Copyright (c) 2018 Alyssa Rosenzweig (alyssa@rosenzweig.io)
7bf215546Sopenharmony_ci * Copyright (C) 2019-2020 Collabora, Ltd.
8bf215546Sopenharmony_ci *
9bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a copy
10bf215546Sopenharmony_ci * of this software and associated documentation files (the "Software"), to deal
11bf215546Sopenharmony_ci * in the Software without restriction, including without limitation the rights
12bf215546Sopenharmony_ci * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13bf215546Sopenharmony_ci * copies of the Software, and to permit persons to whom the Software is
14bf215546Sopenharmony_ci * furnished to do so, subject to the following conditions:
15bf215546Sopenharmony_ci *
16bf215546Sopenharmony_ci * The above copyright notice and this permission notice shall be included in
17bf215546Sopenharmony_ci * all copies or substantial portions of the Software.
18bf215546Sopenharmony_ci *
19bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22bf215546Sopenharmony_ci * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25bf215546Sopenharmony_ci * THE SOFTWARE.
26bf215546Sopenharmony_ci */
27bf215546Sopenharmony_ci
28bf215546Sopenharmony_ci#ifndef __midgard_h__
29bf215546Sopenharmony_ci#define __midgard_h__
30bf215546Sopenharmony_ci
31bf215546Sopenharmony_ci#include <stdint.h>
32bf215546Sopenharmony_ci#include <stdbool.h>
33bf215546Sopenharmony_ci
/* Debug flags. Each flag occupies its own bit, so several can be combined
 * in one int (presumably midgard_debug below -- confirm against users). */
#define MIDGARD_DBG_MSGS     (1 << 0)
#define MIDGARD_DBG_SHADERS  (1 << 1)
#define MIDGARD_DBG_SHADERDB (1 << 2)
#define MIDGARD_DBG_INORDER  (1 << 3)
#define MIDGARD_DBG_VERBOSE  (1 << 4)
#define MIDGARD_DBG_INTERNAL (1 << 5)

/* Declared here only; the definition lives in another translation unit. */
extern int midgard_debug;
42bf215546Sopenharmony_ci
/* The three kinds of instruction word named in this header. */
typedef enum {
        midgard_word_type_alu        = 0,
        midgard_word_type_load_store = 1,
        midgard_word_type_texture    = 2
} midgard_word_type;
48bf215546Sopenharmony_ci
/* ALU unit identifiers, numbered consecutively from zero. */
typedef enum {
        midgard_alu_vmul = 0,
        midgard_alu_sadd = 1,
        midgard_alu_smul = 2,
        midgard_alu_vadd = 3,
        midgard_alu_lut  = 4
} midgard_alu;
56bf215546Sopenharmony_ci
/* Instruction tags. All sixteen values of a 4-bit field are assigned. */
enum {
        TAG_INVALID           = 0x0,
        TAG_BREAK             = 0x1,

        /* Texture tags */
        TAG_TEXTURE_4_VTX     = 0x2,
        TAG_TEXTURE_4         = 0x3,
        TAG_TEXTURE_4_BARRIER = 0x4,

        /* Load/store tag */
        TAG_LOAD_STORE_4      = 0x5,

        TAG_UNKNOWN_1         = 0x6,
        TAG_UNKNOWN_2         = 0x7,

        /* ALU tags */
        TAG_ALU_4             = 0x8,
        TAG_ALU_8             = 0x9,
        TAG_ALU_12            = 0xA,
        TAG_ALU_16            = 0xB,

        /* ALU tags with writeout; each equals the matching ALU tag plus 4 */
        TAG_ALU_4_WRITEOUT    = 0xC,
        TAG_ALU_8_WRITEOUT    = 0xD,
        TAG_ALU_12_WRITEOUT   = 0xE,
        TAG_ALU_16_WRITEOUT   = 0xF
};
75bf215546Sopenharmony_ci
76bf215546Sopenharmony_ci/*
77bf215546Sopenharmony_ci * ALU words
78bf215546Sopenharmony_ci */
79bf215546Sopenharmony_ci
/* ALU opcodes. Every value fits in 8 bits, matching the 8-bit op field of
 * midgard_vector_alu and midgard_scalar_alu. */
typedef enum {
        /* Float add and multiply. The unsuffixed forms round to even. */
        midgard_alu_op_fadd = 0x10,
        midgard_alu_op_fadd_rtz = 0x11,
        midgard_alu_op_fadd_rtn = 0x12,
        midgard_alu_op_fadd_rtp = 0x13,
        midgard_alu_op_fmul = 0x14,
        midgard_alu_op_fmul_rtz = 0x15,
        midgard_alu_op_fmul_rtn = 0x16,
        midgard_alu_op_fmul_rtp = 0x17,

        /* Float min/max. If an operand is NaN, the plain forms propagate
         * the other operand, while the _nan forms propagate the NaN. The
         * fabs* forms are min(abs(a,b)) / max(abs(a,b)), with the same NaN
         * rule for their _nan variants. */
        midgard_alu_op_fmin = 0x28,
        midgard_alu_op_fmin_nan = 0x29,
        midgard_alu_op_fabsmin = 0x2A,
        midgard_alu_op_fabsmin_nan = 0x2B,
        midgard_alu_op_fmax = 0x2C,
        midgard_alu_op_fmax_nan = 0x2D,
        midgard_alu_op_fabsmax = 0x2E,
        midgard_alu_op_fabsmax_nan = 0x2F,

        /* Float moves; the unsuffixed fmov is fmov_rte. */
        midgard_alu_op_fmov = 0x30,
        midgard_alu_op_fmov_rtz = 0x31,
        midgard_alu_op_fmov_rtn = 0x32,
        midgard_alu_op_fmov_rtp = 0x33,

        /* Float rounding to integral values */
        midgard_alu_op_froundeven = 0x34,
        midgard_alu_op_ftrunc = 0x35,
        midgard_alu_op_ffloor = 0x36,
        midgard_alu_op_fceil = 0x37,

        /* Fused multiply-add; the unsuffixed ffma is the rte variant. */
        midgard_alu_op_ffma = 0x38,
        midgard_alu_op_ffma_rtz = 0x39,
        midgard_alu_op_ffma_rtn = 0x3A,
        midgard_alu_op_ffma_rtp = 0x3B,

        /* Dot products and reduction */
        midgard_alu_op_fdot3 = 0x3C,
        midgard_alu_op_fdot3r = 0x3D,
        midgard_alu_op_fdot4 = 0x3E,
        midgard_alu_op_freduce = 0x3F,

        /* Integer add/subtract. ishladd is (a<<1) + b and ishlsub is
         * (a<<1) - b. */
        midgard_alu_op_iadd = 0x40,
        midgard_alu_op_ishladd = 0x41,
        midgard_alu_op_isub = 0x46,
        midgard_alu_op_ishlsub = 0x47,
        midgard_alu_op_iaddsat = 0x48,
        midgard_alu_op_uaddsat = 0x49,
        midgard_alu_op_isubsat = 0x4E,
        midgard_alu_op_usubsat = 0x4F,

        /* Integer multiply. The *wmul forms multiply two ints and store the
         * result in the next larger datasize:
         *   iwmul:  sint * sint = sint
         *   uwmul:  uint * uint = uint
         *   iuwmul: sint * uint = sint */
        midgard_alu_op_imul = 0x58,
        midgard_alu_op_iwmul = 0x59,
        midgard_alu_op_uwmul = 0x5A,
        midgard_alu_op_iuwmul = 0x5B,

        /* Integer min/max, averages and shifts */
        midgard_alu_op_imin = 0x60,
        midgard_alu_op_umin = 0x61,
        midgard_alu_op_imax = 0x62,
        midgard_alu_op_umax = 0x63,
        midgard_alu_op_iavg = 0x64,
        midgard_alu_op_uavg = 0x65,
        midgard_alu_op_iravg = 0x66,
        midgard_alu_op_uravg = 0x67,
        midgard_alu_op_iasr = 0x68,
        midgard_alu_op_ilsr = 0x69,
        midgard_alu_op_ishlsat = 0x6C,
        midgard_alu_op_ushlsat = 0x6D,
        midgard_alu_op_ishl = 0x6E,

        /* Bitwise logic:
         *   inand   = ~(a & b); for inot let a = b
         *   inor    = ~(a | b)
         *   iandnot = (a & ~b), used for not/b2f
         *   iornot  = (a | ~b)
         *   inxor   = ~(a ^ b) */
        midgard_alu_op_iand = 0x70,
        midgard_alu_op_ior = 0x71,
        midgard_alu_op_inand = 0x72,
        midgard_alu_op_inor = 0x73,
        midgard_alu_op_iandnot = 0x74,
        midgard_alu_op_iornot = 0x75,
        midgard_alu_op_ixor = 0x76,
        midgard_alu_op_inxor = 0x77,

        /* iclz: number of zeroes on left. ipopcnt: population count. */
        midgard_alu_op_iclz = 0x78,
        midgard_alu_op_ipopcnt = 0x7A,
        midgard_alu_op_imov = 0x7B,
        midgard_alu_op_iabsdiff = 0x7C,
        midgard_alu_op_uabsdiff = 0x7D,

        /* ichoose: vector, component number - dupe for shuffle() */
        midgard_alu_op_ichoose = 0x7E,

        /* Float comparisons */
        midgard_alu_op_feq = 0x80,
        midgard_alu_op_fne = 0x81,
        midgard_alu_op_flt = 0x82,
        midgard_alu_op_fle = 0x83,

        /* Float "all" comparisons, e.g. fball_lt is all(lessThan(.., ..))
         * and fball_lte is all(lessThanEqual(.., ..)). */
        midgard_alu_op_fball_eq = 0x88,
        midgard_alu_op_fball_neq = 0x89,
        midgard_alu_op_fball_lt = 0x8A,
        midgard_alu_op_fball_lte = 0x8B,

        /* Float "any" comparisons, e.g. fbany_lt is any(lessThan(.., ..))
         * and fbany_lte is any(lessThanEqual(.., ..)). */
        midgard_alu_op_fbany_eq = 0x90,
        midgard_alu_op_fbany_neq = 0x91,
        midgard_alu_op_fbany_lt = 0x92,
        midgard_alu_op_fbany_lte = 0x93,

        /* Float to signed / unsigned integer conversions */
        midgard_alu_op_f2i_rte = 0x98,
        midgard_alu_op_f2i_rtz = 0x99,
        midgard_alu_op_f2i_rtn = 0x9A,
        midgard_alu_op_f2i_rtp = 0x9B,
        midgard_alu_op_f2u_rte = 0x9C,
        midgard_alu_op_f2u_rtz = 0x9D,
        midgard_alu_op_f2u_rtn = 0x9E,
        midgard_alu_op_f2u_rtp = 0x9F,

        /* Integer comparisons */
        midgard_alu_op_ieq = 0xA0,
        midgard_alu_op_ine = 0xA1,
        midgard_alu_op_ult = 0xA2,
        midgard_alu_op_ule = 0xA3,
        midgard_alu_op_ilt = 0xA4,
        midgard_alu_op_ile = 0xA5,

        /* Integer "all" comparisons */
        midgard_alu_op_iball_eq = 0xA8,
        midgard_alu_op_iball_neq = 0xA9,
        midgard_alu_op_uball_lt = 0xAA,
        midgard_alu_op_uball_lte = 0xAB,
        midgard_alu_op_iball_lt = 0xAC,
        midgard_alu_op_iball_lte = 0xAD,

        /* Integer "any" comparisons, e.g. ibany_lt is
         * any(lessThan(.., ..)) and ibany_lte is
         * any(lessThanEqual(.., ..)). */
        midgard_alu_op_ibany_eq = 0xB0,
        midgard_alu_op_ibany_neq = 0xB1,
        midgard_alu_op_ubany_lt = 0xB2,
        midgard_alu_op_ubany_lte = 0xB3,
        midgard_alu_op_ibany_lt = 0xB4,
        midgard_alu_op_ibany_lte = 0xB5,

        /* Signed / unsigned integer to float conversions */
        midgard_alu_op_i2f_rte = 0xB8,
        midgard_alu_op_i2f_rtz = 0xB9,
        midgard_alu_op_i2f_rtn = 0xBA,
        midgard_alu_op_i2f_rtp = 0xBB,
        midgard_alu_op_u2f_rte = 0xBC,
        midgard_alu_op_u2f_rtz = 0xBD,
        midgard_alu_op_u2f_rtn = 0xBE,
        midgard_alu_op_u2f_rtp = 0xBF,

        /* Conditional selects. Every csel* instruction takes its condition
         * from the output of the previous vector or scalar unit, so it has
         * to run on the second pipeline stage and be scheduled into the
         * same bundle as the opcode producing that condition. */
        midgard_alu_op_icsel_v = 0xC0,
        midgard_alu_op_icsel = 0xC1,
        midgard_alu_op_fcsel_v = 0xC4,
        midgard_alu_op_fcsel = 0xC5,

        /* Round to nearest away */
        midgard_alu_op_froundaway = 0xC6,

        /* Partial steps of atan2 and pow */
        midgard_alu_op_fatan2_pt2 = 0xE8,
        midgard_alu_op_fpow_pt1 = 0xEC,
        midgard_alu_op_fpown_pt1 = 0xED,
        midgard_alu_op_fpowr_pt1 = 0xEE,

        /* Special functions. fsinpi is sin(pi * x) and fcospi is
         * cos(pi * x). */
        midgard_alu_op_frcp = 0xF0,
        midgard_alu_op_frsqrt = 0xF2,
        midgard_alu_op_fsqrt = 0xF3,
        midgard_alu_op_fexp2 = 0xF4,
        midgard_alu_op_flog2 = 0xF5,
        midgard_alu_op_fsinpi = 0xF6,
        midgard_alu_op_fcospi = 0xF7,
        midgard_alu_op_fatan2_pt1 = 0xF9,
} midgard_alu_op;
235bf215546Sopenharmony_ci
/* Output modifiers for float results (2-bit encoding). */
typedef enum {
        /* No modification */
        midgard_outmod_none = 0,

        /* max(x, 0.0), NaNs become +0.0 */
        midgard_outmod_clamp_0_inf = 1,

        /* clamp(x, -1.0, 1.0), NaNs become -1.0 */
        midgard_outmod_clamp_m1_1 = 2,

        /* clamp(x, 0.0, 1.0), NaNs become +0.0 */
        midgard_outmod_clamp_0_1 = 3
} midgard_outmod_float;
242bf215546Sopenharmony_ci
/* Output modifiers for integer results, applied to the value that is about
 * to be stored in the destination register. Use midgard_outmod_keeplo
 * whenever shrink_mode is midgard_shrink_mode_none. */
typedef enum {
        midgard_outmod_ssat = 0,

        midgard_outmod_usat = 1,

        /* Keep low half */
        midgard_outmod_keeplo = 2,

        /* Keep high half */
        midgard_outmod_keephi = 3,
} midgard_outmod_int;
251bf215546Sopenharmony_ci
/* Register mode; the enumerator suffix gives the width in bits. */
typedef enum {
        midgard_reg_mode_8 = 0,
        midgard_reg_mode_16 = 1,
        midgard_reg_mode_32 = 2,
        midgard_reg_mode_64 = 3
} midgard_reg_mode;
258bf215546Sopenharmony_ci
/* Shrink mode for a vector ALU result (see midgard_vector_alu.shrink_mode). */
typedef enum {
        midgard_shrink_mode_lower = 0,
        midgard_shrink_mode_upper = 1,
        midgard_shrink_mode_none = 2
} midgard_shrink_mode;
264bf215546Sopenharmony_ci
/* Integer source modifiers. They only take effect when
 * midgard_src_expand_mode is one of the midgard_src_expand_* values. */
typedef enum {
        midgard_int_sign_extend = 0,
        midgard_int_zero_extend = 1,
        midgard_int_replicate = 2,
        midgard_int_left_shift = 3
} midgard_int_mod;
272bf215546Sopenharmony_ci
/* Float source modifiers (bit flags). Unlike midgard_int_mod, float
 * modifiers are applied after the expansion happens, so they do not depend
 * on midgard_src_expand_mode. */
#define MIDGARD_FLOAT_MOD_ABS 0x1
#define MIDGARD_FLOAT_MOD_NEG 0x2
277bf215546Sopenharmony_ci
/* Source expand modes. The expand options depend on both midgard_int_mod and
 * midgard_reg_mode. For example, a vec4 with midgard_int_sign_extend and
 * midgard_src_expand_low is treated as a vec8 and each 16-bit element from
 * the low 64-bits is then sign extended, resulting in a vec4 where each
 * 32-bit element corresponds to a 16-bit element from the low 64-bits of the
 * input vector. */
typedef enum {
        midgard_src_passthrough = 0,
        midgard_src_rep_low = 1, /* replicate lower 64 bits to higher 64 bits */
        midgard_src_rep_high = 2, /* replicate higher 64 bits to lower 64 bits */
        midgard_src_swap = 3, /* swap lower 64 bits with higher 64 bits */
        midgard_src_expand_low = 4, /* expand low 64 bits */
        midgard_src_expand_high = 5, /* expand high 64 bits */
        midgard_src_expand_low_swap = 6, /* expand low 64 bits, then swap */
        midgard_src_expand_high_swap = 7, /* expand high 64 bits, then swap */
} midgard_src_expand_mode;

/* True when expand mode `a` is one of the four midgard_src_expand_* values.
 * The argument is parenthesized so that expressions such as `x & 7` or a
 * ternary keep their intended meaning. `a` is evaluated twice, so it must
 * not have side effects. */
#define INPUT_EXPANDS(a) \
        ((a) >= midgard_src_expand_low && (a) <= midgard_src_expand_high_swap)

/* True when expand mode `a` swaps the 64-bit halves: midgard_src_swap or one
 * of the *_swap expand modes. `a` may be evaluated twice, so it must not
 * have side effects. */
#define INPUT_SWAPS(a) \
        ((a) == midgard_src_swap || (a) >= midgard_src_expand_low_swap)
299bf215546Sopenharmony_ci
300bf215546Sopenharmony_citypedef struct
301bf215546Sopenharmony_ci__attribute__((__packed__))
302bf215546Sopenharmony_ci{
303bf215546Sopenharmony_ci        /* Either midgard_int_mod or from midgard_float_mod_*, depending on the
304bf215546Sopenharmony_ci         * type of op */
305bf215546Sopenharmony_ci        unsigned mod : 2;
306bf215546Sopenharmony_ci        midgard_src_expand_mode expand_mode : 3;
307bf215546Sopenharmony_ci        unsigned swizzle : 8;
308bf215546Sopenharmony_ci}
309bf215546Sopenharmony_cimidgard_vector_alu_src;
310bf215546Sopenharmony_ci
311bf215546Sopenharmony_citypedef struct
312bf215546Sopenharmony_ci__attribute__((__packed__))
313bf215546Sopenharmony_ci{
314bf215546Sopenharmony_ci        midgard_alu_op op               :  8;
315bf215546Sopenharmony_ci        midgard_reg_mode reg_mode       :  2;
316bf215546Sopenharmony_ci        unsigned src1                   : 13;
317bf215546Sopenharmony_ci        unsigned src2                   : 13;
318bf215546Sopenharmony_ci        midgard_shrink_mode shrink_mode :  2;
319bf215546Sopenharmony_ci        unsigned outmod                 :  2;
320bf215546Sopenharmony_ci        unsigned mask                   :  8;
321bf215546Sopenharmony_ci}
322bf215546Sopenharmony_cimidgard_vector_alu;
323bf215546Sopenharmony_ci
/* Scalar ALU source descriptor; the bit-fields total 6 bits. */
typedef struct __attribute__((__packed__)) {
        unsigned int mod : 2;

        /* 0 = 16-bit, 1 = 32-bit */
        bool full : 1;

        unsigned int component : 3;
} midgard_scalar_alu_src;
332bf215546Sopenharmony_ci
333bf215546Sopenharmony_citypedef struct
334bf215546Sopenharmony_ci__attribute__((__packed__))
335bf215546Sopenharmony_ci{
336bf215546Sopenharmony_ci        midgard_alu_op op         :  8;
337bf215546Sopenharmony_ci        unsigned src1             :  6;
338bf215546Sopenharmony_ci        /* last 5 bits are used when src2 is an immediate */
339bf215546Sopenharmony_ci        unsigned src2             : 11;
340bf215546Sopenharmony_ci        unsigned reserved         :  1;
341bf215546Sopenharmony_ci        unsigned outmod           :  2;
342bf215546Sopenharmony_ci        bool output_full          :  1;
343bf215546Sopenharmony_ci        unsigned output_component :  3;
344bf215546Sopenharmony_ci}
345bf215546Sopenharmony_cimidgard_scalar_alu;
346bf215546Sopenharmony_ci
/* Register selection for an ALU instruction: three 5-bit register numbers
 * plus a flag, 16 bits in total. */
typedef struct __attribute__((__packed__)) {
        unsigned int src1_reg : 5;
        unsigned int src2_reg : 5;
        unsigned int out_reg : 5;

        /* Presumably set when src2 is an immediate rather than a register
         * -- confirm against users. */
        bool src2_imm : 1;
} midgard_reg_info;
356bf215546Sopenharmony_ci
/* Besides conditional branches and jumps (unconditional branches), Midgard
 * exposes some fixed-function functionality to fragment shaders through
 * specially crafted branches. Those use dedicated branch opcodes that perform
 * a fixed-function operation and/or branch on the result of one. */

typedef enum {
        /* Regular branches */
        midgard_jmp_writeout_op_branch_uncond = 1,
        midgard_jmp_writeout_op_branch_cond = 2,

        /* Fragment shader only: execute a discard_if instruction using the
         * corresponding condition code. This terminates the shader, so the
         * branch target is generally set to out of the shader. */
        midgard_jmp_writeout_op_discard = 4,

        /* Branch while the tilebuffer is not yet ready. A fragment shader
         * that reads from the tile buffer (for instance via
         * ARM_shader_framebuffer_fetch or EXT_pixel_local_storage) should
         * begin with this operation used as a loop. An instruction like
         * "br.tilebuffer.always -1" does the trick, corresponding to
         * "while(!is_tilebuffer_ready)". */
        midgard_jmp_writeout_op_tilebuffer_pending = 6,

        /* Fragment shader only: try to write the value pushed to r0 out to
         * the tilebuffer, subject to state in r1.z and r1.w. On success the
         * shader terminates; on failure it branches to the specified
         * target. Generally used as a loop to itself, acting as
         * "do { write(r0); } while(!write_successful);" */
        midgard_jmp_writeout_op_writeout = 7,
} midgard_jmp_writeout_op;
388bf215546Sopenharmony_ci
/* 2-bit branch condition codes. */
typedef enum {
        midgard_condition_write0 = 0,

        /* Conditional branch on FALSE */
        midgard_condition_false = 1,

        /* Conditional branch on TRUE */
        midgard_condition_true = 2,

        /* Always branch. A pure branch should use the unconditional branch
         * coding instead, but this code is still useful with the
         * fixed-function branch opcodes. */
        midgard_condition_always = 3,
} midgard_condition;
402bf215546Sopenharmony_ci
/* Call mode stored in the 2-bit call_mode field of branch words. Note that
 * the numbering starts at 1; the value 0 is not named here. */
enum midgard_call_mode {
        midgard_call_mode_default = 1,
        midgard_call_mode_call    = 2,
        midgard_call_mode_return  = 3
};
408bf215546Sopenharmony_ci
409bf215546Sopenharmony_citypedef struct
410bf215546Sopenharmony_ci__attribute__((__packed__))
411bf215546Sopenharmony_ci{
412bf215546Sopenharmony_ci        midgard_jmp_writeout_op op : 3; /* == branch_uncond */
413bf215546Sopenharmony_ci        unsigned dest_tag : 4; /* tag of branch destination */
414bf215546Sopenharmony_ci        enum midgard_call_mode call_mode : 2;
415bf215546Sopenharmony_ci        int offset : 7;
416bf215546Sopenharmony_ci}
417bf215546Sopenharmony_cimidgard_branch_uncond;
418bf215546Sopenharmony_ci
419bf215546Sopenharmony_citypedef struct
420bf215546Sopenharmony_ci__attribute__((__packed__))
421bf215546Sopenharmony_ci{
422bf215546Sopenharmony_ci        midgard_jmp_writeout_op op : 3; /* == branch_cond */
423bf215546Sopenharmony_ci        unsigned dest_tag : 4; /* tag of branch destination */
424bf215546Sopenharmony_ci        int offset : 7;
425bf215546Sopenharmony_ci        midgard_condition cond : 2;
426bf215546Sopenharmony_ci}
427bf215546Sopenharmony_cimidgard_branch_cond;
428bf215546Sopenharmony_ci
429bf215546Sopenharmony_citypedef struct
430bf215546Sopenharmony_ci__attribute__((__packed__))
431bf215546Sopenharmony_ci{
432bf215546Sopenharmony_ci        midgard_jmp_writeout_op op : 3; /* == branch_cond */
433bf215546Sopenharmony_ci        unsigned dest_tag : 4; /* tag of branch destination */
434bf215546Sopenharmony_ci        enum midgard_call_mode call_mode : 2;
435bf215546Sopenharmony_ci        signed offset : 23;
436bf215546Sopenharmony_ci
437bf215546Sopenharmony_ci        /* Extended branches permit inputting up to 4 conditions loaded into
438bf215546Sopenharmony_ci         * r31 (two in r31.w and two in r31.x). In the most general case, we
439bf215546Sopenharmony_ci         * specify a function f(A, B, C, D) mapping 4 1-bit conditions to a
440bf215546Sopenharmony_ci         * single 1-bit branch criteria. Note that the domain of f has 2^(2^4)
441bf215546Sopenharmony_ci         * elements, each mapping to 1-bit of output, so we can trivially
442bf215546Sopenharmony_ci         * construct a Godel numbering of f as a (2^4)=16-bit integer. This
443bf215546Sopenharmony_ci         * 16-bit integer serves as a lookup table to compute f, subject to
444bf215546Sopenharmony_ci         * some swaps for ordering.
445bf215546Sopenharmony_ci         *
446bf215546Sopenharmony_ci         * Interesting, the standard 2-bit condition codes are also a LUT with
447bf215546Sopenharmony_ci         * the same format (2^1-bit), but it's usually easier to use enums. */
448bf215546Sopenharmony_ci
449bf215546Sopenharmony_ci        unsigned cond : 16;
450bf215546Sopenharmony_ci}
451bf215546Sopenharmony_cimidgard_branch_extended;
452bf215546Sopenharmony_ci
453bf215546Sopenharmony_citypedef struct
454bf215546Sopenharmony_ci__attribute__((__packed__))
455bf215546Sopenharmony_ci{
456bf215546Sopenharmony_ci        midgard_jmp_writeout_op op : 3; /* == writeout */
457bf215546Sopenharmony_ci        unsigned unknown : 13;
458bf215546Sopenharmony_ci}
459bf215546Sopenharmony_cimidgard_writeout;
460bf215546Sopenharmony_ci
461bf215546Sopenharmony_ci/*
462bf215546Sopenharmony_ci * Load/store words
463bf215546Sopenharmony_ci */
464bf215546Sopenharmony_ci
465bf215546Sopenharmony_citypedef enum {
466bf215546Sopenharmony_ci        midgard_op_ld_st_noop   = 0x03,
467bf215546Sopenharmony_ci
468bf215546Sopenharmony_ci        /* Unpacks a colour from a native format to <format> */
469bf215546Sopenharmony_ci        midgard_op_unpack_colour_f32 = 0x04,
470bf215546Sopenharmony_ci        midgard_op_unpack_colour_f16 = 0x05,
471bf215546Sopenharmony_ci        midgard_op_unpack_colour_u32 = 0x06,
472bf215546Sopenharmony_ci        midgard_op_unpack_colour_s32 = 0x07,
473bf215546Sopenharmony_ci
474bf215546Sopenharmony_ci        /* Packs a colour from <format> to a native format */
475bf215546Sopenharmony_ci        midgard_op_pack_colour_f32 = 0x08,
476bf215546Sopenharmony_ci        midgard_op_pack_colour_f16 = 0x09,
477bf215546Sopenharmony_ci        midgard_op_pack_colour_u32 = 0x0A,
478bf215546Sopenharmony_ci        midgard_op_pack_colour_s32 = 0x0B,
479bf215546Sopenharmony_ci
480bf215546Sopenharmony_ci        /* Computes the effective address of a mem address expression */
481bf215546Sopenharmony_ci        midgard_op_lea = 0x0C,
482bf215546Sopenharmony_ci
483bf215546Sopenharmony_ci        /* Converts image coordinates into mem address */
484bf215546Sopenharmony_ci        midgard_op_lea_image = 0x0D,
485bf215546Sopenharmony_ci
486bf215546Sopenharmony_ci        /* Unclear why this is on the L/S unit, but moves fp32 cube map
487bf215546Sopenharmony_ci         * coordinates in r27 to its cube map texture coordinate destination
488bf215546Sopenharmony_ci         * (e.g r29). */
489bf215546Sopenharmony_ci
490bf215546Sopenharmony_ci        midgard_op_ld_cubemap_coords = 0x0E,
491bf215546Sopenharmony_ci
492bf215546Sopenharmony_ci        /* A mov between registers that the ldst pipeline can access */
493bf215546Sopenharmony_ci        midgard_op_ldst_mov = 0x10,
494bf215546Sopenharmony_ci
495bf215546Sopenharmony_ci        /* The L/S unit can do perspective division a clock faster than the ALU
496bf215546Sopenharmony_ci         * if you're lucky. Put the vec4 in r27, and call with 0x24 as the
497bf215546Sopenharmony_ci         * unknown state; the output will be <x/w, y/w, z/w, 1>. Replace w with
498bf215546Sopenharmony_ci         * z for the z version */
499bf215546Sopenharmony_ci        midgard_op_ldst_perspective_div_y = 0x11,
500bf215546Sopenharmony_ci        midgard_op_ldst_perspective_div_z = 0x12,
501bf215546Sopenharmony_ci        midgard_op_ldst_perspective_div_w = 0x13,
502bf215546Sopenharmony_ci
503bf215546Sopenharmony_ci        /* val in r27.y, address embedded, outputs result to argument. Invert val for sub. Let val = +-1 for inc/dec. */
504bf215546Sopenharmony_ci        midgard_op_atomic_add = 0x40,
505bf215546Sopenharmony_ci        midgard_op_atomic_add64 = 0x41,
506bf215546Sopenharmony_ci        midgard_op_atomic_add_be = 0x42,
507bf215546Sopenharmony_ci        midgard_op_atomic_add64_be = 0x43,
508bf215546Sopenharmony_ci
509bf215546Sopenharmony_ci        midgard_op_atomic_and = 0x44,
510bf215546Sopenharmony_ci        midgard_op_atomic_and64 = 0x45,
511bf215546Sopenharmony_ci        midgard_op_atomic_and_be = 0x46,
512bf215546Sopenharmony_ci        midgard_op_atomic_and64_be = 0x47,
513bf215546Sopenharmony_ci        midgard_op_atomic_or = 0x48,
514bf215546Sopenharmony_ci        midgard_op_atomic_or64 = 0x49,
515bf215546Sopenharmony_ci        midgard_op_atomic_or_be = 0x4A,
516bf215546Sopenharmony_ci        midgard_op_atomic_or64_be = 0x4B,
517bf215546Sopenharmony_ci        midgard_op_atomic_xor = 0x4C,
518bf215546Sopenharmony_ci        midgard_op_atomic_xor64 = 0x4D,
519bf215546Sopenharmony_ci        midgard_op_atomic_xor_be = 0x4E,
520bf215546Sopenharmony_ci        midgard_op_atomic_xor64_be = 0x4F,
521bf215546Sopenharmony_ci
522bf215546Sopenharmony_ci        midgard_op_atomic_imin = 0x50,
523bf215546Sopenharmony_ci        midgard_op_atomic_imin64 = 0x51,
524bf215546Sopenharmony_ci        midgard_op_atomic_imin_be = 0x52,
525bf215546Sopenharmony_ci        midgard_op_atomic_imin64_be = 0x53,
526bf215546Sopenharmony_ci        midgard_op_atomic_umin = 0x54,
527bf215546Sopenharmony_ci        midgard_op_atomic_umin64 = 0x55,
528bf215546Sopenharmony_ci        midgard_op_atomic_umin_be = 0x56,
529bf215546Sopenharmony_ci        midgard_op_atomic_umin64_be = 0x57,
530bf215546Sopenharmony_ci        midgard_op_atomic_imax = 0x58,
531bf215546Sopenharmony_ci        midgard_op_atomic_imax64 = 0x59,
532bf215546Sopenharmony_ci        midgard_op_atomic_imax_be = 0x5A,
533bf215546Sopenharmony_ci        midgard_op_atomic_imax64_be = 0x5B,
534bf215546Sopenharmony_ci        midgard_op_atomic_umax = 0x5C,
535bf215546Sopenharmony_ci        midgard_op_atomic_umax64 = 0x5D,
536bf215546Sopenharmony_ci        midgard_op_atomic_umax_be = 0x5E,
537bf215546Sopenharmony_ci        midgard_op_atomic_umax64_be = 0x5F,
538bf215546Sopenharmony_ci
539bf215546Sopenharmony_ci        midgard_op_atomic_xchg = 0x60,
540bf215546Sopenharmony_ci        midgard_op_atomic_xchg64 = 0x61,
541bf215546Sopenharmony_ci        midgard_op_atomic_xchg_be = 0x62,
542bf215546Sopenharmony_ci        midgard_op_atomic_xchg64_be = 0x63,
543bf215546Sopenharmony_ci
544bf215546Sopenharmony_ci        midgard_op_atomic_cmpxchg = 0x64,
545bf215546Sopenharmony_ci        midgard_op_atomic_cmpxchg64 = 0x65,
546bf215546Sopenharmony_ci        midgard_op_atomic_cmpxchg_be = 0x66,
547bf215546Sopenharmony_ci        midgard_op_atomic_cmpxchg64_be = 0x67,
548bf215546Sopenharmony_ci
549bf215546Sopenharmony_ci        /* Used for compute shader's __global arguments, __local
550bf215546Sopenharmony_ci         * variables (or for register spilling) */
551bf215546Sopenharmony_ci
552bf215546Sopenharmony_ci        midgard_op_ld_u8         = 0x80, /* zero extends */
553bf215546Sopenharmony_ci        midgard_op_ld_i8         = 0x81, /* sign extends */
554bf215546Sopenharmony_ci        midgard_op_ld_u16        = 0x84, /* zero extends */
555bf215546Sopenharmony_ci        midgard_op_ld_i16        = 0x85, /* sign extends */
556bf215546Sopenharmony_ci        midgard_op_ld_u16_be     = 0x86, /* zero extends, big endian */
557bf215546Sopenharmony_ci        midgard_op_ld_i16_be     = 0x87, /* sign extends, big endian */
558bf215546Sopenharmony_ci        midgard_op_ld_32         = 0x88, /* short2, int, float */
559bf215546Sopenharmony_ci        midgard_op_ld_32_bswap2  = 0x89, /* 16-bit big endian vector */
560bf215546Sopenharmony_ci        midgard_op_ld_32_bswap4  = 0x8A, /* 32-bit big endian scalar */
561bf215546Sopenharmony_ci        midgard_op_ld_64         = 0x8C, /* int2, float2, long */
562bf215546Sopenharmony_ci        midgard_op_ld_64_bswap2  = 0x8D, /* 16-bit big endian vector */
563bf215546Sopenharmony_ci        midgard_op_ld_64_bswap4  = 0x8E, /* 32-bit big endian vector */
564bf215546Sopenharmony_ci        midgard_op_ld_64_bswap8  = 0x8F, /* 64-bit big endian scalar */
565bf215546Sopenharmony_ci        midgard_op_ld_128        = 0x90, /* float4, long2 */
566bf215546Sopenharmony_ci        midgard_op_ld_128_bswap2 = 0x91, /* 16-bit big endian vector */
567bf215546Sopenharmony_ci        midgard_op_ld_128_bswap4 = 0x92, /* 32-bit big endian vector */
568bf215546Sopenharmony_ci        midgard_op_ld_128_bswap8 = 0x93, /* 64-bit big endian vector */
569bf215546Sopenharmony_ci
570bf215546Sopenharmony_ci        midgard_op_ld_attr_32 = 0x94,
571bf215546Sopenharmony_ci        midgard_op_ld_attr_16 = 0x95,
572bf215546Sopenharmony_ci        midgard_op_ld_attr_32u = 0x96,
573bf215546Sopenharmony_ci        midgard_op_ld_attr_32i = 0x97,
574bf215546Sopenharmony_ci        midgard_op_ld_vary_32 = 0x98,
575bf215546Sopenharmony_ci        midgard_op_ld_vary_16 = 0x99,
576bf215546Sopenharmony_ci        midgard_op_ld_vary_32u = 0x9A,
577bf215546Sopenharmony_ci        midgard_op_ld_vary_32i = 0x9B,
578bf215546Sopenharmony_ci
579bf215546Sopenharmony_ci        /* This instruction behaves differently depending if the gpu is a v4
580bf215546Sopenharmony_ci         * or a newer gpu. The main difference hinges on which values of the
581bf215546Sopenharmony_ci         * second argument are valid for each gpu.
582bf215546Sopenharmony_ci         * TODO: properly document and decode each possible value for the
583bf215546Sopenharmony_ci         * second argument. */
584bf215546Sopenharmony_ci        midgard_op_ld_special_32f = 0x9C,
585bf215546Sopenharmony_ci        midgard_op_ld_special_16f = 0x9D,
586bf215546Sopenharmony_ci        midgard_op_ld_special_32u = 0x9E,
587bf215546Sopenharmony_ci        midgard_op_ld_special_32i = 0x9F,
588bf215546Sopenharmony_ci
589bf215546Sopenharmony_ci        /* The distinction between these ops is the alignment
590bf215546Sopenharmony_ci         * requirement / accompanying shift. Thus, the offset to
591bf215546Sopenharmony_ci         * ld_ubo_128 is in 16-byte units and can load 128-bit. The
592bf215546Sopenharmony_ci         * offset to ld_ubo_64 is in 8-byte units; ld_ubo_32 in 4-byte
593bf215546Sopenharmony_ci         * units. */
594bf215546Sopenharmony_ci        midgard_op_ld_ubo_u8         = 0xA0, /* theoretical */
595bf215546Sopenharmony_ci        midgard_op_ld_ubo_i8         = 0xA1, /* theoretical */
596bf215546Sopenharmony_ci        midgard_op_ld_ubo_u16        = 0xA4, /* theoretical */
597bf215546Sopenharmony_ci        midgard_op_ld_ubo_i16        = 0xA5, /* theoretical */
598bf215546Sopenharmony_ci        midgard_op_ld_ubo_u16_be     = 0xA6, /* theoretical */
599bf215546Sopenharmony_ci        midgard_op_ld_ubo_i16_be     = 0xA7, /* theoretical */
600bf215546Sopenharmony_ci        midgard_op_ld_ubo_32         = 0xA8,
601bf215546Sopenharmony_ci        midgard_op_ld_ubo_32_bswap2  = 0xA9,
602bf215546Sopenharmony_ci        midgard_op_ld_ubo_32_bswap4  = 0xAA,
603bf215546Sopenharmony_ci        midgard_op_ld_ubo_64         = 0xAC,
604bf215546Sopenharmony_ci        midgard_op_ld_ubo_64_bswap2  = 0xAD,
605bf215546Sopenharmony_ci        midgard_op_ld_ubo_64_bswap4  = 0xAE,
606bf215546Sopenharmony_ci        midgard_op_ld_ubo_64_bswap8  = 0xAF,
607bf215546Sopenharmony_ci        midgard_op_ld_ubo_128        = 0xB0,
608bf215546Sopenharmony_ci        midgard_op_ld_ubo_128_bswap2 = 0xB1,
609bf215546Sopenharmony_ci        midgard_op_ld_ubo_128_bswap4 = 0xB2,
610bf215546Sopenharmony_ci        midgard_op_ld_ubo_128_bswap8 = 0xB3,
611bf215546Sopenharmony_ci
612bf215546Sopenharmony_ci        midgard_op_ld_image_32f = 0xB4,
613bf215546Sopenharmony_ci        midgard_op_ld_image_16f = 0xB5,
614bf215546Sopenharmony_ci        midgard_op_ld_image_32u = 0xB6,
615bf215546Sopenharmony_ci        midgard_op_ld_image_32i = 0xB7,
616bf215546Sopenharmony_ci
617bf215546Sopenharmony_ci        /* Only works on v5 or newer.
618bf215546Sopenharmony_ci         * Older cards must use ld_special with tilebuffer selectors. */
619bf215546Sopenharmony_ci        midgard_op_ld_tilebuffer_32f = 0xB8,
620bf215546Sopenharmony_ci        midgard_op_ld_tilebuffer_16f = 0xB9,
621bf215546Sopenharmony_ci        midgard_op_ld_tilebuffer_raw = 0xBA,
622bf215546Sopenharmony_ci
623bf215546Sopenharmony_ci        midgard_op_st_u8         = 0xC0, /* zero extends */
624bf215546Sopenharmony_ci        midgard_op_st_i8         = 0xC1, /* sign extends */
625bf215546Sopenharmony_ci        midgard_op_st_u16        = 0xC4, /* zero extends */
626bf215546Sopenharmony_ci        midgard_op_st_i16        = 0xC5, /* sign extends */
627bf215546Sopenharmony_ci        midgard_op_st_u16_be     = 0xC6, /* zero extends, big endian */
628bf215546Sopenharmony_ci        midgard_op_st_i16_be     = 0xC7, /* sign extends, big endian */
629bf215546Sopenharmony_ci        midgard_op_st_32         = 0xC8, /* short2, int, float */
630bf215546Sopenharmony_ci        midgard_op_st_32_bswap2  = 0xC9, /* 16-bit big endian vector */
631bf215546Sopenharmony_ci        midgard_op_st_32_bswap4  = 0xCA, /* 32-bit big endian scalar */
632bf215546Sopenharmony_ci        midgard_op_st_64         = 0xCC, /* int2, float2, long */
633bf215546Sopenharmony_ci        midgard_op_st_64_bswap2  = 0xCD, /* 16-bit big endian vector */
634bf215546Sopenharmony_ci        midgard_op_st_64_bswap4  = 0xCE, /* 32-bit big endian vector */
635bf215546Sopenharmony_ci        midgard_op_st_64_bswap8  = 0xCF, /* 64-bit big endian scalar */
636bf215546Sopenharmony_ci        midgard_op_st_128        = 0xD0, /* float4, long2 */
637bf215546Sopenharmony_ci        midgard_op_st_128_bswap2 = 0xD1, /* 16-bit big endian vector */
638bf215546Sopenharmony_ci        midgard_op_st_128_bswap4 = 0xD2, /* 32-bit big endian vector */
639bf215546Sopenharmony_ci        midgard_op_st_128_bswap8 = 0xD3, /* 64-bit big endian vector */
640bf215546Sopenharmony_ci
641bf215546Sopenharmony_ci        midgard_op_st_vary_32 = 0xD4,
642bf215546Sopenharmony_ci        midgard_op_st_vary_16 = 0xD5,
643bf215546Sopenharmony_ci        midgard_op_st_vary_32u = 0xD6,
644bf215546Sopenharmony_ci        midgard_op_st_vary_32i = 0xD7,
645bf215546Sopenharmony_ci
646bf215546Sopenharmony_ci        /* Value to st in r27, location r26.w as short2 */
647bf215546Sopenharmony_ci        midgard_op_st_image_32f = 0xD8,
648bf215546Sopenharmony_ci        midgard_op_st_image_16f = 0xD9,
649bf215546Sopenharmony_ci        midgard_op_st_image_32u = 0xDA,
650bf215546Sopenharmony_ci        midgard_op_st_image_32i = 0xDB,
651bf215546Sopenharmony_ci
652bf215546Sopenharmony_ci        midgard_op_st_special_32f = 0xDC,
653bf215546Sopenharmony_ci        midgard_op_st_special_16f = 0xDD,
654bf215546Sopenharmony_ci        midgard_op_st_special_32u = 0xDE,
655bf215546Sopenharmony_ci        midgard_op_st_special_32i = 0xDF,
656bf215546Sopenharmony_ci
657bf215546Sopenharmony_ci        /* Only works on v5 or newer.
658bf215546Sopenharmony_ci         * Older cards must use ld_special with tilebuffer selectors. */
659bf215546Sopenharmony_ci        midgard_op_st_tilebuffer_32f = 0xE8,
660bf215546Sopenharmony_ci        midgard_op_st_tilebuffer_16f = 0xE9,
661bf215546Sopenharmony_ci        midgard_op_st_tilebuffer_raw = 0xEA,
662bf215546Sopenharmony_ci        midgard_op_trap = 0xFC,
663bf215546Sopenharmony_ci} midgard_load_store_op;
664bf215546Sopenharmony_ci
/* Interpolation selector. Each enumerator carries an explicit numeric value
 * (presumably the hardware encoding -- confirm before renumbering). */
typedef enum {
        midgard_interp_sample   = 0x0,
        midgard_interp_centroid = 0x1,
        midgard_interp_default  = 0x2,
} midgard_interpolation;
670bf215546Sopenharmony_ci
/* Modifier applied to the result of a varying load. */
typedef enum {
        /* Result is used as-is */
        midgard_varying_mod_none          = 0x0,

        /* Perspective division baked into the load: every component of the
         * would-be result is divided by that result's y, z or w component
         * respectively. */
        midgard_varying_mod_perspective_y = 0x1,
        midgard_varying_mod_perspective_z = 0x2,
        midgard_varying_mod_perspective_w = 0x3,

        /* Produces a 64-bit cubemap descriptor, meant to be consumed by
         * midgard_tex_op_normal or midgard_tex_op_gradient. */
        midgard_varying_mod_cubemap       = 0x4,
} midgard_varying_modifier;
684bf215546Sopenharmony_ci
685bf215546Sopenharmony_citypedef struct
686bf215546Sopenharmony_ci__attribute__((__packed__))
687bf215546Sopenharmony_ci{
688bf215546Sopenharmony_ci        midgard_varying_modifier modifier : 3;
689bf215546Sopenharmony_ci
690bf215546Sopenharmony_ci        bool flat_shading : 1;
691bf215546Sopenharmony_ci
692bf215546Sopenharmony_ci        /* These are ignored if flat_shading is enabled. */
693bf215546Sopenharmony_ci        bool perspective_correction : 1;
694bf215546Sopenharmony_ci        bool centroid_mapping : 1;
695bf215546Sopenharmony_ci
696bf215546Sopenharmony_ci        /* This is ignored if the shader only runs once per pixel. */
697bf215546Sopenharmony_ci        bool interpolate_sample : 1;
698bf215546Sopenharmony_ci
699bf215546Sopenharmony_ci        bool zero0 : 1; /* Always zero */
700bf215546Sopenharmony_ci
701bf215546Sopenharmony_ci        unsigned direct_sample_pos_x : 4;
702bf215546Sopenharmony_ci        unsigned direct_sample_pos_y : 4;
703bf215546Sopenharmony_ci}
704bf215546Sopenharmony_cimidgard_varying_params;
705bf215546Sopenharmony_ci
/* Register (or similar) selector used by load/store ops. The four bit-fields
 * add up to exactly 8 bits. */
typedef struct __attribute__((__packed__)) {
        /* Component index within the selected register */
        unsigned component : 2;

        /* Chooses between r26 and r27 */
        unsigned select    : 1;

        unsigned unknown   : 2;

        /* Like any good Arm instruction set, a load/store argument can be
         * implicitly left-shifted -- but only the second argument. Zero
         * means no shift; shifts of up to <<7 are possible. Handy for
         * indexing.
         *
         * What these bits mean for the first argument is unknown. */
        unsigned shift     : 3;
} midgard_ldst_register_select;
726bf215546Sopenharmony_ci
/* Format selector for the index part of an address. The value 0 is reserved,
 * so the enumerators start at 1. */
typedef enum {
        midgard_index_address_u64 = 0x1,
        midgard_index_address_u32 = 0x2,
        midgard_index_address_s32 = 0x3,
} midgard_index_address_format;
733bf215546Sopenharmony_ci
734bf215546Sopenharmony_citypedef struct
735bf215546Sopenharmony_ci__attribute__((__packed__))
736bf215546Sopenharmony_ci{
737bf215546Sopenharmony_ci        midgard_load_store_op op : 8;
738bf215546Sopenharmony_ci
739bf215546Sopenharmony_ci        /* Source/dest reg */
740bf215546Sopenharmony_ci        unsigned reg  : 5;
741bf215546Sopenharmony_ci
742bf215546Sopenharmony_ci        /* Generally is a writemask.
743bf215546Sopenharmony_ci         * For ST_ATTR and ST_TEX, unused.
744bf215546Sopenharmony_ci         * For other stores, each bit masks 1/4th of the output. */
745bf215546Sopenharmony_ci        unsigned mask : 4;
746bf215546Sopenharmony_ci
747bf215546Sopenharmony_ci        /* Swizzle for stores, but for atomics it encodes also the source
748bf215546Sopenharmony_ci         * register. This fits because atomics dont need a swizzle since they
749bf215546Sopenharmony_ci         * are not vectorized instructions. */
750bf215546Sopenharmony_ci        unsigned swizzle : 8;
751bf215546Sopenharmony_ci
752bf215546Sopenharmony_ci        /* Arg reg, meaning changes according to each opcode */
753bf215546Sopenharmony_ci        unsigned arg_comp : 2;
754bf215546Sopenharmony_ci        unsigned arg_reg  : 3;
755bf215546Sopenharmony_ci
756bf215546Sopenharmony_ci        /* 64-bit address enable
757bf215546Sopenharmony_ci         * 32-bit data type enable for CUBEMAP and perspective div.
758bf215546Sopenharmony_ci         * Explicit indexing enable for LD_ATTR.
759bf215546Sopenharmony_ci         * 64-bit coordinate enable for LD_IMAGE. */
760bf215546Sopenharmony_ci        bool bitsize_toggle : 1;
761bf215546Sopenharmony_ci
762bf215546Sopenharmony_ci        /* These are mainly used for opcodes that have addresses.
763bf215546Sopenharmony_ci         * For cmpxchg, index_reg is used for the comparison value.
764bf215546Sopenharmony_ci         * For ops that access the attrib table, bit 1 encodes which table.
765bf215546Sopenharmony_ci         * For LD_VAR and LD/ST_ATTR, bit 0 enables dest/src type inferral. */
766bf215546Sopenharmony_ci        midgard_index_address_format index_format : 2;
767bf215546Sopenharmony_ci        unsigned index_comp  : 2;
768bf215546Sopenharmony_ci        unsigned index_reg   : 3;
769bf215546Sopenharmony_ci        unsigned index_shift : 4;
770bf215546Sopenharmony_ci
771bf215546Sopenharmony_ci        /* Generaly is a signed offset, but has different bitsize and starts at
772bf215546Sopenharmony_ci         * different bits depending on the opcode, LDST_*_DISPLACEMENT helpers
773bf215546Sopenharmony_ci         * are recommended when packing/unpacking this attribute.
774bf215546Sopenharmony_ci         * For LD_UBO, bit 0 enables ubo index immediate.
775bf215546Sopenharmony_ci         * For LD_TILEBUFFER_RAW, bit 0 disables sample index immediate. */
776bf215546Sopenharmony_ci        int signed_offset : 18;
777bf215546Sopenharmony_ci}
778bf215546Sopenharmony_cimidgard_load_store_word;
779bf215546Sopenharmony_ci
/* Load/store bundle: a 4-bit type tag, a 4-bit tag for the next bundle and
 * two 60-bit instruction words, 128 bits in total. */
typedef struct __attribute__((__packed__)) {
        unsigned type      : 4;
        unsigned next_type : 4;

        /* The two 60-bit instruction slots */
        uint64_t word1 : 60;
        uint64_t word2 : 60;
} midgard_load_store;
789bf215546Sopenharmony_ci
/* Register selector for texture ops, 8 bits wide. It picks the register that
 * supplies a bias/LOD/gradient and is stored in the `bias` field. */
typedef struct __attribute__((__packed__)) {
        /* Set for a 32-bit register, clear for a half-register */
        unsigned full      : 1;

        /* Chooses between r28 and r29 */
        unsigned select    : 1;

        /* Half-register only: selects the upper half */
        unsigned upper     : 1;

        /* Component index within the register */
        unsigned component : 2;

        /* Pads the structure out to 8 bits */
        unsigned zero      : 3;
} midgard_tex_register_select;
812bf215546Sopenharmony_ci
/* First register of the texture pipeline result range (r28-r29) */
#define REG_TEX_BASE 28
815bf215546Sopenharmony_ci
/* Texture pipeline opcodes */
enum mali_texture_op {
        /* Texture + LOD bias. For a mipmapped texture, barriers have to be
         * enabled in the instruction word or this opcode will not compute
         * its output correctly. */
        midgard_tex_op_normal               = 1,

        /* Texture + gradient for LOD and anisotropy. No barriers are needed
         * for a correct result, unlike midgard_tex_op_normal. */
        midgard_tex_op_gradient             = 2,

        /* Unfiltered texturing. No barriers are needed for a correct
         * result, unlike midgard_tex_op_normal. */
        midgard_tex_op_fetch                = 4,

        /* Gradient from derivative */
        midgard_tex_op_grad_from_derivative = 9,

        /* Mov */
        midgard_tex_op_mov                  = 10,

        /* No-op, mostly used for barriers */
        midgard_tex_op_barrier              = 11,

        /* Gradient from coords */
        midgard_tex_op_grad_from_coords     = 12,

        /* Derivative, computed in 2x2 fragment blocks */
        midgard_tex_op_derivative           = 13,
};
850bf215546Sopenharmony_ci
/* Sampler data type. The value 0 is reserved, so enumerators start at 1. */
enum mali_sampler_type {
        MALI_SAMPLER_FLOAT    = 1, /* sampler */
        MALI_SAMPLER_UNSIGNED = 2, /* usampler */
        MALI_SAMPLER_SIGNED   = 3, /* isampler */
};
857bf215546Sopenharmony_ci
/* Texture modes: normal, shadow, and the gather variants (one per gathered
 * component, plus gather-with-shadow). */
enum mali_texture_mode {
        TEXTURE_NORMAL        = 0x1,
        TEXTURE_SHADOW        = 0x5,
        TEXTURE_GATHER_SHADOW = 0x6,
        TEXTURE_GATHER_X      = 0x8,
        TEXTURE_GATHER_Y      = 0x9,
        TEXTURE_GATHER_Z      = 0xA,
        TEXTURE_GATHER_W      = 0xB,
};
868bf215546Sopenharmony_ci
/* Selects which derivative (dFdx or dFdy) is taken */
enum mali_derivative_mode {
        TEXTURE_DFDX = 0x0,
        TEXTURE_DFDY = 0x1,
};
873bf215546Sopenharmony_ci
/* Partial execution mode; stored in the 2-bit `exec` field of
 * midgard_texture_word. */
enum midgard_partial_execution {
        MIDGARD_PARTIAL_EXECUTION_SKIP = 0x1,
        MIDGARD_PARTIAL_EXECUTION_KILL = 0x2,
        MIDGARD_PARTIAL_EXECUTION_NONE = 0x3,
};
879bf215546Sopenharmony_ci
880bf215546Sopenharmony_citypedef struct
881bf215546Sopenharmony_ci__attribute__((__packed__))
882bf215546Sopenharmony_ci{
883bf215546Sopenharmony_ci        unsigned type      : 4;
884bf215546Sopenharmony_ci        unsigned next_type : 4;
885bf215546Sopenharmony_ci
886bf215546Sopenharmony_ci        enum mali_texture_op op  : 4;
887bf215546Sopenharmony_ci        unsigned mode : 4;
888bf215546Sopenharmony_ci        enum midgard_partial_execution exec : 2;
889bf215546Sopenharmony_ci
890bf215546Sopenharmony_ci        unsigned format : 2;
891bf215546Sopenharmony_ci
892bf215546Sopenharmony_ci        /* Are sampler_handle/texture_handler respectively set by registers? If
893bf215546Sopenharmony_ci         * true, the lower 8-bits of the respective field is a register word.
894bf215546Sopenharmony_ci         * If false, they are an immediate */
895bf215546Sopenharmony_ci
896bf215546Sopenharmony_ci        unsigned sampler_register : 1;
897bf215546Sopenharmony_ci        unsigned texture_register : 1;
898bf215546Sopenharmony_ci
899bf215546Sopenharmony_ci        /* Is a register used to specify the
900bf215546Sopenharmony_ci         * LOD/bias/offset? If set, use the `bias` field as
901bf215546Sopenharmony_ci         * a register index. If clear, use the `bias` field
902bf215546Sopenharmony_ci         * as an immediate. */
903bf215546Sopenharmony_ci        unsigned lod_register : 1;
904bf215546Sopenharmony_ci
905bf215546Sopenharmony_ci        /* Is a register used to specify an offset? If set, use the
906bf215546Sopenharmony_ci         * offset_reg_* fields to encode this, duplicated for each of the
907bf215546Sopenharmony_ci         * components. If clear, there is implcitly always an immediate offst
908bf215546Sopenharmony_ci         * specificed in offset_imm_* */
909bf215546Sopenharmony_ci        unsigned offset_register : 1;
910bf215546Sopenharmony_ci
911bf215546Sopenharmony_ci        unsigned in_reg_full  : 1;
912bf215546Sopenharmony_ci        unsigned in_reg_select : 1;
913bf215546Sopenharmony_ci        unsigned in_reg_upper  : 1;
914bf215546Sopenharmony_ci        unsigned in_reg_swizzle : 8;
915bf215546Sopenharmony_ci
916bf215546Sopenharmony_ci        unsigned unknown8  : 2;
917bf215546Sopenharmony_ci
918bf215546Sopenharmony_ci        unsigned out_full  : 1;
919bf215546Sopenharmony_ci
920bf215546Sopenharmony_ci        enum mali_sampler_type sampler_type : 2;
921bf215546Sopenharmony_ci
922bf215546Sopenharmony_ci        unsigned out_reg_select : 1;
923bf215546Sopenharmony_ci        unsigned out_upper : 1;
924bf215546Sopenharmony_ci
925bf215546Sopenharmony_ci        unsigned mask : 4;
926bf215546Sopenharmony_ci
927bf215546Sopenharmony_ci        /* Intriguingly, textures can take an outmod just like alu ops. Int
928bf215546Sopenharmony_ci         * outmods are not supported as far as I can tell, so this is only
929bf215546Sopenharmony_ci         * meaningful for float samplers */
930bf215546Sopenharmony_ci        midgard_outmod_float outmod  : 2;
931bf215546Sopenharmony_ci
932bf215546Sopenharmony_ci        unsigned swizzle  : 8;
933bf215546Sopenharmony_ci
934bf215546Sopenharmony_ci         /* These indicate how many bundles after this texture op may be
935bf215546Sopenharmony_ci          * executed in parallel with this op. We may execute only ALU and
936bf215546Sopenharmony_ci         * ld/st in parallel (not other textures), and obviously there cannot
937bf215546Sopenharmony_ci         * be any dependency (the blob appears to forbid even accessing other
938bf215546Sopenharmony_ci         * channels of a given texture register). */
939bf215546Sopenharmony_ci
940bf215546Sopenharmony_ci        unsigned out_of_order   : 2;
941bf215546Sopenharmony_ci        unsigned unknown4  : 10;
942bf215546Sopenharmony_ci
943bf215546Sopenharmony_ci        /* In immediate mode, each offset field is an immediate range [0, 7].
944bf215546Sopenharmony_ci         *
945bf215546Sopenharmony_ci         * In register mode, offset_x becomes a register (full, select, upper)
946bf215546Sopenharmony_ci         * triplet followed by a vec3 swizzle is splattered across
947bf215546Sopenharmony_ci         * offset_y/offset_z in a genuinely bizarre way.
948bf215546Sopenharmony_ci         *
949bf215546Sopenharmony_ci         * For texel fetches in immediate mode, the range is the full [-8, 7],
950bf215546Sopenharmony_ci         * but for normal texturing the top bit must be zero and a register
951bf215546Sopenharmony_ci         * used instead. It's not clear where this limitation is from.
952bf215546Sopenharmony_ci         *
953bf215546Sopenharmony_ci         * union {
954bf215546Sopenharmony_ci         *      struct {
955bf215546Sopenharmony_ci         *              signed offset_x  : 4;
956bf215546Sopenharmony_ci         *              signed offset_y  : 4;
957bf215546Sopenharmony_ci         *              signed offset_z  : 4;
958bf215546Sopenharmony_ci         *      } immediate;
959bf215546Sopenharmony_ci         *      struct {
960bf215546Sopenharmony_ci         *              bool full        : 1;
961bf215546Sopenharmony_ci         *              bool select      : 1;
962bf215546Sopenharmony_ci         *              bool upper       : 1;
963bf215546Sopenharmony_ci         *              unsigned swizzle : 8;
964bf215546Sopenharmony_ci         *              unsigned zero    : 1;
965bf215546Sopenharmony_ci         *      } register;
966bf215546Sopenharmony_ci         * }
967bf215546Sopenharmony_ci         */
968bf215546Sopenharmony_ci
969bf215546Sopenharmony_ci        unsigned offset : 12;
970bf215546Sopenharmony_ci
971bf215546Sopenharmony_ci        /* In immediate bias mode, for a normal texture op, this is
972bf215546Sopenharmony_ci         * texture bias, computed as int(2^8 * frac(biasf)), with
973bf215546Sopenharmony_ci         * bias_int = floor(bias). For a textureLod, it's that, but
974bf215546Sopenharmony_ci         * s/bias/lod. For a texel fetch, this is the LOD as-is.
975bf215546Sopenharmony_ci         *
976bf215546Sopenharmony_ci         * In register mode, this is a midgard_tex_register_select
977bf215546Sopenharmony_ci         * structure and bias_int is zero */
978bf215546Sopenharmony_ci
979bf215546Sopenharmony_ci        unsigned bias : 8;
980bf215546Sopenharmony_ci        signed bias_int  : 8;
981bf215546Sopenharmony_ci
982bf215546Sopenharmony_ci        /* If sampler/texture_register is set, the bottom 8-bits are
983bf215546Sopenharmony_ci         * midgard_tex_register_select and the top 8-bits are zero. If they are
984bf215546Sopenharmony_ci         * clear, they are immediate texture indices */
985bf215546Sopenharmony_ci
986bf215546Sopenharmony_ci        unsigned sampler_handle : 16;
987bf215546Sopenharmony_ci        unsigned texture_handle : 16;
988bf215546Sopenharmony_ci}
989bf215546Sopenharmony_cimidgard_texture_word;
990bf215546Sopenharmony_ci
/* Barriers are technically texture instructions, but since most fields are
 * forced to zero it is less work to describe them as a dedicated, explicitly
 * zeroed layout. 128 bits in total. */
typedef struct __attribute__((__packed__)) {
        unsigned type      : 4;
        unsigned next_type : 4;

        /* Holds the barrier opcode (midgard_tex_op_barrier) */
        unsigned op    : 6;
        unsigned zero1 : 2;

        /* Helper invocations don't make any sense here, so both of these
         * are forced to one */
        unsigned cont  : 1;
        unsigned last  : 1;
        unsigned zero2 : 14;

        unsigned zero3        : 24;
        unsigned out_of_order : 4;
        unsigned zero4        : 4;

        uint64_t zero5;
} midgard_texture_barrier_word;
1016bf215546Sopenharmony_ci
/* 128 bits of constant data, viewable as 2x64-bit, 4x32-bit, 8x16-bit or
 * 16x8-bit lanes. All members alias the same storage. */
typedef union midgard_constants {
        /* 64-bit lanes */
        double   f64[2];
        uint64_t u64[2];
        int64_t  i64[2];

        /* 32-bit lanes */
        float    f32[4];
        uint32_t u32[4];
        int32_t  i32[4];

        /* 16-bit lanes; f16 is held as raw 16-bit words */
        uint16_t f16[8];
        uint16_t u16[8];
        int16_t  i16[8];

        /* 8-bit lanes */
        uint8_t  u8[16];
        int8_t   i8[16];
} midgard_constants;
1031bf215546Sopenharmony_ci
/* Rounding modes */
enum midgard_roundmode {
        MIDGARD_RTE = 0, /* round to even */
        MIDGARD_RTZ = 1, /* round to zero */
        MIDGARD_RTN = 2, /* round to negative */
        MIDGARD_RTP = 3, /* round to positive */
};
1038bf215546Sopenharmony_ci
1039bf215546Sopenharmony_ci#endif
1040