1bf215546Sopenharmony_ci"""
2bf215546Sopenharmony_ciCopyright (C) 2021 Alyssa Rosenzweig <alyssa@rosenzweig.io>
3bf215546Sopenharmony_ci
4bf215546Sopenharmony_ciPermission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_cicopy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_cito deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_cithe rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ciand/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ciSoftware is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci
11bf215546Sopenharmony_ciThe above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ciparagraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ciSoftware.
14bf215546Sopenharmony_ci
15bf215546Sopenharmony_ciTHE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ciIMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ciFITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ciTHE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ciLIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20bf215546Sopenharmony_ciOUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21bf215546Sopenharmony_ciSOFTWARE.
22bf215546Sopenharmony_ci"""
23bf215546Sopenharmony_ci
# Module-level registries, each keyed by name. They are populated as a side
# effect of the op(), immediate() and enum() helpers defined in this file.
opcodes = dict()
immediates = dict()
enums = dict()
27bf215546Sopenharmony_ci
class Opcode(object):
   """Plain record describing one instruction, as built by op().

   Every constructor argument is stored unchanged on an attribute of the
   same name: name, dests, srcs, imms, is_float, can_eliminate, encoding_16
   and encoding_32. No validation or copying is performed.
   """
   def __init__(self, name, dests, srcs, imms, is_float, can_eliminate, encoding_16, encoding_32):
      # Identity and operand shape
      self.name, self.dests, self.srcs, self.imms = name, dests, srcs, imms

      # Behavioural flags
      self.is_float, self.can_eliminate = is_float, can_eliminate

      # Encodings (op() passes Encoding objects or None)
      self.encoding_16, self.encoding_32 = encoding_16, encoding_32
38bf215546Sopenharmony_ci
class Immediate(object):
   """Plain record for an immediate operand: its name and its C type string."""
   def __init__(self, name, ctype):
      self.name, self.ctype = name, ctype
43bf215546Sopenharmony_ci
class Encoding(object):
   """Binary encoding description of an instruction.

   `description` is a 4-element sequence
   (exact, mask, length_short, length_long). A length_long of None means
   "same as length_short".

   Attributes set: exact, mask, length_short and extensible (True when the
   two lengths differ). The long length itself is not stored; for an
   extensible encoding it must be the short length plus 4 when the short
   length exceeds 8, otherwise plus 2 (checked with an assert).
   """
   def __init__(self, description):
      exact, mask, short_len, long_len = description

      # Convenience: an omitted long length falls back to the short one
      if long_len is None:
         long_len = short_len

      self.exact = exact
      self.mask = mask
      self.length_short = short_len
      self.extensible = short_len != long_len

      if not self.extensible:
         return

      # The extended form grows by a fixed amount that depends on the base size
      growth = 4 if short_len > 8 else 2
      assert long_len == short_len + growth
59bf215546Sopenharmony_ci
def op(name, encoding_32, dests = 1, srcs = 0, imms = None, is_float = False, can_eliminate = True, encoding_16 = None):
   """Register an instruction in the global `opcodes` table under `name`.

   name          -- registry key, also stored as the Opcode's name
   encoding_32   -- (exact, mask, length_short, length_long) tuple wrapped in
                    an Encoding, or None when there is no such encoding
   dests, srcs   -- stored as-is on the Opcode (call sites pass counts)
   imms          -- list of Immediate objects; omitted means "no immediates"
   is_float      -- stored as-is on the Opcode
   can_eliminate -- stored as-is on the Opcode
   encoding_16   -- same shape as encoding_32, or None

   An existing entry with the same name is overwritten. Returns None.
   """
   # Build a fresh list per call. A literal `[]` default would be one shared
   # object aliased by every Opcode registered without immediates.
   if imms is None:
      imms = []

   encoding_16 = Encoding(encoding_16) if encoding_16 is not None else None
   encoding_32 = Encoding(encoding_32) if encoding_32 is not None else None

   opcodes[name] = Opcode(name, dests, srcs, imms, is_float, can_eliminate, encoding_16, encoding_32)
65bf215546Sopenharmony_ci
def immediate(name, ctype = "uint32_t"):
   """Create an Immediate, record it in `immediates[name]` and return it.

   `ctype` is the C type string stored on the Immediate; it defaults to
   "uint32_t". Registering an existing name replaces the earlier entry.
   """
   registered = immediates[name] = Immediate(name, ctype)
   return registered
70bf215546Sopenharmony_ci
def enum(name, value_dict):
   """Record `value_dict` in `enums[name]` and register a matching immediate.

   The immediate is created through immediate() with the C type
   "enum agx_<name>" and is returned to the caller.
   """
   enums[name] = value_dict
   c_type = "enum agx_" + name
   return immediate(name, c_type)
74bf215546Sopenharmony_ci
# Bit 15, OR'd into many of the exact/mask values in the tables below.
# NOTE(review): presumably the "long encoding" flag bit -- confirm.
L = 0x8000

# Shorthand for "not given": used as the long length in encoding tuples and
# as the encoding of ops that have none.
_ = None
77bf215546Sopenharmony_ci
# Immediate operands instructions may carry. Each call registers the immediate
# under the given name; the C type is uint32_t unless stated otherwise.
# ("nest" used to be registered twice; the redundant second registration is
# dropped, which leaves the order of `immediates` unchanged.)
FORMAT = immediate("format", "enum agx_format")
IMM = immediate("imm")
WRITEOUT = immediate("writeout")
INDEX = immediate("index")
COMPONENT = immediate("component")
CHANNELS = immediate("channels")
TRUTH_TABLE = immediate("truth_table")
ROUND = immediate("round", "enum agx_round")
SHIFT = immediate("shift")
MASK = immediate("mask")
BFI_MASK = immediate("bfi_mask")
LOD_MODE = immediate("lod_mode", "enum agx_lod_mode")
DIM = immediate("dim", "enum agx_dim")
SCOREBOARD = immediate("scoreboard")
ICOND = immediate("icond", "enum agx_icond")
FCOND = immediate("fcond", "enum agx_fcond")
NEST = immediate("nest")
INVERT_COND = immediate("invert_cond")
TARGET = immediate("target", "agx_block *")
PERSPECTIVE = immediate("perspective", "bool")
# Values of the "sr" immediate (C type "enum agx_sr"), mapping number -> name.
# NOTE(review): the numbers are presumably hardware special-register indices;
# the gaps in the numbering are intentional as far as this table shows.
SR = enum("sr", {
   # threadgroup_position_in_grid
   0:  "threadgroup_position_in_grid.x",
   1:  "threadgroup_position_in_grid.y",
   2:  "threadgroup_position_in_grid.z",

   # threads_per_threadgroup
   4:  "threads_per_threadgroup.x",
   5:  "threads_per_threadgroup.y",
   6:  "threads_per_threadgroup.z",

   # dispatch_threads_per_threadgroup
   8:  "dispatch_threads_per_threadgroup.x",
   9:  "dispatch_threads_per_threadgroup.y",
   10: "dispatch_threads_per_threadgroup.z",

   # thread_position_in_threadgroup
   48: "thread_position_in_threadgroup.x",
   49: "thread_position_in_threadgroup.y",
   50: "thread_position_in_threadgroup.z",

   # scalar indices
   51: "thread_index_in_threadgroup",
   52: "thread_index_in_subgroup",
   53: "subgroup_index_in_threadgroup",
   56: "active_thread_index_in_quad",
   58: "active_thread_index_in_subgroup",
   62: "backfacing",

   # thread_position_in_grid
   80: "thread_position_in_grid.x",
   81: "thread_position_in_grid.y",
   82: "thread_position_in_grid.z",
})
122bf215546Sopenharmony_ci
def FUNOP(x):
   """Return x shifted left by 28 bits, the position funop() uses for its opcode field."""
   return x << 28

# A 14-bit wide field of ones, positioned at bit 28
FUNOP_MASK = FUNOP((1 << 14) - 1)
125bf215546Sopenharmony_ci
def funop(name, opcode):
   """Register a one-source float instruction sharing the 0x0A base encoding.

   `opcode` is placed at bit 28 via FUNOP(); the mask covers the low six
   bits, the L bit and the FUNOP_MASK field. The encoding has length 6 with
   no long form, and only a 32-bit encoding is registered.
   """
   # Reuse the shared FUNOP helpers rather than re-spelling the shift here
   exact = 0x0A | L | FUNOP(opcode)
   mask = 0x3F | L | FUNOP_MASK

   op(name, (exact, mask, 6, _), srcs = 1, is_float = True)
130bf215546Sopenharmony_ci
# Listing of opcodes
#
# One-source float ops registered through funop(); the binary literal is the
# value placed in the funop opcode field.
funop(name = "floor",     opcode = 0b000000)
funop(name = "srsqrt",    opcode = 0b000001)
funop(name = "dfdx",      opcode = 0b000100)
funop(name = "dfdy",      opcode = 0b000110)
funop(name = "rcp",       opcode = 0b001000)
funop(name = "rsqrt",     opcode = 0b001001)
funop(name = "sin_pt_1",  opcode = 0b001010)
funop(name = "log2",      opcode = 0b001100)
funop(name = "exp2",      opcode = 0b001101)
funop(name = "sin_pt_2",  opcode = 0b001110)
funop(name = "ceil",      opcode = 0b010000)
funop(name = "trunc",     opcode = 0b100000)
funop(name = "roundeven", opcode = 0b110000)

# Float arithmetic. For each op the 16-bit and 32-bit encodings share a mask
# and differ only in the exact bits.
op("fadd", (0x2A | L, 0x3F | L, 6, _),
      encoding_16 = (0x26 | L, 0x3F | L, 6, _),
      srcs = 2, is_float = True)

op("fma", (0x3A, 0x3F, 6, 8),
      encoding_16 = (0x36, 0x3F, 6, 8),
      srcs = 3, is_float = True)

op("fmul", (0x1A | L, 0x3F | L, 6, _),
      encoding_16 = (0x16 | L, 0x3F | L, 6, _),
      srcs = 2, is_float = True)

# Same exact/mask for both widths; only the lengths differ (4/6 versus 6/8)
op("mov_imm", (0x62, 0xFF, 6, 8),
      encoding_16 = (0x62, 0xFF, 4, 6),
      imms = [IMM])
165bf215546Sopenharmony_ci
# Integer arithmetic with a shift immediate
op("iadd", (0x0E, 0x3F | L, 8, _),
      srcs = 2, imms = [SHIFT])

op("imad", (0x1E, 0x3F | L, 8, _),
      srcs = 3, imms = [SHIFT])

# These three share the 0x2E base and are told apart by the L bit and the
# two bits at position 26.
op("bfi", (0x2E, 0x7F | (0x3 << 26), 8, _),
      srcs = 3, imms = [BFI_MASK])

op("bfeil", (0x2E | L, 0x7F | L | (0x3 << 26), 8, _),
      srcs = 3, imms = [BFI_MASK])

op("asr", (0x2E | L | (0x1 << 26), 0x7F | L | (0x3 << 26), 8, _),
      srcs = 2)

# Compare-and-select: four sources plus a condition immediate
op("icmpsel", (0x12, 0x7F, 8, 10),
      srcs = 4, imms = [ICOND])

op("fcmpsel", (0x02, 0x7F, 8, 10),
      srcs = 4, imms = [FCOND])
193bf215546Sopenharmony_ci
# sources are coordinates, LOD, texture, sampler, offset
# TODO: anything else?
op("texture_sample", (0x32, 0x7F, 8, 10), # XXX WRONG SIZE
      srcs = 5, imms = [DIM, LOD_MODE, MASK, SCOREBOARD])

# sources are base, index
op("device_load", (0x05, 0x7F, 6, 8),
      srcs = 2, imms = [FORMAT, MASK, SCOREBOARD])

op("wait",
      encoding_32 = (0x38, 0xFF, 2, _),
      dests = 0, imms = [SCOREBOARD], can_eliminate = False)

op("get_sr",
      encoding_32 = (0x72, 0x7F | L, 4, _),
      dests = 1, imms = [SR])

op("sample_mask",
      encoding_32 = (0x7fc1, 0xffff, 6, _),
      dests = 0, srcs = 1, can_eliminate = False)

# Essentially the same encoding: the exact values differ only in bit 6
op("ld_tile",
      encoding_32 = (0x49, 0x7F, 8, _),
      dests = 1, srcs = 0, imms = [FORMAT], can_eliminate = False)

op("st_tile",
      encoding_32 = (0x09, 0x7F, 8, _),
      dests = 0, srcs = 1, imms = [FORMAT], can_eliminate = False)
218bf215546Sopenharmony_ci
# jmp_exec_any / jmp_exec_none: the whole low 16 bits are matched exactly
for (name, exact) in (("any", 0xC000), ("none", 0xC200)):
   op("jmp_exec_" + name,
         encoding_32 = (exact, 0xFFFF, 6, _),
         dests = 0, srcs = 0, imms = [TARGET], can_eliminate = False)

# TODO: model implicit r0l destinations
# The mask is all of the low 48 bits except the two-bit fields at 7 and 11.
op("pop_exec",
      encoding_32 = (0x52 | (0x3 << 9),
                     ((1 << 48) - 1) ^ (0x3 << 7) ^ (0x3 << 11),
                     6, _),
      dests = 0, srcs = 0, imms = [NEST], can_eliminate = False)
226bf215546Sopenharmony_ci
# Registers if_/else_/while_ crossed with icmp/fcmp: the three integer
# variants first, then the three float ones.
for is_float in (False, True):
   # Only the integer variants add the two-bit fields at 26 and 38 to the mask
   if is_float:
      mod_mask = 0
   else:
      mod_mask = (0x3 << 26) | (0x3 << 38)

   for (cf, cf_op) in (("if", 0), ("else", 1), ("while", 2)):
      name = cf + "_" + ("f" if is_float else "i") + "cmp"
      # Base is 0x42 for float and 0x52 for integer; cf_op lands at bit 9
      exact = (0x42 if is_float else 0x52) | (cf_op << 9)
      mask = 0x7F | (0x3 << 9) | mod_mask | (0x3 << 44)
      imms = [NEST, (FCOND if is_float else ICOND), INVERT_COND]

      op(name,
            encoding_32 = (exact, mask, 6, _),
            dests = 0, srcs = 2, imms = imms,
            is_float = is_float, can_eliminate = False)
238bf215546Sopenharmony_ci
op("bitop",
      encoding_32 = (0x7E, 0x7F, 6, _),
      srcs = 2, imms = [TRUTH_TABLE])

op("convert",
      encoding_32 = (0x3E | L, 0x7F | L | (0x3 << 38), 6, _),
      srcs = 2, imms = [ROUND])

# The two varying loads differ only in bit 7 of the exact value
op("ld_vary",
      encoding_32 = (0x21, 0xBF, 8, _),
      srcs = 1, imms = [CHANNELS, PERSPECTIVE])

op("ld_vary_flat",
      encoding_32 = (0xA1, 0xBF, 8, _),
      srcs = 1, imms = [CHANNELS])

# Registered without any encoding
op("st_vary",
      encoding_32 = None,
      dests = 0, srcs = 2, can_eliminate = False)

op("stop",
      encoding_32 = (0x88, 0xFFFF, 2, _),
      dests = 0, can_eliminate = False)

op("trap",
      encoding_32 = (0x08, 0xFFFF, 2, _),
      dests = 0, can_eliminate = False)

op("writeout",
      encoding_32 = (0x48, 0xFF, 4, _),
      dests = 0, imms = [WRITEOUT], can_eliminate = False)
247bf215546Sopenharmony_ci
# Convenient aliases. None of these has an encoding of its own.
op("mov", encoding_32 = _, srcs = 1)
op("not", encoding_32 = _, srcs = 1)
op("xor", encoding_32 = _, srcs = 2)
op("and", encoding_32 = _, srcs = 2)
op("or",  encoding_32 = _, srcs = 2)

# Indicates the logical end of the block, before final branches/control flow
op("p_logical_end", encoding_32 = _, dests = 0, srcs = 0, can_eliminate = False)

# Pseudo-ops for moving between a group of four values and its components
op("p_combine", encoding_32 = _, srcs = 4)
op("p_split",   encoding_32 = _, dests = 4, srcs = 1)
op("p_extract", encoding_32 = _, srcs = 1, imms = [COMPONENT])

# Phis are special-cased in the IR as they (uniquely) can take an unbounded
# number of sources, hence the nominal source count of zero here.
op("phi", encoding_32 = _, srcs = 0)
265