"""
Copyright (C) 2021 Alyssa Rosenzweig <alyssa@rosenzweig.io>

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
"""

# Module-level registries, filled in as a side effect of op(), immediate()
# and enum() being called below.
opcodes = {}      # opcode name -> Opcode
immediates = {}   # immediate name -> Immediate
enums = {}        # enum name -> {value: member name}


class Opcode(object):
    """Plain record describing one opcode registered through op().

    Every constructor argument is stored unchanged on the instance under the
    attribute of the same name.
    """

    def __init__(self, name, dests, srcs, imms, is_float, can_eliminate, encoding_16, encoding_32):
        self.name = name
        self.dests = dests
        self.srcs = srcs
        self.imms = imms
        self.is_float = is_float
        self.can_eliminate = can_eliminate
        self.encoding_16 = encoding_16
        self.encoding_32 = encoding_32


class Immediate(object):
    """A named immediate field together with the C type string used for it."""

    def __init__(self, name, ctype):
        self.name = name
        self.ctype = ctype


class Encoding(object):
    """Binary encoding description for one opcode.

    `description` is a 4-tuple (exact, mask, length_short, length_long).
    Passing None for length_long means "same as length_short". The instance
    keeps `exact`, `mask`, `length_short` and a boolean `extensible`, which
    is True when the two lengths differ.
    """

    def __init__(self, description):
        (exact, mask, length_short, length_long) = description

        # Convenience: a missing long length means the encoding has one size
        if length_long is None:
            length_long = length_short

        self.exact = exact
        self.mask = mask
        self.length_short = length_short
        self.extensible = length_short != length_long

        if self.extensible:
            # The long form may only be a fixed step larger than the short
            # form: +4 when the short form is above 8, otherwise +2.
            expected_long = length_short + (4 if length_short > 8 else 2)
            assert length_long == expected_long, \
                "unexpected long encoding length {}".format(length_long)


def op(name, encoding_32, dests = 1, srcs = 0, imms = None, is_float = False, can_eliminate = True, encoding_16 = None):
    """Register an opcode in the module-level `opcodes` table.

    `encoding_32` and `encoding_16` are either None or a 4-tuple accepted by
    Encoding; non-None tuples are wrapped in Encoding before being stored.
    `imms` defaults to a new empty list for each call, so opcodes registered
    without immediates never share a list object. The remaining arguments are
    forwarded unchanged to Opcode.
    """
    if imms is None:
        imms = []

    encoding_16 = Encoding(encoding_16) if encoding_16 is not None else None
    encoding_32 = Encoding(encoding_32) if encoding_32 is not None else None

    opcodes[name] = Opcode(name, dests, srcs, imms, is_float, can_eliminate, encoding_16, encoding_32)


def immediate(name, ctype = "uint32_t"):
    """Create an Immediate, record it in `immediates` under `name`, return it."""
    imm = Immediate(name, ctype)
    immediates[name] = imm
    return imm


def enum(name, value_dict):
    """Record `value_dict` in `enums` and register a matching immediate.

    The immediate's C type is "enum agx_" followed by `name`; the created
    Immediate is returned.
    """
    enums[name] = value_dict
    return immediate(name, "enum agx_" + name)


# Bit 15, OR'd into many of the exact/mask values of the encodings
L = (1 << 15)

# Shorthand for "not present": used both as a missing long length inside an
# encoding tuple and as a missing encoding altogether.
_ = None

FORMAT = immediate("format", "enum agx_format")
IMM = immediate("imm")
WRITEOUT = immediate("writeout")
INDEX = immediate("index")
COMPONENT = immediate("component")
CHANNELS = immediate("channels")
TRUTH_TABLE = immediate("truth_table")
ROUND = immediate("round", "enum agx_round")
SHIFT = immediate("shift")
MASK = immediate("mask")
BFI_MASK = immediate("bfi_mask")
LOD_MODE = immediate("lod_mode", "enum agx_lod_mode")
DIM = immediate("dim", "enum agx_dim")
SCOREBOARD = immediate("scoreboard")
ICOND = immediate("icond", "enum agx_icond")
FCOND = immediate("fcond", "enum agx_fcond")
NEST = immediate("nest")
INVERT_COND = immediate("invert_cond")
TARGET = immediate("target", "agx_block *")
PERSPECTIVE = immediate("perspective", "bool")
SR = enum("sr", {
    0:  'threadgroup_position_in_grid.x',
    1:  'threadgroup_position_in_grid.y',
    2:  'threadgroup_position_in_grid.z',
    4:  'threads_per_threadgroup.x',
    5:  'threads_per_threadgroup.y',
    6:  'threads_per_threadgroup.z',
    8:  'dispatch_threads_per_threadgroup.x',
    9:  'dispatch_threads_per_threadgroup.y',
    10: 'dispatch_threads_per_threadgroup.z',
    48: 'thread_position_in_threadgroup.x',
    49: 'thread_position_in_threadgroup.y',
    50: 'thread_position_in_threadgroup.z',
    51: 'thread_index_in_threadgroup',
    52: 'thread_index_in_subgroup',
    53: 'subgroup_index_in_threadgroup',
    56: 'active_thread_index_in_quad',
    58: 'active_thread_index_in_subgroup',
    62: 'backfacing',
    80: 'thread_position_in_grid.x',
    81: 'thread_position_in_grid.y',
    82: 'thread_position_in_grid.z',
})


def FUNOP(x):
    """Shift `x` left by 28 bits, where funop() places its opcode field."""
    return x << 28


# Mask covering a 14-bit field starting at bit 28
FUNOP_MASK = FUNOP((1 << 14) - 1)


def funop(name, opcode):
    """Register a one-source float opcode whose sub-opcode is `opcode`.

    The encoding is 0x0A with the L bit set and `opcode` placed via FUNOP;
    the mask covers the low 6 bits, the L bit and FUNOP_MASK. Length is 6
    with no long form, and no 16-bit encoding is registered.
    """
    op(name, (0x0A | L | FUNOP(opcode),
              0x3F | L | FUNOP_MASK, 6, _),
       srcs = 1, is_float = True)
# Listing of opcodes
#
# Each encoding tuple is (exact, mask, length_short, length_long); `_` in the
# last slot means the instruction has no separate long form.
funop("floor", 0b000000)
funop("srsqrt", 0b000001)
funop("dfdx", 0b000100)
funop("dfdy", 0b000110)
funop("rcp", 0b001000)
funop("rsqrt", 0b001001)
funop("sin_pt_1", 0b001010)
funop("log2", 0b001100)
funop("exp2", 0b001101)
funop("sin_pt_2", 0b001110)
funop("ceil", 0b010000)
funop("trunc", 0b100000)
funop("roundeven", 0b110000)

op("fadd",
   encoding_16 = (0x26 | L, 0x3F | L, 6, _),
   encoding_32 = (0x2A | L, 0x3F | L, 6, _),
   srcs = 2, is_float = True)

op("fma",
   encoding_16 = (0x36, 0x3F, 6, 8),
   encoding_32 = (0x3A, 0x3F, 6, 8),
   srcs = 3, is_float = True)

op("fmul",
   encoding_16 = ((0x16 | L), (0x3F | L), 6, _),
   encoding_32 = ((0x1A | L), (0x3F | L), 6, _),
   srcs = 2, is_float = True)

op("mov_imm",
   encoding_32 = (0x62, 0xFF, 6, 8),
   encoding_16 = (0x62, 0xFF, 4, 6),
   imms = [IMM])

op("iadd",
   encoding_32 = (0x0E, 0x3F | L, 8, _),
   srcs = 2, imms = [SHIFT])

op("imad",
   encoding_32 = (0x1E, 0x3F | L, 8, _),
   srcs = 3, imms = [SHIFT])

op("bfi",
   encoding_32 = (0x2E, 0x7F | (0x3 << 26), 8, _),
   srcs = 3, imms = [BFI_MASK])

op("bfeil",
   encoding_32 = (0x2E | L, 0x7F | L | (0x3 << 26), 8, _),
   srcs = 3, imms = [BFI_MASK])

op("asr",
   encoding_32 = (0x2E | L | (0x1 << 26), 0x7F | L | (0x3 << 26), 8, _),
   srcs = 2)

op("icmpsel",
   encoding_32 = (0x12, 0x7F, 8, 10),
   srcs = 4, imms = [ICOND])

op("fcmpsel",
   encoding_32 = (0x02, 0x7F, 8, 10),
   srcs = 4, imms = [FCOND])

# sources are coordinates, LOD, texture, sampler, offset
# TODO: anything else?
op("texture_sample",
   encoding_32 = (0x32, 0x7F, 8, 10), # XXX WRONG SIZE
   srcs = 5, imms = [DIM, LOD_MODE, MASK, SCOREBOARD])

# sources are base, index
op("device_load",
   encoding_32 = (0x05, 0x7F, 6, 8),
   srcs = 2, imms = [FORMAT, MASK, SCOREBOARD])

op("wait", (0x38, 0xFF, 2, _), dests = 0,
   can_eliminate = False, imms = [SCOREBOARD])

op("get_sr", (0x72, 0x7F | L, 4, _), dests = 1, imms = [SR])

op("sample_mask", (0x7fc1, 0xffff, 6, _), dests = 0, srcs = 1, can_eliminate = False)

# Essentially same encoding
op("ld_tile", (0x49, 0x7F, 8, _), dests = 1, srcs = 0,
   can_eliminate = False, imms = [FORMAT])

op("st_tile", (0x09, 0x7F, 8, _), dests = 0, srcs = 1,
   can_eliminate = False, imms = [FORMAT])

# Registers jmp_exec_any and jmp_exec_none, which differ only in `exact`
for (name, exact) in [("any", 0xC000), ("none", 0xC200)]:
    op("jmp_exec_" + name, (exact, (1 << 16) - 1, 6, _), dests = 0, srcs = 0,
       can_eliminate = False, imms = [TARGET])

# TODO: model implicit r0l destinations
op("pop_exec", (0x52 | (0x3 << 9), ((1 << 48) - 1) ^ (0x3 << 7) ^ (0x3 << 11), 6, _),
   dests = 0, srcs = 0, can_eliminate = False, imms = [NEST])

# Registers {if,else,while}_{i,f}cmp: six opcodes sharing one encoding shape
for is_float in [False, True]:
    # Only the integer variants add the two extra 2-bit fields to the mask
    mod_mask = 0 if is_float else ((0x3 << 26) | (0x3 << 38))

    for (cf, cf_op) in [("if", 0), ("else", 1), ("while", 2)]:
        name = "{}_{}cmp".format(cf, "f" if is_float else "i")
        exact = 0x42 | (0x0 if is_float else 0x10) | (cf_op << 9)
        mask = 0x7F | (0x3 << 9) | mod_mask | (0x3 << 44)
        imms = [NEST, FCOND if is_float else ICOND, INVERT_COND]

        op(name, (exact, mask, 6, _), dests = 0, srcs = 2, can_eliminate = False,
           imms = imms, is_float = is_float)

op("bitop", (0x7E, 0x7F, 6, _), srcs = 2, imms = [TRUTH_TABLE])
op("convert", (0x3E | L, 0x7F | L | (0x3 << 38), 6, _), srcs = 2, imms = [ROUND])
op("ld_vary", (0x21, 0xBF, 8, _), srcs = 1, imms = [CHANNELS, PERSPECTIVE])
op("ld_vary_flat", (0xA1, 0xBF, 8, _), srcs = 1, imms = [CHANNELS])
op("st_vary", None, dests = 0, srcs = 2, can_eliminate = False)
op("stop", (0x88, 0xFFFF, 2, _), dests = 0, can_eliminate = False)
op("trap", (0x08, 0xFFFF, 2, _), dests = 0, can_eliminate = False)
op("writeout", (0x48, 0xFF, 4, _), dests = 0, imms = [WRITEOUT], can_eliminate = False)

# Convenient aliases.
# These are registered with no encoding (`_` is None).
op("mov", _, srcs = 1)
op("not", _, srcs = 1)
op("xor", _, srcs = 2)
op("and", _, srcs = 2)
op("or", _, srcs = 2)

# Indicates the logical end of the block, before final branches/control flow
op("p_logical_end", _, dests = 0, srcs = 0, can_eliminate = False)

op("p_combine", _, srcs = 4)
op("p_split", _, srcs = 1, dests = 4)
op("p_extract", _, srcs = 1, imms = [COMPONENT])

# Phis are special-cased in the IR as they (uniquely) can take an unbounded
# number of sources.
op("phi", _, srcs = 0)