/*
 * Copyright (C) 2021 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#include "va_compiler.h" 25bf215546Sopenharmony_ci#include "valhall.h" 26bf215546Sopenharmony_ci#include "valhall_enums.h" 27bf215546Sopenharmony_ci#include "bi_builder.h" 28bf215546Sopenharmony_ci 29bf215546Sopenharmony_ci/* This file contains the final passes of the compiler. Running after 30bf215546Sopenharmony_ci * scheduling and RA, the IR is now finalized, so we need to emit it to actual 31bf215546Sopenharmony_ci * bits on the wire (as well as fixup branches) 32bf215546Sopenharmony_ci */ 33bf215546Sopenharmony_ci 34bf215546Sopenharmony_ci/* 35bf215546Sopenharmony_ci * Unreachable for encoding failures, when hitting an invalid instruction. 36bf215546Sopenharmony_ci * Prints the (first) failing instruction to aid debugging. 37bf215546Sopenharmony_ci */ 38bf215546Sopenharmony_ciNORETURN static void PRINTFLIKE(2, 3) 39bf215546Sopenharmony_ciinvalid_instruction(const bi_instr *I, const char *cause, ...) 40bf215546Sopenharmony_ci{ 41bf215546Sopenharmony_ci fputs("\nInvalid ", stderr); 42bf215546Sopenharmony_ci 43bf215546Sopenharmony_ci va_list ap; 44bf215546Sopenharmony_ci va_start(ap, cause); 45bf215546Sopenharmony_ci vfprintf(stderr, cause, ap); 46bf215546Sopenharmony_ci va_end(ap); 47bf215546Sopenharmony_ci 48bf215546Sopenharmony_ci fputs(":\n\t", stderr); 49bf215546Sopenharmony_ci bi_print_instr(I, stderr); 50bf215546Sopenharmony_ci fprintf(stderr, "\n"); 51bf215546Sopenharmony_ci 52bf215546Sopenharmony_ci unreachable("Invalid instruction"); 53bf215546Sopenharmony_ci} 54bf215546Sopenharmony_ci 55bf215546Sopenharmony_ci/* 56bf215546Sopenharmony_ci * Like assert, but prints the instruction if the assertion fails to aid 57bf215546Sopenharmony_ci * debugging invalid inputs to the packing module. 
58bf215546Sopenharmony_ci */ 59bf215546Sopenharmony_ci#define pack_assert(I, cond) \ 60bf215546Sopenharmony_ci if (!(cond)) invalid_instruction(I, "invariant " #cond); 61bf215546Sopenharmony_ci 62bf215546Sopenharmony_ci/* 63bf215546Sopenharmony_ci * Validate that two adjacent 32-bit sources form an aligned 64-bit register 64bf215546Sopenharmony_ci * pair. This is a compiler invariant, required on Valhall but not on Bifrost. 65bf215546Sopenharmony_ci */ 66bf215546Sopenharmony_cistatic void 67bf215546Sopenharmony_civa_validate_register_pair(const bi_instr *I, unsigned s) 68bf215546Sopenharmony_ci{ 69bf215546Sopenharmony_ci ASSERTED bi_index lo = I->src[s], hi = I->src[s + 1]; 70bf215546Sopenharmony_ci 71bf215546Sopenharmony_ci pack_assert(I, lo.type == hi.type); 72bf215546Sopenharmony_ci 73bf215546Sopenharmony_ci if (lo.type == BI_INDEX_REGISTER) { 74bf215546Sopenharmony_ci pack_assert(I, hi.value & 1); 75bf215546Sopenharmony_ci pack_assert(I, hi.value == lo.value + 1); 76bf215546Sopenharmony_ci } else if (lo.type == BI_INDEX_FAU && lo.value & BIR_FAU_IMMEDIATE) { 77bf215546Sopenharmony_ci /* Small constants are zero extended, so the top word encode zero */ 78bf215546Sopenharmony_ci pack_assert(I, hi.value == (BIR_FAU_IMMEDIATE | 0)); 79bf215546Sopenharmony_ci } else { 80bf215546Sopenharmony_ci pack_assert(I, hi.offset & 1); 81bf215546Sopenharmony_ci pack_assert(I, hi.offset == lo.offset + 1); 82bf215546Sopenharmony_ci } 83bf215546Sopenharmony_ci} 84bf215546Sopenharmony_ci 85bf215546Sopenharmony_cistatic unsigned 86bf215546Sopenharmony_civa_pack_reg(const bi_instr *I, bi_index idx) 87bf215546Sopenharmony_ci{ 88bf215546Sopenharmony_ci pack_assert(I, idx.type == BI_INDEX_REGISTER); 89bf215546Sopenharmony_ci pack_assert(I, idx.value < 64); 90bf215546Sopenharmony_ci 91bf215546Sopenharmony_ci return idx.value; 92bf215546Sopenharmony_ci} 93bf215546Sopenharmony_ci 94bf215546Sopenharmony_cistatic unsigned 95bf215546Sopenharmony_civa_pack_fau_special(const bi_instr *I, enum 
bir_fau fau) 96bf215546Sopenharmony_ci{ 97bf215546Sopenharmony_ci switch (fau) { 98bf215546Sopenharmony_ci case BIR_FAU_ATEST_PARAM: return VA_FAU_SPECIAL_PAGE_0_ATEST_DATUM; 99bf215546Sopenharmony_ci case BIR_FAU_TLS_PTR: return VA_FAU_SPECIAL_PAGE_1_THREAD_LOCAL_POINTER; 100bf215546Sopenharmony_ci case BIR_FAU_WLS_PTR: return VA_FAU_SPECIAL_PAGE_1_WORKGROUP_LOCAL_POINTER; 101bf215546Sopenharmony_ci case BIR_FAU_LANE_ID: return VA_FAU_SPECIAL_PAGE_3_LANE_ID; 102bf215546Sopenharmony_ci case BIR_FAU_PROGRAM_COUNTER: return VA_FAU_SPECIAL_PAGE_3_PROGRAM_COUNTER; 103bf215546Sopenharmony_ci case BIR_FAU_SAMPLE_POS_ARRAY:return VA_FAU_SPECIAL_PAGE_0_SAMPLE; 104bf215546Sopenharmony_ci 105bf215546Sopenharmony_ci case BIR_FAU_BLEND_0...(BIR_FAU_BLEND_0 + 7): 106bf215546Sopenharmony_ci return VA_FAU_SPECIAL_PAGE_0_BLEND_DESCRIPTOR_0 + (fau - BIR_FAU_BLEND_0); 107bf215546Sopenharmony_ci 108bf215546Sopenharmony_ci default: 109bf215546Sopenharmony_ci invalid_instruction(I, "FAU"); 110bf215546Sopenharmony_ci } 111bf215546Sopenharmony_ci} 112bf215546Sopenharmony_ci 113bf215546Sopenharmony_ci/* 114bf215546Sopenharmony_ci * Encode a 64-bit FAU source. The offset is ignored, so this function can be 115bf215546Sopenharmony_ci * used to encode a 32-bit FAU source by or'ing in the appropriate offset. 
116bf215546Sopenharmony_ci */ 117bf215546Sopenharmony_cistatic unsigned 118bf215546Sopenharmony_civa_pack_fau_64(const bi_instr *I, bi_index idx) 119bf215546Sopenharmony_ci{ 120bf215546Sopenharmony_ci pack_assert(I, idx.type == BI_INDEX_FAU); 121bf215546Sopenharmony_ci 122bf215546Sopenharmony_ci unsigned val = (idx.value & BITFIELD_MASK(5)); 123bf215546Sopenharmony_ci 124bf215546Sopenharmony_ci if (idx.value & BIR_FAU_IMMEDIATE) 125bf215546Sopenharmony_ci return (0x3 << 6) | (val << 1); 126bf215546Sopenharmony_ci else if (idx.value & BIR_FAU_UNIFORM) 127bf215546Sopenharmony_ci return (0x2 << 6) | (val << 1); 128bf215546Sopenharmony_ci else 129bf215546Sopenharmony_ci return (0x7 << 5) | (va_pack_fau_special(I, idx.value) << 1); 130bf215546Sopenharmony_ci} 131bf215546Sopenharmony_ci 132bf215546Sopenharmony_cistatic unsigned 133bf215546Sopenharmony_civa_pack_src(const bi_instr *I, unsigned s) 134bf215546Sopenharmony_ci{ 135bf215546Sopenharmony_ci bi_index idx = I->src[s]; 136bf215546Sopenharmony_ci 137bf215546Sopenharmony_ci if (idx.type == BI_INDEX_REGISTER) { 138bf215546Sopenharmony_ci unsigned value = va_pack_reg(I, idx); 139bf215546Sopenharmony_ci if (idx.discard) value |= (1 << 6); 140bf215546Sopenharmony_ci return value; 141bf215546Sopenharmony_ci } else if (idx.type == BI_INDEX_FAU) { 142bf215546Sopenharmony_ci pack_assert(I, idx.offset <= 1); 143bf215546Sopenharmony_ci return va_pack_fau_64(I, idx) | idx.offset; 144bf215546Sopenharmony_ci } 145bf215546Sopenharmony_ci 146bf215546Sopenharmony_ci invalid_instruction(I, "type of source %u", s); 147bf215546Sopenharmony_ci} 148bf215546Sopenharmony_ci 149bf215546Sopenharmony_cistatic unsigned 150bf215546Sopenharmony_civa_pack_wrmask(const bi_instr *I) 151bf215546Sopenharmony_ci{ 152bf215546Sopenharmony_ci switch (I->dest[0].swizzle) { 153bf215546Sopenharmony_ci case BI_SWIZZLE_H00: return 0x1; 154bf215546Sopenharmony_ci case BI_SWIZZLE_H11: return 0x2; 155bf215546Sopenharmony_ci case BI_SWIZZLE_H01: return 0x3; 
156bf215546Sopenharmony_ci default: invalid_instruction(I, "write mask"); 157bf215546Sopenharmony_ci } 158bf215546Sopenharmony_ci} 159bf215546Sopenharmony_ci 160bf215546Sopenharmony_cistatic enum va_atomic_operation 161bf215546Sopenharmony_civa_pack_atom_opc(const bi_instr *I) 162bf215546Sopenharmony_ci{ 163bf215546Sopenharmony_ci switch (I->atom_opc) { 164bf215546Sopenharmony_ci case BI_ATOM_OPC_AADD: return VA_ATOMIC_OPERATION_AADD; 165bf215546Sopenharmony_ci case BI_ATOM_OPC_ASMIN: return VA_ATOMIC_OPERATION_ASMIN; 166bf215546Sopenharmony_ci case BI_ATOM_OPC_ASMAX: return VA_ATOMIC_OPERATION_ASMAX; 167bf215546Sopenharmony_ci case BI_ATOM_OPC_AUMIN: return VA_ATOMIC_OPERATION_AUMIN; 168bf215546Sopenharmony_ci case BI_ATOM_OPC_AUMAX: return VA_ATOMIC_OPERATION_AUMAX; 169bf215546Sopenharmony_ci case BI_ATOM_OPC_AAND: return VA_ATOMIC_OPERATION_AAND; 170bf215546Sopenharmony_ci case BI_ATOM_OPC_AOR: return VA_ATOMIC_OPERATION_AOR; 171bf215546Sopenharmony_ci case BI_ATOM_OPC_AXOR: return VA_ATOMIC_OPERATION_AXOR; 172bf215546Sopenharmony_ci case BI_ATOM_OPC_ACMPXCHG: 173bf215546Sopenharmony_ci case BI_ATOM_OPC_AXCHG: return VA_ATOMIC_OPERATION_AXCHG; 174bf215546Sopenharmony_ci default: invalid_instruction(I, "atomic opcode"); 175bf215546Sopenharmony_ci } 176bf215546Sopenharmony_ci} 177bf215546Sopenharmony_ci 178bf215546Sopenharmony_cistatic enum va_atomic_operation_with_1 179bf215546Sopenharmony_civa_pack_atom_opc_1(const bi_instr *I) 180bf215546Sopenharmony_ci{ 181bf215546Sopenharmony_ci switch (I->atom_opc) { 182bf215546Sopenharmony_ci case BI_ATOM_OPC_AINC: return VA_ATOMIC_OPERATION_WITH_1_AINC; 183bf215546Sopenharmony_ci case BI_ATOM_OPC_ADEC: return VA_ATOMIC_OPERATION_WITH_1_ADEC; 184bf215546Sopenharmony_ci case BI_ATOM_OPC_AUMAX1: return VA_ATOMIC_OPERATION_WITH_1_AUMAX1; 185bf215546Sopenharmony_ci case BI_ATOM_OPC_ASMAX1: return VA_ATOMIC_OPERATION_WITH_1_ASMAX1; 186bf215546Sopenharmony_ci case BI_ATOM_OPC_AOR1: return VA_ATOMIC_OPERATION_WITH_1_AOR1; 
187bf215546Sopenharmony_ci default: invalid_instruction(I, "atomic opcode with implicit 1"); 188bf215546Sopenharmony_ci } 189bf215546Sopenharmony_ci} 190bf215546Sopenharmony_ci 191bf215546Sopenharmony_cistatic unsigned 192bf215546Sopenharmony_civa_pack_dest(const bi_instr *I) 193bf215546Sopenharmony_ci{ 194bf215546Sopenharmony_ci return va_pack_reg(I, I->dest[0]) | (va_pack_wrmask(I) << 6); 195bf215546Sopenharmony_ci} 196bf215546Sopenharmony_ci 197bf215546Sopenharmony_cistatic enum va_widen 198bf215546Sopenharmony_civa_pack_widen_f32(const bi_instr *I, enum bi_swizzle swz) 199bf215546Sopenharmony_ci{ 200bf215546Sopenharmony_ci switch (swz) { 201bf215546Sopenharmony_ci case BI_SWIZZLE_H01: return VA_WIDEN_NONE; 202bf215546Sopenharmony_ci case BI_SWIZZLE_H00: return VA_WIDEN_H0; 203bf215546Sopenharmony_ci case BI_SWIZZLE_H11: return VA_WIDEN_H1; 204bf215546Sopenharmony_ci default: invalid_instruction(I, "widen"); 205bf215546Sopenharmony_ci } 206bf215546Sopenharmony_ci} 207bf215546Sopenharmony_ci 208bf215546Sopenharmony_cistatic enum va_swizzles_16_bit 209bf215546Sopenharmony_civa_pack_swizzle_f16(const bi_instr *I, enum bi_swizzle swz) 210bf215546Sopenharmony_ci{ 211bf215546Sopenharmony_ci switch (swz) { 212bf215546Sopenharmony_ci case BI_SWIZZLE_H00: return VA_SWIZZLES_16_BIT_H00; 213bf215546Sopenharmony_ci case BI_SWIZZLE_H10: return VA_SWIZZLES_16_BIT_H10; 214bf215546Sopenharmony_ci case BI_SWIZZLE_H01: return VA_SWIZZLES_16_BIT_H01; 215bf215546Sopenharmony_ci case BI_SWIZZLE_H11: return VA_SWIZZLES_16_BIT_H11; 216bf215546Sopenharmony_ci default: invalid_instruction(I, "16-bit swizzle"); 217bf215546Sopenharmony_ci } 218bf215546Sopenharmony_ci} 219bf215546Sopenharmony_ci 220bf215546Sopenharmony_cistatic unsigned 221bf215546Sopenharmony_civa_pack_widen(const bi_instr *I, enum bi_swizzle swz, enum va_size size) 222bf215546Sopenharmony_ci{ 223bf215546Sopenharmony_ci if (size == VA_SIZE_8) { 224bf215546Sopenharmony_ci switch (swz) { 225bf215546Sopenharmony_ci case 
BI_SWIZZLE_H01: return VA_SWIZZLES_8_BIT_B0123; 226bf215546Sopenharmony_ci case BI_SWIZZLE_H00: return VA_SWIZZLES_8_BIT_B0101; 227bf215546Sopenharmony_ci case BI_SWIZZLE_H11: return VA_SWIZZLES_8_BIT_B2323; 228bf215546Sopenharmony_ci case BI_SWIZZLE_B0000: return VA_SWIZZLES_8_BIT_B0000; 229bf215546Sopenharmony_ci case BI_SWIZZLE_B1111: return VA_SWIZZLES_8_BIT_B1111; 230bf215546Sopenharmony_ci case BI_SWIZZLE_B2222: return VA_SWIZZLES_8_BIT_B2222; 231bf215546Sopenharmony_ci case BI_SWIZZLE_B3333: return VA_SWIZZLES_8_BIT_B3333; 232bf215546Sopenharmony_ci default: invalid_instruction(I, "8-bit widen"); 233bf215546Sopenharmony_ci } 234bf215546Sopenharmony_ci } else if (size == VA_SIZE_16) { 235bf215546Sopenharmony_ci switch (swz) { 236bf215546Sopenharmony_ci case BI_SWIZZLE_H00: return VA_SWIZZLES_16_BIT_H00; 237bf215546Sopenharmony_ci case BI_SWIZZLE_H10: return VA_SWIZZLES_16_BIT_H10; 238bf215546Sopenharmony_ci case BI_SWIZZLE_H01: return VA_SWIZZLES_16_BIT_H01; 239bf215546Sopenharmony_ci case BI_SWIZZLE_H11: return VA_SWIZZLES_16_BIT_H11; 240bf215546Sopenharmony_ci case BI_SWIZZLE_B0000: return VA_SWIZZLES_16_BIT_B00; 241bf215546Sopenharmony_ci case BI_SWIZZLE_B1111: return VA_SWIZZLES_16_BIT_B11; 242bf215546Sopenharmony_ci case BI_SWIZZLE_B2222: return VA_SWIZZLES_16_BIT_B22; 243bf215546Sopenharmony_ci case BI_SWIZZLE_B3333: return VA_SWIZZLES_16_BIT_B33; 244bf215546Sopenharmony_ci default: invalid_instruction(I, "16-bit widen"); 245bf215546Sopenharmony_ci } 246bf215546Sopenharmony_ci } else if (size == VA_SIZE_32) { 247bf215546Sopenharmony_ci switch (swz) { 248bf215546Sopenharmony_ci case BI_SWIZZLE_H01: return VA_SWIZZLES_32_BIT_NONE; 249bf215546Sopenharmony_ci case BI_SWIZZLE_H00: return VA_SWIZZLES_32_BIT_H0; 250bf215546Sopenharmony_ci case BI_SWIZZLE_H11: return VA_SWIZZLES_32_BIT_H1; 251bf215546Sopenharmony_ci case BI_SWIZZLE_B0000: return VA_SWIZZLES_32_BIT_B0; 252bf215546Sopenharmony_ci case BI_SWIZZLE_B1111: return VA_SWIZZLES_32_BIT_B1; 
253bf215546Sopenharmony_ci case BI_SWIZZLE_B2222: return VA_SWIZZLES_32_BIT_B2; 254bf215546Sopenharmony_ci case BI_SWIZZLE_B3333: return VA_SWIZZLES_32_BIT_B3; 255bf215546Sopenharmony_ci default: invalid_instruction(I, "32-bit widen"); 256bf215546Sopenharmony_ci } 257bf215546Sopenharmony_ci } else { 258bf215546Sopenharmony_ci invalid_instruction(I, "type size for widen"); 259bf215546Sopenharmony_ci } 260bf215546Sopenharmony_ci} 261bf215546Sopenharmony_ci 262bf215546Sopenharmony_cistatic enum va_half_swizzles_8_bit 263bf215546Sopenharmony_civa_pack_halfswizzle(const bi_instr *I, enum bi_swizzle swz) 264bf215546Sopenharmony_ci{ 265bf215546Sopenharmony_ci switch (swz) { 266bf215546Sopenharmony_ci case BI_SWIZZLE_B0000: return VA_HALF_SWIZZLES_8_BIT_B00; 267bf215546Sopenharmony_ci case BI_SWIZZLE_B1111: return VA_HALF_SWIZZLES_8_BIT_B11; 268bf215546Sopenharmony_ci case BI_SWIZZLE_B2222: return VA_HALF_SWIZZLES_8_BIT_B22; 269bf215546Sopenharmony_ci case BI_SWIZZLE_B3333: return VA_HALF_SWIZZLES_8_BIT_B33; 270bf215546Sopenharmony_ci case BI_SWIZZLE_B0011: return VA_HALF_SWIZZLES_8_BIT_B01; 271bf215546Sopenharmony_ci case BI_SWIZZLE_B2233: return VA_HALF_SWIZZLES_8_BIT_B23; 272bf215546Sopenharmony_ci case BI_SWIZZLE_B0022: return VA_HALF_SWIZZLES_8_BIT_B02; 273bf215546Sopenharmony_ci default: invalid_instruction(I, "v2u8 swizzle"); 274bf215546Sopenharmony_ci } 275bf215546Sopenharmony_ci} 276bf215546Sopenharmony_ci 277bf215546Sopenharmony_cistatic enum va_lanes_8_bit 278bf215546Sopenharmony_civa_pack_shift_lanes(const bi_instr *I, enum bi_swizzle swz) 279bf215546Sopenharmony_ci{ 280bf215546Sopenharmony_ci switch (swz) { 281bf215546Sopenharmony_ci case BI_SWIZZLE_H01: return VA_LANES_8_BIT_B02; 282bf215546Sopenharmony_ci case BI_SWIZZLE_B0000: return VA_LANES_8_BIT_B00; 283bf215546Sopenharmony_ci case BI_SWIZZLE_B1111: return VA_LANES_8_BIT_B11; 284bf215546Sopenharmony_ci case BI_SWIZZLE_B2222: return VA_LANES_8_BIT_B22; 285bf215546Sopenharmony_ci case BI_SWIZZLE_B3333: 
return VA_LANES_8_BIT_B33; 286bf215546Sopenharmony_ci default: invalid_instruction(I, "lane shift"); 287bf215546Sopenharmony_ci } 288bf215546Sopenharmony_ci} 289bf215546Sopenharmony_ci 290bf215546Sopenharmony_cistatic enum va_combine 291bf215546Sopenharmony_civa_pack_combine(const bi_instr *I, enum bi_swizzle swz) 292bf215546Sopenharmony_ci{ 293bf215546Sopenharmony_ci switch (swz) { 294bf215546Sopenharmony_ci case BI_SWIZZLE_H01: return VA_COMBINE_NONE; 295bf215546Sopenharmony_ci case BI_SWIZZLE_H00: return VA_COMBINE_H0; 296bf215546Sopenharmony_ci case BI_SWIZZLE_H11: return VA_COMBINE_H1; 297bf215546Sopenharmony_ci default: invalid_instruction(I, "branch lane"); 298bf215546Sopenharmony_ci } 299bf215546Sopenharmony_ci} 300bf215546Sopenharmony_ci 301bf215546Sopenharmony_cistatic enum va_source_format 302bf215546Sopenharmony_civa_pack_source_format(const bi_instr *I) 303bf215546Sopenharmony_ci{ 304bf215546Sopenharmony_ci switch (I->source_format) { 305bf215546Sopenharmony_ci case BI_SOURCE_FORMAT_FLAT32: return VA_SOURCE_FORMAT_SRC_FLAT32; 306bf215546Sopenharmony_ci case BI_SOURCE_FORMAT_FLAT16: return VA_SOURCE_FORMAT_SRC_FLAT16; 307bf215546Sopenharmony_ci case BI_SOURCE_FORMAT_F32: return VA_SOURCE_FORMAT_SRC_F32; 308bf215546Sopenharmony_ci case BI_SOURCE_FORMAT_F16: return VA_SOURCE_FORMAT_SRC_F16; 309bf215546Sopenharmony_ci } 310bf215546Sopenharmony_ci 311bf215546Sopenharmony_ci invalid_instruction(I, "source format"); 312bf215546Sopenharmony_ci} 313bf215546Sopenharmony_ci 314bf215546Sopenharmony_cistatic uint64_t 315bf215546Sopenharmony_civa_pack_alu(const bi_instr *I) 316bf215546Sopenharmony_ci{ 317bf215546Sopenharmony_ci struct va_opcode_info info = valhall_opcodes[I->op]; 318bf215546Sopenharmony_ci uint64_t hex = 0; 319bf215546Sopenharmony_ci 320bf215546Sopenharmony_ci switch (I->op) { 321bf215546Sopenharmony_ci /* Add FREXP flags */ 322bf215546Sopenharmony_ci case BI_OPCODE_FREXPE_F32: 323bf215546Sopenharmony_ci case BI_OPCODE_FREXPE_V2F16: 
324bf215546Sopenharmony_ci case BI_OPCODE_FREXPM_F32: 325bf215546Sopenharmony_ci case BI_OPCODE_FREXPM_V2F16: 326bf215546Sopenharmony_ci if (I->sqrt) hex |= 1ull << 24; 327bf215546Sopenharmony_ci if (I->log) hex |= 1ull << 25; 328bf215546Sopenharmony_ci break; 329bf215546Sopenharmony_ci 330bf215546Sopenharmony_ci /* Add mux type */ 331bf215546Sopenharmony_ci case BI_OPCODE_MUX_I32: 332bf215546Sopenharmony_ci case BI_OPCODE_MUX_V2I16: 333bf215546Sopenharmony_ci case BI_OPCODE_MUX_V4I8: 334bf215546Sopenharmony_ci hex |= (uint64_t) I->mux << 32; 335bf215546Sopenharmony_ci break; 336bf215546Sopenharmony_ci 337bf215546Sopenharmony_ci /* Add .eq flag */ 338bf215546Sopenharmony_ci case BI_OPCODE_BRANCHZ_I16: 339bf215546Sopenharmony_ci case BI_OPCODE_BRANCHZI: 340bf215546Sopenharmony_ci pack_assert(I, I->cmpf == BI_CMPF_EQ || I->cmpf == BI_CMPF_NE); 341bf215546Sopenharmony_ci 342bf215546Sopenharmony_ci if (I->cmpf == BI_CMPF_EQ) hex |= (1ull << 36); 343bf215546Sopenharmony_ci 344bf215546Sopenharmony_ci if (I->op == BI_OPCODE_BRANCHZI) 345bf215546Sopenharmony_ci hex |= (0x1ull << 40); /* Absolute */ 346bf215546Sopenharmony_ci else 347bf215546Sopenharmony_ci hex |= ((uint64_t) I->branch_offset & BITFIELD_MASK(27)) << 8; 348bf215546Sopenharmony_ci 349bf215546Sopenharmony_ci break; 350bf215546Sopenharmony_ci 351bf215546Sopenharmony_ci /* Add arithmetic flag */ 352bf215546Sopenharmony_ci case BI_OPCODE_RSHIFT_AND_I32: 353bf215546Sopenharmony_ci case BI_OPCODE_RSHIFT_AND_V2I16: 354bf215546Sopenharmony_ci case BI_OPCODE_RSHIFT_AND_V4I8: 355bf215546Sopenharmony_ci case BI_OPCODE_RSHIFT_OR_I32: 356bf215546Sopenharmony_ci case BI_OPCODE_RSHIFT_OR_V2I16: 357bf215546Sopenharmony_ci case BI_OPCODE_RSHIFT_OR_V4I8: 358bf215546Sopenharmony_ci case BI_OPCODE_RSHIFT_XOR_I32: 359bf215546Sopenharmony_ci case BI_OPCODE_RSHIFT_XOR_V2I16: 360bf215546Sopenharmony_ci case BI_OPCODE_RSHIFT_XOR_V4I8: 361bf215546Sopenharmony_ci hex |= (uint64_t) I->arithmetic << 34; 362bf215546Sopenharmony_ci break; 
363bf215546Sopenharmony_ci 364bf215546Sopenharmony_ci case BI_OPCODE_LEA_BUF_IMM: 365bf215546Sopenharmony_ci /* Buffer table index */ 366bf215546Sopenharmony_ci hex |= 0xD << 8; 367bf215546Sopenharmony_ci break; 368bf215546Sopenharmony_ci 369bf215546Sopenharmony_ci case BI_OPCODE_LEA_ATTR_IMM: 370bf215546Sopenharmony_ci hex |= ((uint64_t) I->table) << 16; 371bf215546Sopenharmony_ci hex |= ((uint64_t) I->attribute_index) << 20; 372bf215546Sopenharmony_ci break; 373bf215546Sopenharmony_ci 374bf215546Sopenharmony_ci case BI_OPCODE_IADD_IMM_I32: 375bf215546Sopenharmony_ci case BI_OPCODE_IADD_IMM_V2I16: 376bf215546Sopenharmony_ci case BI_OPCODE_IADD_IMM_V4I8: 377bf215546Sopenharmony_ci case BI_OPCODE_FADD_IMM_F32: 378bf215546Sopenharmony_ci case BI_OPCODE_FADD_IMM_V2F16: 379bf215546Sopenharmony_ci hex |= ((uint64_t) I->index) << 8; 380bf215546Sopenharmony_ci break; 381bf215546Sopenharmony_ci 382bf215546Sopenharmony_ci case BI_OPCODE_CLPER_I32: 383bf215546Sopenharmony_ci hex |= ((uint64_t) I->inactive_result) << 22; 384bf215546Sopenharmony_ci hex |= ((uint64_t) I->lane_op) << 32; 385bf215546Sopenharmony_ci hex |= ((uint64_t) I->subgroup) << 36; 386bf215546Sopenharmony_ci break; 387bf215546Sopenharmony_ci 388bf215546Sopenharmony_ci case BI_OPCODE_LD_VAR: 389bf215546Sopenharmony_ci case BI_OPCODE_LD_VAR_FLAT: 390bf215546Sopenharmony_ci case BI_OPCODE_LD_VAR_IMM: 391bf215546Sopenharmony_ci case BI_OPCODE_LD_VAR_FLAT_IMM: 392bf215546Sopenharmony_ci case BI_OPCODE_LD_VAR_BUF_F16: 393bf215546Sopenharmony_ci case BI_OPCODE_LD_VAR_BUF_F32: 394bf215546Sopenharmony_ci case BI_OPCODE_LD_VAR_BUF_IMM_F16: 395bf215546Sopenharmony_ci case BI_OPCODE_LD_VAR_BUF_IMM_F32: 396bf215546Sopenharmony_ci case BI_OPCODE_LD_VAR_SPECIAL: 397bf215546Sopenharmony_ci if (I->op == BI_OPCODE_LD_VAR_SPECIAL) 398bf215546Sopenharmony_ci hex |= ((uint64_t) I->varying_name) << 12; /* instead of index */ 399bf215546Sopenharmony_ci else if (I->op == BI_OPCODE_LD_VAR_BUF_IMM_F16 || 400bf215546Sopenharmony_ci 
I->op == BI_OPCODE_LD_VAR_BUF_IMM_F32) { 401bf215546Sopenharmony_ci hex |= ((uint64_t) I->index) << 16; 402bf215546Sopenharmony_ci } else if (I->op == BI_OPCODE_LD_VAR_IMM || 403bf215546Sopenharmony_ci I->op == BI_OPCODE_LD_VAR_FLAT_IMM) { 404bf215546Sopenharmony_ci hex |= ((uint64_t) I->table) << 8; 405bf215546Sopenharmony_ci hex |= ((uint64_t) I->index) << 12; 406bf215546Sopenharmony_ci } 407bf215546Sopenharmony_ci 408bf215546Sopenharmony_ci hex |= ((uint64_t) va_pack_source_format(I)) << 24; 409bf215546Sopenharmony_ci hex |= ((uint64_t) I->update) << 36; 410bf215546Sopenharmony_ci hex |= ((uint64_t) I->sample) << 38; 411bf215546Sopenharmony_ci break; 412bf215546Sopenharmony_ci 413bf215546Sopenharmony_ci case BI_OPCODE_LD_ATTR_IMM: 414bf215546Sopenharmony_ci hex |= ((uint64_t) I->table) << 16; 415bf215546Sopenharmony_ci hex |= ((uint64_t) I->attribute_index) << 20; 416bf215546Sopenharmony_ci break; 417bf215546Sopenharmony_ci 418bf215546Sopenharmony_ci case BI_OPCODE_LD_TEX_IMM: 419bf215546Sopenharmony_ci case BI_OPCODE_LEA_TEX_IMM: 420bf215546Sopenharmony_ci hex |= ((uint64_t) I->table) << 16; 421bf215546Sopenharmony_ci hex |= ((uint64_t) I->texture_index) << 20; 422bf215546Sopenharmony_ci break; 423bf215546Sopenharmony_ci 424bf215546Sopenharmony_ci case BI_OPCODE_ZS_EMIT: 425bf215546Sopenharmony_ci if (I->stencil) hex |= (1 << 24); 426bf215546Sopenharmony_ci if (I->z) hex |= (1 << 25); 427bf215546Sopenharmony_ci break; 428bf215546Sopenharmony_ci 429bf215546Sopenharmony_ci default: 430bf215546Sopenharmony_ci break; 431bf215546Sopenharmony_ci } 432bf215546Sopenharmony_ci 433bf215546Sopenharmony_ci /* FMA_RSCALE.f32 special modes treated as extra opcodes */ 434bf215546Sopenharmony_ci if (I->op == BI_OPCODE_FMA_RSCALE_F32) { 435bf215546Sopenharmony_ci pack_assert(I, I->special < 4); 436bf215546Sopenharmony_ci hex |= ((uint64_t) I->special) << 48; 437bf215546Sopenharmony_ci } 438bf215546Sopenharmony_ci 439bf215546Sopenharmony_ci /* Add the normal destination or a 
placeholder. Staging destinations are 440bf215546Sopenharmony_ci * added elsewhere, as they require special handling for control fields. 441bf215546Sopenharmony_ci */ 442bf215546Sopenharmony_ci if (info.has_dest && info.nr_staging_dests == 0) { 443bf215546Sopenharmony_ci hex |= (uint64_t) va_pack_dest(I) << 40; 444bf215546Sopenharmony_ci } else if (info.nr_staging_dests == 0 && info.nr_staging_srcs == 0) { 445bf215546Sopenharmony_ci pack_assert(I, bi_is_null(I->dest[0])); 446bf215546Sopenharmony_ci hex |= 0xC0ull << 40; /* Placeholder */ 447bf215546Sopenharmony_ci } 448bf215546Sopenharmony_ci 449bf215546Sopenharmony_ci bool swap12 = va_swap_12(I->op); 450bf215546Sopenharmony_ci 451bf215546Sopenharmony_ci /* First src is staging if we read, skip it when packing sources */ 452bf215546Sopenharmony_ci unsigned src_offset = bi_opcode_props[I->op].sr_read ? 1 : 0; 453bf215546Sopenharmony_ci 454bf215546Sopenharmony_ci for (unsigned i = 0; i < info.nr_srcs; ++i) { 455bf215546Sopenharmony_ci unsigned logical_i = (swap12 && i == 1) ? 2 : (swap12 && i == 2) ? 
1 : i; 456bf215546Sopenharmony_ci 457bf215546Sopenharmony_ci struct va_src_info src_info = info.srcs[i]; 458bf215546Sopenharmony_ci enum va_size size = src_info.size; 459bf215546Sopenharmony_ci 460bf215546Sopenharmony_ci bi_index src = I->src[logical_i + src_offset]; 461bf215546Sopenharmony_ci hex |= (uint64_t) va_pack_src(I, logical_i + src_offset) << (8 * i); 462bf215546Sopenharmony_ci 463bf215546Sopenharmony_ci if (src_info.notted) { 464bf215546Sopenharmony_ci if (src.neg) hex |= (1ull << 35); 465bf215546Sopenharmony_ci } else if (src_info.absneg) { 466bf215546Sopenharmony_ci unsigned neg_offs = 32 + 2 + ((2 - i) * 2); 467bf215546Sopenharmony_ci unsigned abs_offs = 33 + 2 + ((2 - i) * 2); 468bf215546Sopenharmony_ci 469bf215546Sopenharmony_ci if (src.neg) hex |= 1ull << neg_offs; 470bf215546Sopenharmony_ci if (src.abs) hex |= 1ull << abs_offs; 471bf215546Sopenharmony_ci } else { 472bf215546Sopenharmony_ci if (src.neg) invalid_instruction(I, "negate"); 473bf215546Sopenharmony_ci if (src.abs) invalid_instruction(I, "absolute value"); 474bf215546Sopenharmony_ci } 475bf215546Sopenharmony_ci 476bf215546Sopenharmony_ci if (src_info.swizzle) { 477bf215546Sopenharmony_ci unsigned offs = 24 + ((2 - i) * 2); 478bf215546Sopenharmony_ci unsigned S = src.swizzle; 479bf215546Sopenharmony_ci pack_assert(I, size == VA_SIZE_16 || size == VA_SIZE_32); 480bf215546Sopenharmony_ci 481bf215546Sopenharmony_ci uint64_t v = (size == VA_SIZE_32 ? va_pack_widen_f32(I, S) : va_pack_swizzle_f16(I, S)); 482bf215546Sopenharmony_ci hex |= v << offs; 483bf215546Sopenharmony_ci } else if (src_info.widen) { 484bf215546Sopenharmony_ci unsigned offs = (i == 1) ? 26 : 36; 485bf215546Sopenharmony_ci hex |= (uint64_t) va_pack_widen(I, src.swizzle, src_info.size) << offs; 486bf215546Sopenharmony_ci } else if (src_info.lane) { 487bf215546Sopenharmony_ci unsigned offs = (I->op == BI_OPCODE_MKVEC_V2I8) ? 488bf215546Sopenharmony_ci ((i == 0) ? 
38 : 36) : 489bf215546Sopenharmony_ci 28; 490bf215546Sopenharmony_ci 491bf215546Sopenharmony_ci if (src_info.size == VA_SIZE_16) { 492bf215546Sopenharmony_ci hex |= (src.swizzle == BI_SWIZZLE_H11 ? 1 : 0) << offs; 493bf215546Sopenharmony_ci } else if (I->op == BI_OPCODE_BRANCHZ_I16) { 494bf215546Sopenharmony_ci hex |= ((uint64_t) va_pack_combine(I, src.swizzle) << 37); 495bf215546Sopenharmony_ci } else { 496bf215546Sopenharmony_ci pack_assert(I, src_info.size == VA_SIZE_8); 497bf215546Sopenharmony_ci unsigned comp = src.swizzle - BI_SWIZZLE_B0000; 498bf215546Sopenharmony_ci pack_assert(I, comp < 4); 499bf215546Sopenharmony_ci hex |= (uint64_t) comp << offs; 500bf215546Sopenharmony_ci } 501bf215546Sopenharmony_ci } else if (src_info.lanes) { 502bf215546Sopenharmony_ci pack_assert(I, src_info.size == VA_SIZE_8); 503bf215546Sopenharmony_ci pack_assert(I, i == 1); 504bf215546Sopenharmony_ci hex |= (uint64_t) va_pack_shift_lanes(I, src.swizzle) << 26; 505bf215546Sopenharmony_ci } else if (src_info.combine) { 506bf215546Sopenharmony_ci /* Treat as swizzle, subgroup ops not yet supported */ 507bf215546Sopenharmony_ci pack_assert(I, src_info.size == VA_SIZE_32); 508bf215546Sopenharmony_ci pack_assert(I, i == 0); 509bf215546Sopenharmony_ci hex |= (uint64_t) va_pack_widen_f32(I, src.swizzle) << 37; 510bf215546Sopenharmony_ci } else if (src_info.halfswizzle) { 511bf215546Sopenharmony_ci pack_assert(I, src_info.size == VA_SIZE_8); 512bf215546Sopenharmony_ci pack_assert(I, i == 0); 513bf215546Sopenharmony_ci hex |= (uint64_t) va_pack_halfswizzle(I, src.swizzle) << 36; 514bf215546Sopenharmony_ci } else if (src.swizzle != BI_SWIZZLE_H01) { 515bf215546Sopenharmony_ci invalid_instruction(I, "swizzle"); 516bf215546Sopenharmony_ci } 517bf215546Sopenharmony_ci } 518bf215546Sopenharmony_ci 519bf215546Sopenharmony_ci if (info.clamp) hex |= (uint64_t) I->clamp << 32; 520bf215546Sopenharmony_ci if (info.round_mode) hex |= (uint64_t) I->round << 30; 521bf215546Sopenharmony_ci if 
(info.condition) hex |= (uint64_t) I->cmpf << 32; 522bf215546Sopenharmony_ci if (info.result_type) hex |= (uint64_t) I->result_type << 30; 523bf215546Sopenharmony_ci 524bf215546Sopenharmony_ci return hex; 525bf215546Sopenharmony_ci} 526bf215546Sopenharmony_ci 527bf215546Sopenharmony_cistatic uint64_t 528bf215546Sopenharmony_civa_pack_byte_offset(const bi_instr *I) 529bf215546Sopenharmony_ci{ 530bf215546Sopenharmony_ci int16_t offset = I->byte_offset; 531bf215546Sopenharmony_ci if (offset != I->byte_offset) invalid_instruction(I, "byte offset"); 532bf215546Sopenharmony_ci 533bf215546Sopenharmony_ci uint16_t offset_as_u16 = offset; 534bf215546Sopenharmony_ci return ((uint64_t) offset_as_u16) << 8; 535bf215546Sopenharmony_ci} 536bf215546Sopenharmony_ci 537bf215546Sopenharmony_cistatic uint64_t 538bf215546Sopenharmony_civa_pack_byte_offset_8(const bi_instr *I) 539bf215546Sopenharmony_ci{ 540bf215546Sopenharmony_ci uint8_t offset = I->byte_offset; 541bf215546Sopenharmony_ci if (offset != I->byte_offset) invalid_instruction(I, "byte offset"); 542bf215546Sopenharmony_ci 543bf215546Sopenharmony_ci return ((uint64_t) offset) << 8; 544bf215546Sopenharmony_ci} 545bf215546Sopenharmony_ci 546bf215546Sopenharmony_cistatic uint64_t 547bf215546Sopenharmony_civa_pack_load(const bi_instr *I, bool buffer_descriptor) 548bf215546Sopenharmony_ci{ 549bf215546Sopenharmony_ci const uint8_t load_lane_identity[8] = { 550bf215546Sopenharmony_ci VA_LOAD_LANE_8_BIT_B0, 551bf215546Sopenharmony_ci VA_LOAD_LANE_16_BIT_H0, 552bf215546Sopenharmony_ci VA_LOAD_LANE_24_BIT_IDENTITY, 553bf215546Sopenharmony_ci VA_LOAD_LANE_32_BIT_W0, 554bf215546Sopenharmony_ci VA_LOAD_LANE_48_BIT_IDENTITY, 555bf215546Sopenharmony_ci VA_LOAD_LANE_64_BIT_IDENTITY, 556bf215546Sopenharmony_ci VA_LOAD_LANE_96_BIT_IDENTITY, 557bf215546Sopenharmony_ci VA_LOAD_LANE_128_BIT_IDENTITY, 558bf215546Sopenharmony_ci }; 559bf215546Sopenharmony_ci 560bf215546Sopenharmony_ci unsigned memory_size = (valhall_opcodes[I->op].exact >> 27) & 
0x7; 561bf215546Sopenharmony_ci uint64_t hex = (uint64_t) load_lane_identity[memory_size] << 36; 562bf215546Sopenharmony_ci 563bf215546Sopenharmony_ci // unsigned 564bf215546Sopenharmony_ci hex |= (1ull << 39); 565bf215546Sopenharmony_ci 566bf215546Sopenharmony_ci if (!buffer_descriptor) 567bf215546Sopenharmony_ci hex |= va_pack_byte_offset(I); 568bf215546Sopenharmony_ci 569bf215546Sopenharmony_ci hex |= (uint64_t) va_pack_src(I, 0) << 0; 570bf215546Sopenharmony_ci 571bf215546Sopenharmony_ci if (buffer_descriptor) 572bf215546Sopenharmony_ci hex |= (uint64_t) va_pack_src(I, 1) << 8; 573bf215546Sopenharmony_ci 574bf215546Sopenharmony_ci return hex; 575bf215546Sopenharmony_ci} 576bf215546Sopenharmony_ci 577bf215546Sopenharmony_cistatic uint64_t 578bf215546Sopenharmony_civa_pack_memory_access(const bi_instr *I) 579bf215546Sopenharmony_ci{ 580bf215546Sopenharmony_ci switch (I->seg) { 581bf215546Sopenharmony_ci case BI_SEG_TL: return VA_MEMORY_ACCESS_FORCE; 582bf215546Sopenharmony_ci case BI_SEG_POS: return VA_MEMORY_ACCESS_ISTREAM; 583bf215546Sopenharmony_ci case BI_SEG_VARY: return VA_MEMORY_ACCESS_ESTREAM; 584bf215546Sopenharmony_ci default: return VA_MEMORY_ACCESS_NONE; 585bf215546Sopenharmony_ci } 586bf215546Sopenharmony_ci} 587bf215546Sopenharmony_ci 588bf215546Sopenharmony_cistatic uint64_t 589bf215546Sopenharmony_civa_pack_store(const bi_instr *I) 590bf215546Sopenharmony_ci{ 591bf215546Sopenharmony_ci uint64_t hex = va_pack_memory_access(I) << 24; 592bf215546Sopenharmony_ci 593bf215546Sopenharmony_ci va_validate_register_pair(I, 1); 594bf215546Sopenharmony_ci hex |= (uint64_t) va_pack_src(I, 1) << 0; 595bf215546Sopenharmony_ci 596bf215546Sopenharmony_ci hex |= va_pack_byte_offset(I); 597bf215546Sopenharmony_ci 598bf215546Sopenharmony_ci return hex; 599bf215546Sopenharmony_ci} 600bf215546Sopenharmony_ci 601bf215546Sopenharmony_cistatic enum va_lod_mode 602bf215546Sopenharmony_civa_pack_lod_mode(const bi_instr *I) 603bf215546Sopenharmony_ci{ 
604bf215546Sopenharmony_ci switch (I->va_lod_mode) { 605bf215546Sopenharmony_ci case BI_VA_LOD_MODE_ZERO_LOD: return VA_LOD_MODE_ZERO; 606bf215546Sopenharmony_ci case BI_VA_LOD_MODE_COMPUTED_LOD: return VA_LOD_MODE_COMPUTED; 607bf215546Sopenharmony_ci case BI_VA_LOD_MODE_EXPLICIT: return VA_LOD_MODE_EXPLICIT; 608bf215546Sopenharmony_ci case BI_VA_LOD_MODE_COMPUTED_BIAS: return VA_LOD_MODE_COMPUTED_BIAS; 609bf215546Sopenharmony_ci case BI_VA_LOD_MODE_GRDESC: return VA_LOD_MODE_GRDESC; 610bf215546Sopenharmony_ci } 611bf215546Sopenharmony_ci 612bf215546Sopenharmony_ci invalid_instruction(I, "LOD mode"); 613bf215546Sopenharmony_ci} 614bf215546Sopenharmony_ci 615bf215546Sopenharmony_cistatic enum va_register_type 616bf215546Sopenharmony_civa_pack_register_type(const bi_instr *I) 617bf215546Sopenharmony_ci{ 618bf215546Sopenharmony_ci switch (I->register_format) { 619bf215546Sopenharmony_ci case BI_REGISTER_FORMAT_F16: 620bf215546Sopenharmony_ci case BI_REGISTER_FORMAT_F32: 621bf215546Sopenharmony_ci return VA_REGISTER_TYPE_F; 622bf215546Sopenharmony_ci 623bf215546Sopenharmony_ci case BI_REGISTER_FORMAT_U16: 624bf215546Sopenharmony_ci case BI_REGISTER_FORMAT_U32: 625bf215546Sopenharmony_ci return VA_REGISTER_TYPE_U; 626bf215546Sopenharmony_ci 627bf215546Sopenharmony_ci case BI_REGISTER_FORMAT_S16: 628bf215546Sopenharmony_ci case BI_REGISTER_FORMAT_S32: 629bf215546Sopenharmony_ci return VA_REGISTER_TYPE_S; 630bf215546Sopenharmony_ci 631bf215546Sopenharmony_ci default: 632bf215546Sopenharmony_ci invalid_instruction(I, "register type"); 633bf215546Sopenharmony_ci } 634bf215546Sopenharmony_ci} 635bf215546Sopenharmony_ci 636bf215546Sopenharmony_cistatic enum va_register_format 637bf215546Sopenharmony_civa_pack_register_format(const bi_instr *I) 638bf215546Sopenharmony_ci{ 639bf215546Sopenharmony_ci switch (I->register_format) { 640bf215546Sopenharmony_ci case BI_REGISTER_FORMAT_AUTO: return VA_REGISTER_FORMAT_AUTO; 641bf215546Sopenharmony_ci case BI_REGISTER_FORMAT_F32: return 
VA_REGISTER_FORMAT_F32; 642bf215546Sopenharmony_ci case BI_REGISTER_FORMAT_F16: return VA_REGISTER_FORMAT_F16; 643bf215546Sopenharmony_ci case BI_REGISTER_FORMAT_S32: return VA_REGISTER_FORMAT_S32; 644bf215546Sopenharmony_ci case BI_REGISTER_FORMAT_S16: return VA_REGISTER_FORMAT_S16; 645bf215546Sopenharmony_ci case BI_REGISTER_FORMAT_U32: return VA_REGISTER_FORMAT_U32; 646bf215546Sopenharmony_ci case BI_REGISTER_FORMAT_U16: return VA_REGISTER_FORMAT_U16; 647bf215546Sopenharmony_ci default: invalid_instruction(I, "register format"); 648bf215546Sopenharmony_ci } 649bf215546Sopenharmony_ci} 650bf215546Sopenharmony_ci 651bf215546Sopenharmony_ciuint64_t 652bf215546Sopenharmony_civa_pack_instr(const bi_instr *I) 653bf215546Sopenharmony_ci{ 654bf215546Sopenharmony_ci struct va_opcode_info info = valhall_opcodes[I->op]; 655bf215546Sopenharmony_ci 656bf215546Sopenharmony_ci uint64_t hex = info.exact | (((uint64_t) I->flow) << 59); 657bf215546Sopenharmony_ci hex |= ((uint64_t) va_select_fau_page(I)) << 57; 658bf215546Sopenharmony_ci 659bf215546Sopenharmony_ci if (info.slot) 660bf215546Sopenharmony_ci hex |= ((uint64_t) I->slot << 30); 661bf215546Sopenharmony_ci 662bf215546Sopenharmony_ci if (info.sr_count) { 663bf215546Sopenharmony_ci bool read = bi_opcode_props[I->op].sr_read; 664bf215546Sopenharmony_ci bi_index sr = read ? I->src[0] : I->dest[0]; 665bf215546Sopenharmony_ci 666bf215546Sopenharmony_ci unsigned count = read ? 
667bf215546Sopenharmony_ci bi_count_read_registers(I, 0) : 668bf215546Sopenharmony_ci bi_count_write_registers(I, 0); 669bf215546Sopenharmony_ci 670bf215546Sopenharmony_ci hex |= ((uint64_t) count << 33); 671bf215546Sopenharmony_ci hex |= (uint64_t) va_pack_reg(I, sr) << 40; 672bf215546Sopenharmony_ci hex |= ((uint64_t) info.sr_control << 46); 673bf215546Sopenharmony_ci } 674bf215546Sopenharmony_ci 675bf215546Sopenharmony_ci if (info.sr_write_count) { 676bf215546Sopenharmony_ci hex |= ((uint64_t) bi_count_write_registers(I, 0) - 1) << 36; 677bf215546Sopenharmony_ci hex |= ((uint64_t) va_pack_reg(I, I->dest[0])) << 16; 678bf215546Sopenharmony_ci } 679bf215546Sopenharmony_ci 680bf215546Sopenharmony_ci if (info.vecsize) 681bf215546Sopenharmony_ci hex |= ((uint64_t) I->vecsize << 28); 682bf215546Sopenharmony_ci 683bf215546Sopenharmony_ci if (info.register_format) 684bf215546Sopenharmony_ci hex |= ((uint64_t) va_pack_register_format(I)) << 24; 685bf215546Sopenharmony_ci 686bf215546Sopenharmony_ci switch (I->op) { 687bf215546Sopenharmony_ci case BI_OPCODE_LOAD_I8: 688bf215546Sopenharmony_ci case BI_OPCODE_LOAD_I16: 689bf215546Sopenharmony_ci case BI_OPCODE_LOAD_I24: 690bf215546Sopenharmony_ci case BI_OPCODE_LOAD_I32: 691bf215546Sopenharmony_ci case BI_OPCODE_LOAD_I48: 692bf215546Sopenharmony_ci case BI_OPCODE_LOAD_I64: 693bf215546Sopenharmony_ci case BI_OPCODE_LOAD_I96: 694bf215546Sopenharmony_ci case BI_OPCODE_LOAD_I128: 695bf215546Sopenharmony_ci hex |= va_pack_load(I, false); 696bf215546Sopenharmony_ci break; 697bf215546Sopenharmony_ci 698bf215546Sopenharmony_ci case BI_OPCODE_LD_BUFFER_I8: 699bf215546Sopenharmony_ci case BI_OPCODE_LD_BUFFER_I16: 700bf215546Sopenharmony_ci case BI_OPCODE_LD_BUFFER_I24: 701bf215546Sopenharmony_ci case BI_OPCODE_LD_BUFFER_I32: 702bf215546Sopenharmony_ci case BI_OPCODE_LD_BUFFER_I48: 703bf215546Sopenharmony_ci case BI_OPCODE_LD_BUFFER_I64: 704bf215546Sopenharmony_ci case BI_OPCODE_LD_BUFFER_I96: 705bf215546Sopenharmony_ci case 
BI_OPCODE_LD_BUFFER_I128: 706bf215546Sopenharmony_ci hex |= va_pack_load(I, true); 707bf215546Sopenharmony_ci break; 708bf215546Sopenharmony_ci 709bf215546Sopenharmony_ci case BI_OPCODE_STORE_I8: 710bf215546Sopenharmony_ci case BI_OPCODE_STORE_I16: 711bf215546Sopenharmony_ci case BI_OPCODE_STORE_I24: 712bf215546Sopenharmony_ci case BI_OPCODE_STORE_I32: 713bf215546Sopenharmony_ci case BI_OPCODE_STORE_I48: 714bf215546Sopenharmony_ci case BI_OPCODE_STORE_I64: 715bf215546Sopenharmony_ci case BI_OPCODE_STORE_I96: 716bf215546Sopenharmony_ci case BI_OPCODE_STORE_I128: 717bf215546Sopenharmony_ci hex |= va_pack_store(I); 718bf215546Sopenharmony_ci break; 719bf215546Sopenharmony_ci 720bf215546Sopenharmony_ci case BI_OPCODE_ATOM1_RETURN_I32: 721bf215546Sopenharmony_ci /* Permit omitting the destination for plain ATOM1 */ 722bf215546Sopenharmony_ci if (!bi_count_write_registers(I, 0)) { 723bf215546Sopenharmony_ci hex |= (0x40ull << 40); // fake read 724bf215546Sopenharmony_ci } 725bf215546Sopenharmony_ci 726bf215546Sopenharmony_ci /* 64-bit source */ 727bf215546Sopenharmony_ci va_validate_register_pair(I, 0); 728bf215546Sopenharmony_ci hex |= (uint64_t) va_pack_src(I, 0) << 0; 729bf215546Sopenharmony_ci hex |= va_pack_byte_offset_8(I); 730bf215546Sopenharmony_ci hex |= ((uint64_t) va_pack_atom_opc_1(I)) << 22; 731bf215546Sopenharmony_ci break; 732bf215546Sopenharmony_ci 733bf215546Sopenharmony_ci case BI_OPCODE_ATOM_I32: 734bf215546Sopenharmony_ci case BI_OPCODE_ATOM_RETURN_I32: 735bf215546Sopenharmony_ci /* 64-bit source */ 736bf215546Sopenharmony_ci va_validate_register_pair(I, 1); 737bf215546Sopenharmony_ci hex |= (uint64_t) va_pack_src(I, 1) << 0; 738bf215546Sopenharmony_ci hex |= va_pack_byte_offset_8(I); 739bf215546Sopenharmony_ci hex |= ((uint64_t) va_pack_atom_opc(I)) << 22; 740bf215546Sopenharmony_ci 741bf215546Sopenharmony_ci if (I->op == BI_OPCODE_ATOM_RETURN_I32) 742bf215546Sopenharmony_ci hex |= (0xc0ull << 40); // flags 743bf215546Sopenharmony_ci 
744bf215546Sopenharmony_ci if (I->atom_opc == BI_ATOM_OPC_ACMPXCHG) 745bf215546Sopenharmony_ci hex |= (1 << 26); /* .compare */ 746bf215546Sopenharmony_ci 747bf215546Sopenharmony_ci break; 748bf215546Sopenharmony_ci 749bf215546Sopenharmony_ci case BI_OPCODE_ST_CVT: 750bf215546Sopenharmony_ci /* Staging read */ 751bf215546Sopenharmony_ci hex |= va_pack_store(I); 752bf215546Sopenharmony_ci 753bf215546Sopenharmony_ci /* Conversion descriptor */ 754bf215546Sopenharmony_ci hex |= (uint64_t) va_pack_src(I, 3) << 16; 755bf215546Sopenharmony_ci break; 756bf215546Sopenharmony_ci 757bf215546Sopenharmony_ci case BI_OPCODE_BLEND: 758bf215546Sopenharmony_ci { 759bf215546Sopenharmony_ci /* Source 0 - Blend descriptor (64-bit) */ 760bf215546Sopenharmony_ci hex |= ((uint64_t) va_pack_src(I, 2)) << 0; 761bf215546Sopenharmony_ci va_validate_register_pair(I, 2); 762bf215546Sopenharmony_ci 763bf215546Sopenharmony_ci /* Target */ 764bf215546Sopenharmony_ci if (I->branch_offset & 0x7) invalid_instruction(I, "unaligned branch"); 765bf215546Sopenharmony_ci hex |= ((I->branch_offset >> 3) << 8); 766bf215546Sopenharmony_ci 767bf215546Sopenharmony_ci /* Source 2 - coverage mask */ 768bf215546Sopenharmony_ci hex |= ((uint64_t) va_pack_reg(I, I->src[1])) << 16; 769bf215546Sopenharmony_ci 770bf215546Sopenharmony_ci /* Vector size */ 771bf215546Sopenharmony_ci unsigned vecsize = 4; 772bf215546Sopenharmony_ci hex |= ((uint64_t) (vecsize - 1) << 28); 773bf215546Sopenharmony_ci 774bf215546Sopenharmony_ci break; 775bf215546Sopenharmony_ci } 776bf215546Sopenharmony_ci 777bf215546Sopenharmony_ci case BI_OPCODE_TEX_SINGLE: 778bf215546Sopenharmony_ci case BI_OPCODE_TEX_FETCH: 779bf215546Sopenharmony_ci case BI_OPCODE_TEX_GATHER: 780bf215546Sopenharmony_ci { 781bf215546Sopenharmony_ci /* Image to read from */ 782bf215546Sopenharmony_ci hex |= ((uint64_t) va_pack_src(I, 1)) << 0; 783bf215546Sopenharmony_ci 784bf215546Sopenharmony_ci if (I->op == BI_OPCODE_TEX_FETCH && I->shadow) 785bf215546Sopenharmony_ci 
invalid_instruction(I, "TEX_FETCH does not support .shadow"); 786bf215546Sopenharmony_ci 787bf215546Sopenharmony_ci if (I->array_enable) hex |= (1ull << 10); 788bf215546Sopenharmony_ci if (I->texel_offset) hex |= (1ull << 11); 789bf215546Sopenharmony_ci if (I->shadow) hex |= (1ull << 12); 790bf215546Sopenharmony_ci if (I->skip) hex |= (1ull << 39); 791bf215546Sopenharmony_ci if (!bi_is_regfmt_16(I->register_format)) hex |= (1ull << 46); 792bf215546Sopenharmony_ci 793bf215546Sopenharmony_ci if (I->op == BI_OPCODE_TEX_SINGLE) 794bf215546Sopenharmony_ci hex |= ((uint64_t) va_pack_lod_mode(I)) << 13; 795bf215546Sopenharmony_ci 796bf215546Sopenharmony_ci if (I->op == BI_OPCODE_TEX_GATHER) { 797bf215546Sopenharmony_ci if (I->integer_coordinates) hex |= (1 << 13); 798bf215546Sopenharmony_ci hex |= ((uint64_t) I->fetch_component) << 14; 799bf215546Sopenharmony_ci } 800bf215546Sopenharmony_ci 801bf215546Sopenharmony_ci hex |= (VA_WRITE_MASK_RGBA << 22); 802bf215546Sopenharmony_ci hex |= ((uint64_t) va_pack_register_type(I)) << 26; 803bf215546Sopenharmony_ci hex |= ((uint64_t) I->dimension) << 28; 804bf215546Sopenharmony_ci 805bf215546Sopenharmony_ci break; 806bf215546Sopenharmony_ci } 807bf215546Sopenharmony_ci 808bf215546Sopenharmony_ci default: 809bf215546Sopenharmony_ci if (!info.exact && I->op != BI_OPCODE_NOP) 810bf215546Sopenharmony_ci invalid_instruction(I, "opcode"); 811bf215546Sopenharmony_ci 812bf215546Sopenharmony_ci hex |= va_pack_alu(I); 813bf215546Sopenharmony_ci break; 814bf215546Sopenharmony_ci } 815bf215546Sopenharmony_ci 816bf215546Sopenharmony_ci return hex; 817bf215546Sopenharmony_ci} 818bf215546Sopenharmony_ci 819bf215546Sopenharmony_cistatic unsigned 820bf215546Sopenharmony_civa_instructions_in_block(bi_block *block) 821bf215546Sopenharmony_ci{ 822bf215546Sopenharmony_ci unsigned offset = 0; 823bf215546Sopenharmony_ci 824bf215546Sopenharmony_ci bi_foreach_instr_in_block(block, _) { 825bf215546Sopenharmony_ci offset++; 826bf215546Sopenharmony_ci } 
827bf215546Sopenharmony_ci 828bf215546Sopenharmony_ci return offset; 829bf215546Sopenharmony_ci} 830bf215546Sopenharmony_ci 831bf215546Sopenharmony_ci/* Calculate branch_offset from a branch_target for a direct relative branch */ 832bf215546Sopenharmony_ci 833bf215546Sopenharmony_cistatic void 834bf215546Sopenharmony_civa_lower_branch_target(bi_context *ctx, bi_block *start, bi_instr *I) 835bf215546Sopenharmony_ci{ 836bf215546Sopenharmony_ci /* Precondition: unlowered relative branch */ 837bf215546Sopenharmony_ci bi_block *target = I->branch_target; 838bf215546Sopenharmony_ci assert(target != NULL); 839bf215546Sopenharmony_ci 840bf215546Sopenharmony_ci /* Signed since we might jump backwards */ 841bf215546Sopenharmony_ci signed offset = 0; 842bf215546Sopenharmony_ci 843bf215546Sopenharmony_ci /* Determine if the target block is strictly greater in source order */ 844bf215546Sopenharmony_ci bool forwards = target->index > start->index; 845bf215546Sopenharmony_ci 846bf215546Sopenharmony_ci if (forwards) { 847bf215546Sopenharmony_ci /* We have to jump through this block */ 848bf215546Sopenharmony_ci bi_foreach_instr_in_block_from(start, _, I) { 849bf215546Sopenharmony_ci offset++; 850bf215546Sopenharmony_ci } 851bf215546Sopenharmony_ci 852bf215546Sopenharmony_ci /* We then need to jump over every following block until the target */ 853bf215546Sopenharmony_ci bi_foreach_block_from(ctx, start, blk) { 854bf215546Sopenharmony_ci /* End just before the target */ 855bf215546Sopenharmony_ci if (blk == target) 856bf215546Sopenharmony_ci break; 857bf215546Sopenharmony_ci 858bf215546Sopenharmony_ci /* Count other blocks */ 859bf215546Sopenharmony_ci if (blk != start) 860bf215546Sopenharmony_ci offset += va_instructions_in_block(blk); 861bf215546Sopenharmony_ci } 862bf215546Sopenharmony_ci } else { 863bf215546Sopenharmony_ci /* Jump through the beginning of this block */ 864bf215546Sopenharmony_ci bi_foreach_instr_in_block_from_rev(start, ins, I) { 865bf215546Sopenharmony_ci if 
(ins != I) 866bf215546Sopenharmony_ci offset--; 867bf215546Sopenharmony_ci } 868bf215546Sopenharmony_ci 869bf215546Sopenharmony_ci /* Jump over preceding blocks up to and including the target to get to 870bf215546Sopenharmony_ci * the beginning of the target */ 871bf215546Sopenharmony_ci bi_foreach_block_from_rev(ctx, start, blk) { 872bf215546Sopenharmony_ci if (blk == start) 873bf215546Sopenharmony_ci continue; 874bf215546Sopenharmony_ci 875bf215546Sopenharmony_ci offset -= va_instructions_in_block(blk); 876bf215546Sopenharmony_ci 877bf215546Sopenharmony_ci /* End just after the target */ 878bf215546Sopenharmony_ci if (blk == target) 879bf215546Sopenharmony_ci break; 880bf215546Sopenharmony_ci } 881bf215546Sopenharmony_ci } 882bf215546Sopenharmony_ci 883bf215546Sopenharmony_ci /* Offset is relative to the next instruction, so bias */ 884bf215546Sopenharmony_ci offset--; 885bf215546Sopenharmony_ci 886bf215546Sopenharmony_ci /* Update the instruction */ 887bf215546Sopenharmony_ci I->branch_offset = offset; 888bf215546Sopenharmony_ci} 889bf215546Sopenharmony_ci 890bf215546Sopenharmony_ci/* 891bf215546Sopenharmony_ci * Late lowering to insert blend shader calls after BLEND instructions. Required 892bf215546Sopenharmony_ci * to support blend shaders, so this pass may be omitted if it is known that 893bf215546Sopenharmony_ci * blend shaders are never used. 894bf215546Sopenharmony_ci * 895bf215546Sopenharmony_ci * This lowering runs late because it introduces control flow changes without 896bf215546Sopenharmony_ci * modifying the control flow graph. It hardcodes registers, meaning running 897bf215546Sopenharmony_ci * after RA makes sense. Finally, it hardcodes a manually sized instruction 898bf215546Sopenharmony_ci * sequence, requiring it to run after scheduling. 899bf215546Sopenharmony_ci * 900bf215546Sopenharmony_ci * As it is Valhall specific, running it as a pre-pack lowering is sensible. 
901bf215546Sopenharmony_ci */ 902bf215546Sopenharmony_cistatic void 903bf215546Sopenharmony_civa_lower_blend(bi_context *ctx) 904bf215546Sopenharmony_ci{ 905bf215546Sopenharmony_ci /* Link register (ABI between fragment and blend shaders) */ 906bf215546Sopenharmony_ci bi_index lr = bi_register(48); 907bf215546Sopenharmony_ci 908bf215546Sopenharmony_ci /* Program counter for *next* instruction */ 909bf215546Sopenharmony_ci bi_index pc = bi_fau(BIR_FAU_PROGRAM_COUNTER, false); 910bf215546Sopenharmony_ci 911bf215546Sopenharmony_ci bi_foreach_instr_global_safe(ctx, I) { 912bf215546Sopenharmony_ci if (I->op != BI_OPCODE_BLEND) 913bf215546Sopenharmony_ci continue; 914bf215546Sopenharmony_ci 915bf215546Sopenharmony_ci bi_builder b = bi_init_builder(ctx, bi_after_instr(I)); 916bf215546Sopenharmony_ci 917bf215546Sopenharmony_ci unsigned prolog_length = 2 * 8; 918bf215546Sopenharmony_ci 919bf215546Sopenharmony_ci if (I->flow == VA_FLOW_END) 920bf215546Sopenharmony_ci bi_iadd_imm_i32_to(&b, lr, va_zero_lut(), 0); 921bf215546Sopenharmony_ci else 922bf215546Sopenharmony_ci bi_iadd_imm_i32_to(&b, lr, pc, prolog_length - 8); 923bf215546Sopenharmony_ci 924bf215546Sopenharmony_ci bi_branchzi(&b, va_zero_lut(), I->src[3], BI_CMPF_EQ); 925bf215546Sopenharmony_ci 926bf215546Sopenharmony_ci /* For fixed function: skip the prologue, or return */ 927bf215546Sopenharmony_ci if (I->flow != VA_FLOW_END) 928bf215546Sopenharmony_ci I->branch_offset = prolog_length; 929bf215546Sopenharmony_ci } 930bf215546Sopenharmony_ci} 931bf215546Sopenharmony_ci 932bf215546Sopenharmony_civoid 933bf215546Sopenharmony_cibi_pack_valhall(bi_context *ctx, struct util_dynarray *emission) 934bf215546Sopenharmony_ci{ 935bf215546Sopenharmony_ci unsigned orig_size = emission->size; 936bf215546Sopenharmony_ci 937bf215546Sopenharmony_ci va_validate(stderr, ctx); 938bf215546Sopenharmony_ci 939bf215546Sopenharmony_ci /* Late lowering */ 940bf215546Sopenharmony_ci if (ctx->stage == MESA_SHADER_FRAGMENT && 
!ctx->inputs->is_blend) 941bf215546Sopenharmony_ci va_lower_blend(ctx); 942bf215546Sopenharmony_ci 943bf215546Sopenharmony_ci bi_foreach_block(ctx, block) { 944bf215546Sopenharmony_ci bi_foreach_instr_in_block(block, I) { 945bf215546Sopenharmony_ci if (I->op == BI_OPCODE_BRANCHZ_I16) 946bf215546Sopenharmony_ci va_lower_branch_target(ctx, block, I); 947bf215546Sopenharmony_ci 948bf215546Sopenharmony_ci uint64_t hex = va_pack_instr(I); 949bf215546Sopenharmony_ci util_dynarray_append(emission, uint64_t, hex); 950bf215546Sopenharmony_ci } 951bf215546Sopenharmony_ci } 952bf215546Sopenharmony_ci 953bf215546Sopenharmony_ci /* Pad with zeroes, but keep empty programs empty so they may be omitted 954bf215546Sopenharmony_ci * altogether. Failing to do this would result in a program containing only 955bf215546Sopenharmony_ci * zeroes, which is invalid and will raise an encoding fault. 956bf215546Sopenharmony_ci * 957bf215546Sopenharmony_ci * Pad an extra 16 byte (one instruction) to separate primary and secondary 958bf215546Sopenharmony_ci * shader disassembles. This is not strictly necessary, but it's a good 959bf215546Sopenharmony_ci * practice. 128 bytes is the optimal program alignment on Trym, so pad 960bf215546Sopenharmony_ci * secondary shaders up to 128 bytes. This may help the instruction cache. 961bf215546Sopenharmony_ci */ 962bf215546Sopenharmony_ci if (orig_size != emission->size) { 963bf215546Sopenharmony_ci unsigned aligned = ALIGN_POT(emission->size + 16, 128); 964bf215546Sopenharmony_ci unsigned count = aligned - emission->size; 965bf215546Sopenharmony_ci 966bf215546Sopenharmony_ci memset(util_dynarray_grow(emission, uint8_t, count), 0, count); 967bf215546Sopenharmony_ci } 968bf215546Sopenharmony_ci} 969