1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2016 Intel Corporation 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#include <math.h> 25bf215546Sopenharmony_ci#include "vtn_private.h" 26bf215546Sopenharmony_ci#include "spirv_info.h" 27bf215546Sopenharmony_ci 28bf215546Sopenharmony_ci/* 29bf215546Sopenharmony_ci * Normally, column vectors in SPIR-V correspond to a single NIR SSA 30bf215546Sopenharmony_ci * definition. But for matrix multiplies, we want to do one routine for 31bf215546Sopenharmony_ci * multiplying a matrix by a matrix and then pretend that vectors are matrices 32bf215546Sopenharmony_ci * with one column. So we "wrap" these things, and unwrap the result before we 33bf215546Sopenharmony_ci * send it off. 34bf215546Sopenharmony_ci */ 35bf215546Sopenharmony_ci 36bf215546Sopenharmony_cistatic struct vtn_ssa_value * 37bf215546Sopenharmony_ciwrap_matrix(struct vtn_builder *b, struct vtn_ssa_value *val) 38bf215546Sopenharmony_ci{ 39bf215546Sopenharmony_ci if (val == NULL) 40bf215546Sopenharmony_ci return NULL; 41bf215546Sopenharmony_ci 42bf215546Sopenharmony_ci if (glsl_type_is_matrix(val->type)) 43bf215546Sopenharmony_ci return val; 44bf215546Sopenharmony_ci 45bf215546Sopenharmony_ci struct vtn_ssa_value *dest = rzalloc(b, struct vtn_ssa_value); 46bf215546Sopenharmony_ci dest->type = glsl_get_bare_type(val->type); 47bf215546Sopenharmony_ci dest->elems = ralloc_array(b, struct vtn_ssa_value *, 1); 48bf215546Sopenharmony_ci dest->elems[0] = val; 49bf215546Sopenharmony_ci 50bf215546Sopenharmony_ci return dest; 51bf215546Sopenharmony_ci} 52bf215546Sopenharmony_ci 53bf215546Sopenharmony_cistatic struct vtn_ssa_value * 54bf215546Sopenharmony_ciunwrap_matrix(struct vtn_ssa_value *val) 55bf215546Sopenharmony_ci{ 56bf215546Sopenharmony_ci if (glsl_type_is_matrix(val->type)) 57bf215546Sopenharmony_ci return val; 58bf215546Sopenharmony_ci 59bf215546Sopenharmony_ci return val->elems[0]; 60bf215546Sopenharmony_ci} 61bf215546Sopenharmony_ci 62bf215546Sopenharmony_cistatic struct vtn_ssa_value * 63bf215546Sopenharmony_cimatrix_multiply(struct vtn_builder *b, 64bf215546Sopenharmony_ci struct vtn_ssa_value *_src0, struct vtn_ssa_value *_src1) 65bf215546Sopenharmony_ci{ 66bf215546Sopenharmony_ci 67bf215546Sopenharmony_ci struct vtn_ssa_value *src0 = wrap_matrix(b, _src0); 68bf215546Sopenharmony_ci struct vtn_ssa_value *src1 = wrap_matrix(b, _src1); 69bf215546Sopenharmony_ci struct vtn_ssa_value *src0_transpose = wrap_matrix(b, _src0->transposed); 70bf215546Sopenharmony_ci struct vtn_ssa_value *src1_transpose = wrap_matrix(b, _src1->transposed); 71bf215546Sopenharmony_ci 72bf215546Sopenharmony_ci unsigned src0_rows = glsl_get_vector_elements(src0->type); 73bf215546Sopenharmony_ci unsigned src0_columns = glsl_get_matrix_columns(src0->type); 74bf215546Sopenharmony_ci unsigned src1_columns = glsl_get_matrix_columns(src1->type); 75bf215546Sopenharmony_ci 76bf215546Sopenharmony_ci const struct glsl_type *dest_type; 77bf215546Sopenharmony_ci if (src1_columns > 1) { 78bf215546Sopenharmony_ci dest_type = glsl_matrix_type(glsl_get_base_type(src0->type), 79bf215546Sopenharmony_ci src0_rows, src1_columns); 80bf215546Sopenharmony_ci } else { 81bf215546Sopenharmony_ci dest_type = glsl_vector_type(glsl_get_base_type(src0->type), src0_rows); 82bf215546Sopenharmony_ci } 83bf215546Sopenharmony_ci struct vtn_ssa_value *dest = vtn_create_ssa_value(b, dest_type); 84bf215546Sopenharmony_ci 85bf215546Sopenharmony_ci dest = wrap_matrix(b, dest); 86bf215546Sopenharmony_ci 87bf215546Sopenharmony_ci bool transpose_result = false; 88bf215546Sopenharmony_ci if (src0_transpose && src1_transpose) { 89bf215546Sopenharmony_ci /* transpose(A) * transpose(B) = transpose(B * A) */ 90bf215546Sopenharmony_ci src1 = src0_transpose; 91bf215546Sopenharmony_ci src0 = src1_transpose; 92bf215546Sopenharmony_ci src0_transpose = NULL; 93bf215546Sopenharmony_ci src1_transpose = NULL; 94bf215546Sopenharmony_ci transpose_result = true; 95bf215546Sopenharmony_ci } 96bf215546Sopenharmony_ci 97bf215546Sopenharmony_ci if (src0_transpose && !src1_transpose && 98bf215546Sopenharmony_ci glsl_get_base_type(src0->type) == GLSL_TYPE_FLOAT) { 99bf215546Sopenharmony_ci /* We already have the rows of src0 and the columns of src1 available, 100bf215546Sopenharmony_ci * so we can just take the dot product of each row with each column to 101bf215546Sopenharmony_ci * get the result. 102bf215546Sopenharmony_ci */ 103bf215546Sopenharmony_ci 104bf215546Sopenharmony_ci for (unsigned i = 0; i < src1_columns; i++) { 105bf215546Sopenharmony_ci nir_ssa_def *vec_src[4]; 106bf215546Sopenharmony_ci for (unsigned j = 0; j < src0_rows; j++) { 107bf215546Sopenharmony_ci vec_src[j] = nir_fdot(&b->nb, src0_transpose->elems[j]->def, 108bf215546Sopenharmony_ci src1->elems[i]->def); 109bf215546Sopenharmony_ci } 110bf215546Sopenharmony_ci dest->elems[i]->def = nir_vec(&b->nb, vec_src, src0_rows); 111bf215546Sopenharmony_ci } 112bf215546Sopenharmony_ci } else { 113bf215546Sopenharmony_ci /* We don't handle the case where src1 is transposed but not src0, since 114bf215546Sopenharmony_ci * the general case only uses individual components of src1 so the 115bf215546Sopenharmony_ci * optimizer should chew through the transpose we emitted for src1. 116bf215546Sopenharmony_ci */ 117bf215546Sopenharmony_ci 118bf215546Sopenharmony_ci for (unsigned i = 0; i < src1_columns; i++) { 119bf215546Sopenharmony_ci /* dest[i] = sum(src0[j] * src1[i][j] for all j) */ 120bf215546Sopenharmony_ci dest->elems[i]->def = 121bf215546Sopenharmony_ci nir_fmul(&b->nb, src0->elems[src0_columns - 1]->def, 122bf215546Sopenharmony_ci nir_channel(&b->nb, src1->elems[i]->def, src0_columns - 1)); 123bf215546Sopenharmony_ci for (int j = src0_columns - 2; j >= 0; j--) { 124bf215546Sopenharmony_ci dest->elems[i]->def = 125bf215546Sopenharmony_ci nir_ffma(&b->nb, src0->elems[j]->def, 126bf215546Sopenharmony_ci nir_channel(&b->nb, src1->elems[i]->def, j), 127bf215546Sopenharmony_ci dest->elems[i]->def); 128bf215546Sopenharmony_ci } 129bf215546Sopenharmony_ci } 130bf215546Sopenharmony_ci } 131bf215546Sopenharmony_ci 132bf215546Sopenharmony_ci dest = unwrap_matrix(dest); 133bf215546Sopenharmony_ci 134bf215546Sopenharmony_ci if (transpose_result) 135bf215546Sopenharmony_ci dest = vtn_ssa_transpose(b, dest); 136bf215546Sopenharmony_ci 137bf215546Sopenharmony_ci return dest; 138bf215546Sopenharmony_ci} 139bf215546Sopenharmony_ci 140bf215546Sopenharmony_cistatic struct vtn_ssa_value * 141bf215546Sopenharmony_cimat_times_scalar(struct vtn_builder *b, 142bf215546Sopenharmony_ci struct vtn_ssa_value *mat, 143bf215546Sopenharmony_ci nir_ssa_def *scalar) 144bf215546Sopenharmony_ci{ 145bf215546Sopenharmony_ci struct vtn_ssa_value *dest = vtn_create_ssa_value(b, mat->type); 146bf215546Sopenharmony_ci for (unsigned i = 0; i < glsl_get_matrix_columns(mat->type); i++) { 147bf215546Sopenharmony_ci if (glsl_base_type_is_integer(glsl_get_base_type(mat->type))) 148bf215546Sopenharmony_ci dest->elems[i]->def = nir_imul(&b->nb, mat->elems[i]->def, scalar); 149bf215546Sopenharmony_ci else 150bf215546Sopenharmony_ci dest->elems[i]->def = nir_fmul(&b->nb, mat->elems[i]->def, scalar); 151bf215546Sopenharmony_ci } 152bf215546Sopenharmony_ci 153bf215546Sopenharmony_ci return dest; 154bf215546Sopenharmony_ci} 155bf215546Sopenharmony_ci 156bf215546Sopenharmony_cinir_ssa_def * 157bf215546Sopenharmony_civtn_mediump_downconvert(struct vtn_builder *b, enum glsl_base_type base_type, nir_ssa_def *def) 158bf215546Sopenharmony_ci{ 159bf215546Sopenharmony_ci if (def->bit_size == 16) 160bf215546Sopenharmony_ci return def; 161bf215546Sopenharmony_ci 162bf215546Sopenharmony_ci switch (base_type) { 163bf215546Sopenharmony_ci case GLSL_TYPE_FLOAT: 164bf215546Sopenharmony_ci return nir_f2fmp(&b->nb, def); 165bf215546Sopenharmony_ci case GLSL_TYPE_INT: 166bf215546Sopenharmony_ci case GLSL_TYPE_UINT: 167bf215546Sopenharmony_ci return nir_i2imp(&b->nb, def); 168bf215546Sopenharmony_ci /* Workaround for 3DMark Wild Life which has RelaxedPrecision on 169bf215546Sopenharmony_ci * OpLogical* operations (which is forbidden by spec). 170bf215546Sopenharmony_ci */ 171bf215546Sopenharmony_ci case GLSL_TYPE_BOOL: 172bf215546Sopenharmony_ci return def; 173bf215546Sopenharmony_ci default: 174bf215546Sopenharmony_ci unreachable("bad relaxed precision input type"); 175bf215546Sopenharmony_ci } 176bf215546Sopenharmony_ci} 177bf215546Sopenharmony_ci 178bf215546Sopenharmony_cistruct vtn_ssa_value * 179bf215546Sopenharmony_civtn_mediump_downconvert_value(struct vtn_builder *b, struct vtn_ssa_value *src) 180bf215546Sopenharmony_ci{ 181bf215546Sopenharmony_ci if (!src) 182bf215546Sopenharmony_ci return src; 183bf215546Sopenharmony_ci 184bf215546Sopenharmony_ci struct vtn_ssa_value *srcmp = vtn_create_ssa_value(b, src->type); 185bf215546Sopenharmony_ci 186bf215546Sopenharmony_ci if (src->transposed) { 187bf215546Sopenharmony_ci srcmp->transposed = vtn_mediump_downconvert_value(b, src->transposed); 188bf215546Sopenharmony_ci } else { 189bf215546Sopenharmony_ci enum glsl_base_type base_type = glsl_get_base_type(src->type); 190bf215546Sopenharmony_ci 191bf215546Sopenharmony_ci if (glsl_type_is_vector_or_scalar(src->type)) { 192bf215546Sopenharmony_ci srcmp->def = vtn_mediump_downconvert(b, base_type, src->def); 193bf215546Sopenharmony_ci } else { 194bf215546Sopenharmony_ci assert(glsl_get_base_type(src->type) == GLSL_TYPE_FLOAT); 195bf215546Sopenharmony_ci for (int i = 0; i < glsl_get_matrix_columns(src->type); i++) 196bf215546Sopenharmony_ci srcmp->elems[i]->def = vtn_mediump_downconvert(b, base_type, src->elems[i]->def); 197bf215546Sopenharmony_ci } 198bf215546Sopenharmony_ci } 199bf215546Sopenharmony_ci 200bf215546Sopenharmony_ci return srcmp; 201bf215546Sopenharmony_ci} 202bf215546Sopenharmony_ci 203bf215546Sopenharmony_cistatic struct vtn_ssa_value * 204bf215546Sopenharmony_civtn_handle_matrix_alu(struct vtn_builder *b, SpvOp opcode, 205bf215546Sopenharmony_ci struct vtn_ssa_value *src0, struct vtn_ssa_value *src1) 206bf215546Sopenharmony_ci{ 207bf215546Sopenharmony_ci switch (opcode) { 208bf215546Sopenharmony_ci case SpvOpFNegate: { 209bf215546Sopenharmony_ci struct vtn_ssa_value *dest = vtn_create_ssa_value(b, src0->type); 210bf215546Sopenharmony_ci unsigned cols = glsl_get_matrix_columns(src0->type); 211bf215546Sopenharmony_ci for (unsigned i = 0; i < cols; i++) 212bf215546Sopenharmony_ci dest->elems[i]->def = nir_fneg(&b->nb, src0->elems[i]->def); 213bf215546Sopenharmony_ci return dest; 214bf215546Sopenharmony_ci } 215bf215546Sopenharmony_ci 216bf215546Sopenharmony_ci case SpvOpFAdd: { 217bf215546Sopenharmony_ci struct vtn_ssa_value *dest = vtn_create_ssa_value(b, src0->type); 218bf215546Sopenharmony_ci unsigned cols = glsl_get_matrix_columns(src0->type); 219bf215546Sopenharmony_ci for (unsigned i = 0; i < cols; i++) 220bf215546Sopenharmony_ci dest->elems[i]->def = 221bf215546Sopenharmony_ci nir_fadd(&b->nb, src0->elems[i]->def, src1->elems[i]->def); 222bf215546Sopenharmony_ci return dest; 223bf215546Sopenharmony_ci } 224bf215546Sopenharmony_ci 225bf215546Sopenharmony_ci case SpvOpFSub: { 226bf215546Sopenharmony_ci struct vtn_ssa_value *dest = vtn_create_ssa_value(b, src0->type); 227bf215546Sopenharmony_ci unsigned cols = glsl_get_matrix_columns(src0->type); 228bf215546Sopenharmony_ci for (unsigned i = 0; i < cols; i++) 229bf215546Sopenharmony_ci dest->elems[i]->def = 230bf215546Sopenharmony_ci nir_fsub(&b->nb, src0->elems[i]->def, src1->elems[i]->def); 231bf215546Sopenharmony_ci return dest; 232bf215546Sopenharmony_ci } 233bf215546Sopenharmony_ci 234bf215546Sopenharmony_ci case SpvOpTranspose: 235bf215546Sopenharmony_ci return vtn_ssa_transpose(b, src0); 236bf215546Sopenharmony_ci 237bf215546Sopenharmony_ci case SpvOpMatrixTimesScalar: 238bf215546Sopenharmony_ci if (src0->transposed) { 239bf215546Sopenharmony_ci return vtn_ssa_transpose(b, mat_times_scalar(b, src0->transposed, 240bf215546Sopenharmony_ci src1->def)); 241bf215546Sopenharmony_ci } else { 242bf215546Sopenharmony_ci return mat_times_scalar(b, src0, src1->def); 243bf215546Sopenharmony_ci } 244bf215546Sopenharmony_ci break; 245bf215546Sopenharmony_ci 246bf215546Sopenharmony_ci case SpvOpVectorTimesMatrix: 247bf215546Sopenharmony_ci case SpvOpMatrixTimesVector: 248bf215546Sopenharmony_ci case SpvOpMatrixTimesMatrix: 249bf215546Sopenharmony_ci if (opcode == SpvOpVectorTimesMatrix) { 250bf215546Sopenharmony_ci return matrix_multiply(b, vtn_ssa_transpose(b, src1), src0); 251bf215546Sopenharmony_ci } else { 252bf215546Sopenharmony_ci return matrix_multiply(b, src0, src1); 253bf215546Sopenharmony_ci } 254bf215546Sopenharmony_ci break; 255bf215546Sopenharmony_ci 256bf215546Sopenharmony_ci default: vtn_fail_with_opcode("unknown matrix opcode", opcode); 257bf215546Sopenharmony_ci } 258bf215546Sopenharmony_ci} 259bf215546Sopenharmony_ci 260bf215546Sopenharmony_cistatic nir_alu_type 261bf215546Sopenharmony_ciconvert_op_src_type(SpvOp opcode) 262bf215546Sopenharmony_ci{ 263bf215546Sopenharmony_ci switch (opcode) { 264bf215546Sopenharmony_ci case SpvOpFConvert: 265bf215546Sopenharmony_ci case SpvOpConvertFToS: 266bf215546Sopenharmony_ci case SpvOpConvertFToU: 267bf215546Sopenharmony_ci return nir_type_float; 268bf215546Sopenharmony_ci case SpvOpSConvert: 269bf215546Sopenharmony_ci case SpvOpConvertSToF: 270bf215546Sopenharmony_ci case SpvOpSatConvertSToU: 271bf215546Sopenharmony_ci return nir_type_int; 272bf215546Sopenharmony_ci case SpvOpUConvert: 273bf215546Sopenharmony_ci case SpvOpConvertUToF: 274bf215546Sopenharmony_ci case SpvOpSatConvertUToS: 275bf215546Sopenharmony_ci return nir_type_uint; 276bf215546Sopenharmony_ci default: 277bf215546Sopenharmony_ci unreachable("Unhandled conversion op"); 278bf215546Sopenharmony_ci } 279bf215546Sopenharmony_ci} 280bf215546Sopenharmony_ci 281bf215546Sopenharmony_cistatic nir_alu_type 282bf215546Sopenharmony_ciconvert_op_dst_type(SpvOp opcode) 283bf215546Sopenharmony_ci{ 284bf215546Sopenharmony_ci switch (opcode) { 285bf215546Sopenharmony_ci case SpvOpFConvert: 286bf215546Sopenharmony_ci case SpvOpConvertSToF: 287bf215546Sopenharmony_ci case SpvOpConvertUToF: 288bf215546Sopenharmony_ci return nir_type_float; 289bf215546Sopenharmony_ci case SpvOpSConvert: 290bf215546Sopenharmony_ci case SpvOpConvertFToS: 291bf215546Sopenharmony_ci case SpvOpSatConvertUToS: 292bf215546Sopenharmony_ci return nir_type_int; 293bf215546Sopenharmony_ci case SpvOpUConvert: 294bf215546Sopenharmony_ci case SpvOpConvertFToU: 295bf215546Sopenharmony_ci case SpvOpSatConvertSToU: 296bf215546Sopenharmony_ci return nir_type_uint; 297bf215546Sopenharmony_ci default: 298bf215546Sopenharmony_ci unreachable("Unhandled conversion op"); 299bf215546Sopenharmony_ci } 300bf215546Sopenharmony_ci} 301bf215546Sopenharmony_ci 302bf215546Sopenharmony_cinir_op 303bf215546Sopenharmony_civtn_nir_alu_op_for_spirv_opcode(struct vtn_builder *b, 304bf215546Sopenharmony_ci SpvOp opcode, bool *swap, bool *exact, 305bf215546Sopenharmony_ci unsigned src_bit_size, unsigned dst_bit_size) 306bf215546Sopenharmony_ci{ 307bf215546Sopenharmony_ci /* Indicates that the first two arguments should be swapped. This is 308bf215546Sopenharmony_ci * used for implementing greater-than and less-than-or-equal. 309bf215546Sopenharmony_ci */ 310bf215546Sopenharmony_ci *swap = false; 311bf215546Sopenharmony_ci 312bf215546Sopenharmony_ci *exact = false; 313bf215546Sopenharmony_ci 314bf215546Sopenharmony_ci switch (opcode) { 315bf215546Sopenharmony_ci case SpvOpSNegate: return nir_op_ineg; 316bf215546Sopenharmony_ci case SpvOpFNegate: return nir_op_fneg; 317bf215546Sopenharmony_ci case SpvOpNot: return nir_op_inot; 318bf215546Sopenharmony_ci case SpvOpIAdd: return nir_op_iadd; 319bf215546Sopenharmony_ci case SpvOpFAdd: return nir_op_fadd; 320bf215546Sopenharmony_ci case SpvOpISub: return nir_op_isub; 321bf215546Sopenharmony_ci case SpvOpFSub: return nir_op_fsub; 322bf215546Sopenharmony_ci case SpvOpIMul: return nir_op_imul; 323bf215546Sopenharmony_ci case SpvOpFMul: return nir_op_fmul; 324bf215546Sopenharmony_ci case SpvOpUDiv: return nir_op_udiv; 325bf215546Sopenharmony_ci case SpvOpSDiv: return nir_op_idiv; 326bf215546Sopenharmony_ci case SpvOpFDiv: return nir_op_fdiv; 327bf215546Sopenharmony_ci case SpvOpUMod: return nir_op_umod; 328bf215546Sopenharmony_ci case SpvOpSMod: return nir_op_imod; 329bf215546Sopenharmony_ci case SpvOpFMod: return nir_op_fmod; 330bf215546Sopenharmony_ci case SpvOpSRem: return nir_op_irem; 331bf215546Sopenharmony_ci case SpvOpFRem: return nir_op_frem; 332bf215546Sopenharmony_ci 333bf215546Sopenharmony_ci case SpvOpShiftRightLogical: return nir_op_ushr; 334bf215546Sopenharmony_ci case SpvOpShiftRightArithmetic: return nir_op_ishr; 335bf215546Sopenharmony_ci case SpvOpShiftLeftLogical: return nir_op_ishl; 336bf215546Sopenharmony_ci case SpvOpLogicalOr: return nir_op_ior; 337bf215546Sopenharmony_ci case SpvOpLogicalEqual: return nir_op_ieq; 338bf215546Sopenharmony_ci case SpvOpLogicalNotEqual: return nir_op_ine; 339bf215546Sopenharmony_ci case SpvOpLogicalAnd: return nir_op_iand; 340bf215546Sopenharmony_ci case SpvOpLogicalNot: return nir_op_inot; 341bf215546Sopenharmony_ci case SpvOpBitwiseOr: return nir_op_ior; 342bf215546Sopenharmony_ci case SpvOpBitwiseXor: return nir_op_ixor; 343bf215546Sopenharmony_ci case SpvOpBitwiseAnd: return nir_op_iand; 344bf215546Sopenharmony_ci case SpvOpSelect: return nir_op_bcsel; 345bf215546Sopenharmony_ci case SpvOpIEqual: return nir_op_ieq; 346bf215546Sopenharmony_ci 347bf215546Sopenharmony_ci case SpvOpBitFieldInsert: return nir_op_bitfield_insert; 348bf215546Sopenharmony_ci case SpvOpBitFieldSExtract: return nir_op_ibitfield_extract; 349bf215546Sopenharmony_ci case SpvOpBitFieldUExtract: return nir_op_ubitfield_extract; 350bf215546Sopenharmony_ci case SpvOpBitReverse: return nir_op_bitfield_reverse; 351bf215546Sopenharmony_ci 352bf215546Sopenharmony_ci case SpvOpUCountLeadingZerosINTEL: return nir_op_uclz; 353bf215546Sopenharmony_ci /* SpvOpUCountTrailingZerosINTEL is handled elsewhere. */ 354bf215546Sopenharmony_ci case SpvOpAbsISubINTEL: return nir_op_uabs_isub; 355bf215546Sopenharmony_ci case SpvOpAbsUSubINTEL: return nir_op_uabs_usub; 356bf215546Sopenharmony_ci case SpvOpIAddSatINTEL: return nir_op_iadd_sat; 357bf215546Sopenharmony_ci case SpvOpUAddSatINTEL: return nir_op_uadd_sat; 358bf215546Sopenharmony_ci case SpvOpIAverageINTEL: return nir_op_ihadd; 359bf215546Sopenharmony_ci case SpvOpUAverageINTEL: return nir_op_uhadd; 360bf215546Sopenharmony_ci case SpvOpIAverageRoundedINTEL: return nir_op_irhadd; 361bf215546Sopenharmony_ci case SpvOpUAverageRoundedINTEL: return nir_op_urhadd; 362bf215546Sopenharmony_ci case SpvOpISubSatINTEL: return nir_op_isub_sat; 363bf215546Sopenharmony_ci case SpvOpUSubSatINTEL: return nir_op_usub_sat; 364bf215546Sopenharmony_ci case SpvOpIMul32x16INTEL: return nir_op_imul_32x16; 365bf215546Sopenharmony_ci case SpvOpUMul32x16INTEL: return nir_op_umul_32x16; 366bf215546Sopenharmony_ci 367bf215546Sopenharmony_ci /* The ordered / unordered operators need special implementation besides 368bf215546Sopenharmony_ci * the logical operator to use since they also need to check if operands are 369bf215546Sopenharmony_ci * ordered. 370bf215546Sopenharmony_ci */ 371bf215546Sopenharmony_ci case SpvOpFOrdEqual: *exact = true; return nir_op_feq; 372bf215546Sopenharmony_ci case SpvOpFUnordEqual: *exact = true; return nir_op_feq; 373bf215546Sopenharmony_ci case SpvOpINotEqual: return nir_op_ine; 374bf215546Sopenharmony_ci case SpvOpLessOrGreater: /* Deprecated, use OrdNotEqual */ 375bf215546Sopenharmony_ci case SpvOpFOrdNotEqual: *exact = true; return nir_op_fneu; 376bf215546Sopenharmony_ci case SpvOpFUnordNotEqual: *exact = true; return nir_op_fneu; 377bf215546Sopenharmony_ci case SpvOpULessThan: return nir_op_ult; 378bf215546Sopenharmony_ci case SpvOpSLessThan: return nir_op_ilt; 379bf215546Sopenharmony_ci case SpvOpFOrdLessThan: *exact = true; return nir_op_flt; 380bf215546Sopenharmony_ci case SpvOpFUnordLessThan: *exact = true; return nir_op_flt; 381bf215546Sopenharmony_ci case SpvOpUGreaterThan: *swap = true; return nir_op_ult; 382bf215546Sopenharmony_ci case SpvOpSGreaterThan: *swap = true; return nir_op_ilt; 383bf215546Sopenharmony_ci case SpvOpFOrdGreaterThan: *swap = true; *exact = true; return nir_op_flt; 384bf215546Sopenharmony_ci case SpvOpFUnordGreaterThan: *swap = true; *exact = true; return nir_op_flt; 385bf215546Sopenharmony_ci case SpvOpULessThanEqual: *swap = true; return nir_op_uge; 386bf215546Sopenharmony_ci case SpvOpSLessThanEqual: *swap = true; return nir_op_ige; 387bf215546Sopenharmony_ci case SpvOpFOrdLessThanEqual: *swap = true; *exact = true; return nir_op_fge; 388bf215546Sopenharmony_ci case SpvOpFUnordLessThanEqual: *swap = true; *exact = true; return nir_op_fge; 389bf215546Sopenharmony_ci case SpvOpUGreaterThanEqual: return nir_op_uge; 390bf215546Sopenharmony_ci case SpvOpSGreaterThanEqual: return nir_op_ige; 391bf215546Sopenharmony_ci case SpvOpFOrdGreaterThanEqual: *exact = true; return nir_op_fge; 392bf215546Sopenharmony_ci case SpvOpFUnordGreaterThanEqual: *exact = true; return nir_op_fge; 393bf215546Sopenharmony_ci 394bf215546Sopenharmony_ci /* Conversions: */ 395bf215546Sopenharmony_ci case SpvOpQuantizeToF16: return nir_op_fquantize2f16; 396bf215546Sopenharmony_ci case SpvOpUConvert: 397bf215546Sopenharmony_ci case SpvOpConvertFToU: 398bf215546Sopenharmony_ci case SpvOpConvertFToS: 399bf215546Sopenharmony_ci case SpvOpConvertSToF: 400bf215546Sopenharmony_ci case SpvOpConvertUToF: 401bf215546Sopenharmony_ci case SpvOpSConvert: 402bf215546Sopenharmony_ci case SpvOpFConvert: { 403bf215546Sopenharmony_ci nir_alu_type src_type = convert_op_src_type(opcode) | src_bit_size; 404bf215546Sopenharmony_ci nir_alu_type dst_type = convert_op_dst_type(opcode) | dst_bit_size; 405bf215546Sopenharmony_ci return nir_type_conversion_op(src_type, dst_type, nir_rounding_mode_undef); 406bf215546Sopenharmony_ci } 407bf215546Sopenharmony_ci 408bf215546Sopenharmony_ci case SpvOpPtrCastToGeneric: return nir_op_mov; 409bf215546Sopenharmony_ci case SpvOpGenericCastToPtr: return nir_op_mov; 410bf215546Sopenharmony_ci 411bf215546Sopenharmony_ci /* Derivatives: */ 412bf215546Sopenharmony_ci case SpvOpDPdx: return nir_op_fddx; 413bf215546Sopenharmony_ci case SpvOpDPdy: return nir_op_fddy; 414bf215546Sopenharmony_ci case SpvOpDPdxFine: return nir_op_fddx_fine; 415bf215546Sopenharmony_ci case SpvOpDPdyFine: return nir_op_fddy_fine; 416bf215546Sopenharmony_ci case SpvOpDPdxCoarse: return nir_op_fddx_coarse; 417bf215546Sopenharmony_ci case SpvOpDPdyCoarse: return nir_op_fddy_coarse; 418bf215546Sopenharmony_ci 419bf215546Sopenharmony_ci case SpvOpIsNormal: return nir_op_fisnormal; 420bf215546Sopenharmony_ci case SpvOpIsFinite: return nir_op_fisfinite; 421bf215546Sopenharmony_ci 422bf215546Sopenharmony_ci default: 423bf215546Sopenharmony_ci vtn_fail("No NIR equivalent: %u", opcode); 424bf215546Sopenharmony_ci } 425bf215546Sopenharmony_ci} 426bf215546Sopenharmony_ci 427bf215546Sopenharmony_cistatic void 428bf215546Sopenharmony_cihandle_no_contraction(struct vtn_builder *b, UNUSED struct vtn_value *val, 429bf215546Sopenharmony_ci UNUSED int member, const struct vtn_decoration *dec, 430bf215546Sopenharmony_ci UNUSED void *_void) 431bf215546Sopenharmony_ci{ 432bf215546Sopenharmony_ci vtn_assert(dec->scope == VTN_DEC_DECORATION); 433bf215546Sopenharmony_ci if (dec->decoration != SpvDecorationNoContraction) 434bf215546Sopenharmony_ci return; 435bf215546Sopenharmony_ci 436bf215546Sopenharmony_ci b->nb.exact = true; 437bf215546Sopenharmony_ci} 438bf215546Sopenharmony_ci 439bf215546Sopenharmony_civoid 440bf215546Sopenharmony_civtn_handle_no_contraction(struct vtn_builder *b, struct vtn_value *val) 441bf215546Sopenharmony_ci{ 442bf215546Sopenharmony_ci vtn_foreach_decoration(b, val, handle_no_contraction, NULL); 443bf215546Sopenharmony_ci} 444bf215546Sopenharmony_ci 445bf215546Sopenharmony_cinir_rounding_mode 446bf215546Sopenharmony_civtn_rounding_mode_to_nir(struct vtn_builder *b, SpvFPRoundingMode mode) 447bf215546Sopenharmony_ci{ 448bf215546Sopenharmony_ci switch (mode) { 449bf215546Sopenharmony_ci case SpvFPRoundingModeRTE: 450bf215546Sopenharmony_ci return nir_rounding_mode_rtne; 451bf215546Sopenharmony_ci case SpvFPRoundingModeRTZ: 452bf215546Sopenharmony_ci return nir_rounding_mode_rtz; 453bf215546Sopenharmony_ci case SpvFPRoundingModeRTP: 454bf215546Sopenharmony_ci vtn_fail_if(b->shader->info.stage != MESA_SHADER_KERNEL, 455bf215546Sopenharmony_ci "FPRoundingModeRTP is only supported in kernels"); 456bf215546Sopenharmony_ci return nir_rounding_mode_ru; 457bf215546Sopenharmony_ci case SpvFPRoundingModeRTN: 458bf215546Sopenharmony_ci vtn_fail_if(b->shader->info.stage != MESA_SHADER_KERNEL, 459bf215546Sopenharmony_ci "FPRoundingModeRTN is only supported in kernels"); 460bf215546Sopenharmony_ci return nir_rounding_mode_rd; 461bf215546Sopenharmony_ci default: 462bf215546Sopenharmony_ci vtn_fail("Unsupported rounding mode: %s", 463bf215546Sopenharmony_ci spirv_fproundingmode_to_string(mode)); 464bf215546Sopenharmony_ci break; 465bf215546Sopenharmony_ci } 466bf215546Sopenharmony_ci} 467bf215546Sopenharmony_ci 468bf215546Sopenharmony_cistruct conversion_opts { 469bf215546Sopenharmony_ci nir_rounding_mode rounding_mode; 470bf215546Sopenharmony_ci bool saturate; 471bf215546Sopenharmony_ci}; 472bf215546Sopenharmony_ci 473bf215546Sopenharmony_cistatic void 474bf215546Sopenharmony_cihandle_conversion_opts(struct vtn_builder *b, UNUSED struct vtn_value *val, 475bf215546Sopenharmony_ci UNUSED int member, 476bf215546Sopenharmony_ci const struct vtn_decoration *dec, void *_opts) 477bf215546Sopenharmony_ci{ 478bf215546Sopenharmony_ci struct conversion_opts *opts = _opts; 479bf215546Sopenharmony_ci 480bf215546Sopenharmony_ci switch (dec->decoration) { 481bf215546Sopenharmony_ci case SpvDecorationFPRoundingMode: 482bf215546Sopenharmony_ci opts->rounding_mode = vtn_rounding_mode_to_nir(b, dec->operands[0]); 483bf215546Sopenharmony_ci break; 484bf215546Sopenharmony_ci 485bf215546Sopenharmony_ci case SpvDecorationSaturatedConversion: 486bf215546Sopenharmony_ci vtn_fail_if(b->shader->info.stage != MESA_SHADER_KERNEL, 487bf215546Sopenharmony_ci "Saturated conversions are only allowed in kernels"); 488bf215546Sopenharmony_ci opts->saturate = true; 489bf215546Sopenharmony_ci break; 490bf215546Sopenharmony_ci 491bf215546Sopenharmony_ci default: 492bf215546Sopenharmony_ci break; 493bf215546Sopenharmony_ci } 494bf215546Sopenharmony_ci} 495bf215546Sopenharmony_ci 496bf215546Sopenharmony_cistatic void 497bf215546Sopenharmony_cihandle_no_wrap(UNUSED struct vtn_builder *b, UNUSED struct vtn_value *val, 498bf215546Sopenharmony_ci UNUSED int member, 499bf215546Sopenharmony_ci const struct vtn_decoration *dec, void *_alu) 500bf215546Sopenharmony_ci{ 501bf215546Sopenharmony_ci nir_alu_instr *alu = _alu; 502bf215546Sopenharmony_ci switch (dec->decoration) { 503bf215546Sopenharmony_ci case SpvDecorationNoSignedWrap: 504bf215546Sopenharmony_ci alu->no_signed_wrap = true; 505bf215546Sopenharmony_ci break; 506bf215546Sopenharmony_ci case SpvDecorationNoUnsignedWrap: 507bf215546Sopenharmony_ci alu->no_unsigned_wrap = true; 508bf215546Sopenharmony_ci break; 509bf215546Sopenharmony_ci default: 510bf215546Sopenharmony_ci /* Do nothing. */ 511bf215546Sopenharmony_ci break; 512bf215546Sopenharmony_ci } 513bf215546Sopenharmony_ci} 514bf215546Sopenharmony_ci 515bf215546Sopenharmony_cistatic void 516bf215546Sopenharmony_civtn_value_is_relaxed_precision_cb(struct vtn_builder *b, 517bf215546Sopenharmony_ci struct vtn_value *val, int member, 518bf215546Sopenharmony_ci const struct vtn_decoration *dec, void *void_ctx) 519bf215546Sopenharmony_ci{ 520bf215546Sopenharmony_ci bool *relaxed_precision = void_ctx; 521bf215546Sopenharmony_ci switch (dec->decoration) { 522bf215546Sopenharmony_ci case SpvDecorationRelaxedPrecision: 523bf215546Sopenharmony_ci *relaxed_precision = true; 524bf215546Sopenharmony_ci break; 525bf215546Sopenharmony_ci 526bf215546Sopenharmony_ci default: 527bf215546Sopenharmony_ci break; 528bf215546Sopenharmony_ci } 529bf215546Sopenharmony_ci} 530bf215546Sopenharmony_ci 531bf215546Sopenharmony_cibool 532bf215546Sopenharmony_civtn_value_is_relaxed_precision(struct vtn_builder *b, struct vtn_value *val) 533bf215546Sopenharmony_ci{ 534bf215546Sopenharmony_ci bool result = false; 535bf215546Sopenharmony_ci vtn_foreach_decoration(b, val, 536bf215546Sopenharmony_ci vtn_value_is_relaxed_precision_cb, &result); 537bf215546Sopenharmony_ci return result; 538bf215546Sopenharmony_ci} 539bf215546Sopenharmony_ci 540bf215546Sopenharmony_cistatic bool 541bf215546Sopenharmony_civtn_alu_op_mediump_16bit(struct vtn_builder *b, SpvOp opcode, struct vtn_value *dest_val) 542bf215546Sopenharmony_ci{ 543bf215546Sopenharmony_ci if (!b->options->mediump_16bit_alu || !vtn_value_is_relaxed_precision(b, dest_val)) 544bf215546Sopenharmony_ci return false; 545bf215546Sopenharmony_ci 546bf215546Sopenharmony_ci switch (opcode) { 547bf215546Sopenharmony_ci case SpvOpDPdx: 548bf215546Sopenharmony_ci case SpvOpDPdy: 549bf215546Sopenharmony_ci case SpvOpDPdxFine: 550bf215546Sopenharmony_ci case SpvOpDPdyFine: 551bf215546Sopenharmony_ci case SpvOpDPdxCoarse: 552bf215546Sopenharmony_ci case SpvOpDPdyCoarse: 553bf215546Sopenharmony_ci case SpvOpFwidth: 554bf215546Sopenharmony_ci case SpvOpFwidthFine: 555bf215546Sopenharmony_ci case SpvOpFwidthCoarse: 556bf215546Sopenharmony_ci return b->options->mediump_16bit_derivatives; 557bf215546Sopenharmony_ci default: 558bf215546Sopenharmony_ci return true; 559bf215546Sopenharmony_ci } 560bf215546Sopenharmony_ci} 561bf215546Sopenharmony_ci 562bf215546Sopenharmony_cistatic nir_ssa_def * 563bf215546Sopenharmony_civtn_mediump_upconvert(struct vtn_builder *b, enum glsl_base_type base_type, nir_ssa_def *def) 564bf215546Sopenharmony_ci{ 565bf215546Sopenharmony_ci if (def->bit_size != 16) 566bf215546Sopenharmony_ci return def; 567bf215546Sopenharmony_ci 568bf215546Sopenharmony_ci switch (base_type) { 569bf215546Sopenharmony_ci case GLSL_TYPE_FLOAT: 570bf215546Sopenharmony_ci return nir_f2f32(&b->nb, def); 571bf215546Sopenharmony_ci case GLSL_TYPE_INT: 572bf215546Sopenharmony_ci return nir_i2i32(&b->nb, def); 573bf215546Sopenharmony_ci case GLSL_TYPE_UINT: 574bf215546Sopenharmony_ci return nir_u2u32(&b->nb, def); 575bf215546Sopenharmony_ci default: 576bf215546Sopenharmony_ci unreachable("bad relaxed precision output type"); 577bf215546Sopenharmony_ci } 578bf215546Sopenharmony_ci} 579bf215546Sopenharmony_ci 580bf215546Sopenharmony_civoid 581bf215546Sopenharmony_civtn_mediump_upconvert_value(struct vtn_builder *b, struct vtn_ssa_value *value) 582bf215546Sopenharmony_ci{ 583bf215546Sopenharmony_ci enum glsl_base_type base_type = glsl_get_base_type(value->type); 584bf215546Sopenharmony_ci 585bf215546Sopenharmony_ci if (glsl_type_is_vector_or_scalar(value->type)) { 586bf215546Sopenharmony_ci value->def = vtn_mediump_upconvert(b, base_type, value->def); 587bf215546Sopenharmony_ci } else { 588bf215546Sopenharmony_ci for (int i = 0; i < glsl_get_matrix_columns(value->type); i++) 589bf215546Sopenharmony_ci value->elems[i]->def = vtn_mediump_upconvert(b, base_type, value->elems[i]->def); 590bf215546Sopenharmony_ci } 591bf215546Sopenharmony_ci} 592bf215546Sopenharmony_ci 593bf215546Sopenharmony_civoid 594bf215546Sopenharmony_civtn_handle_alu(struct vtn_builder *b, SpvOp opcode, 595bf215546Sopenharmony_ci const uint32_t *w, unsigned count) 596bf215546Sopenharmony_ci{ 597bf215546Sopenharmony_ci struct vtn_value *dest_val = vtn_untyped_value(b, w[2]); 598bf215546Sopenharmony_ci const struct glsl_type *dest_type = vtn_get_type(b, w[1])->type; 599bf215546Sopenharmony_ci 600bf215546Sopenharmony_ci vtn_handle_no_contraction(b, dest_val); 601bf215546Sopenharmony_ci bool mediump_16bit = vtn_alu_op_mediump_16bit(b, opcode, dest_val); 602bf215546Sopenharmony_ci 603bf215546Sopenharmony_ci /* Collect the various SSA sources */ 604bf215546Sopenharmony_ci const unsigned num_inputs = count - 3; 605bf215546Sopenharmony_ci struct vtn_ssa_value *vtn_src[4] = { NULL, }; 606bf215546Sopenharmony_ci for (unsigned i = 0; i < num_inputs; i++) { 607bf215546Sopenharmony_ci vtn_src[i] = vtn_ssa_value(b, w[i + 3]); 608bf215546Sopenharmony_ci if (mediump_16bit) 609bf215546Sopenharmony_ci vtn_src[i] = vtn_mediump_downconvert_value(b, vtn_src[i]); 610bf215546Sopenharmony_ci } 611bf215546Sopenharmony_ci 612bf215546Sopenharmony_ci if (glsl_type_is_matrix(vtn_src[0]->type) || 613bf215546Sopenharmony_ci (num_inputs >= 2 && glsl_type_is_matrix(vtn_src[1]->type))) { 614bf215546Sopenharmony_ci struct vtn_ssa_value *dest = vtn_handle_matrix_alu(b, opcode, vtn_src[0], vtn_src[1]); 615bf215546Sopenharmony_ci 616bf215546Sopenharmony_ci if (mediump_16bit) 617bf215546Sopenharmony_ci vtn_mediump_upconvert_value(b, dest); 618bf215546Sopenharmony_ci 619bf215546Sopenharmony_ci vtn_push_ssa_value(b, w[2], dest); 620bf215546Sopenharmony_ci b->nb.exact = b->exact; 621bf215546Sopenharmony_ci return; 622bf215546Sopenharmony_ci } 623bf215546Sopenharmony_ci 624bf215546Sopenharmony_ci struct vtn_ssa_value *dest = vtn_create_ssa_value(b, dest_type); 625bf215546Sopenharmony_ci nir_ssa_def *src[4] = { NULL, }; 626bf215546Sopenharmony_ci for (unsigned i = 0; i < num_inputs; i++) { 627bf215546Sopenharmony_ci vtn_assert(glsl_type_is_vector_or_scalar(vtn_src[i]->type)); 628bf215546Sopenharmony_ci src[i] = vtn_src[i]->def; 629bf215546Sopenharmony_ci } 630bf215546Sopenharmony_ci 631bf215546Sopenharmony_ci switch (opcode) { 632bf215546Sopenharmony_ci case SpvOpAny: 633bf215546Sopenharmony_ci dest->def = nir_bany(&b->nb, src[0]); 634bf215546Sopenharmony_ci break; 635bf215546Sopenharmony_ci 636bf215546Sopenharmony_ci case SpvOpAll: 637bf215546Sopenharmony_ci dest->def = nir_ball(&b->nb, src[0]); 638bf215546Sopenharmony_ci break; 639bf215546Sopenharmony_ci 640bf215546Sopenharmony_ci case SpvOpOuterProduct: { 641bf215546Sopenharmony_ci for (unsigned i = 0; i < src[1]->num_components; i++) { 642bf215546Sopenharmony_ci dest->elems[i]->def = 643bf215546Sopenharmony_ci nir_fmul(&b->nb, src[0], nir_channel(&b->nb, src[1], i)); 644bf215546Sopenharmony_ci } 645bf215546Sopenharmony_ci break; 646bf215546Sopenharmony_ci } 647bf215546Sopenharmony_ci 648bf215546Sopenharmony_ci case SpvOpDot: 649bf215546Sopenharmony_ci dest->def = nir_fdot(&b->nb, src[0], src[1]); 650bf215546Sopenharmony_ci break; 651bf215546Sopenharmony_ci 652bf215546Sopenharmony_ci case SpvOpIAddCarry: 653bf215546Sopenharmony_ci vtn_assert(glsl_type_is_struct_or_ifc(dest_type)); 654bf215546Sopenharmony_ci dest->elems[0]->def = nir_iadd(&b->nb, src[0], src[1]); 655bf215546Sopenharmony_ci dest->elems[1]->def = nir_uadd_carry(&b->nb, src[0], src[1]); 656bf215546Sopenharmony_ci break; 657bf215546Sopenharmony_ci 658bf215546Sopenharmony_ci case SpvOpISubBorrow: 659bf215546Sopenharmony_ci vtn_assert(glsl_type_is_struct_or_ifc(dest_type)); 660bf215546Sopenharmony_ci dest->elems[0]->def = nir_isub(&b->nb, src[0], src[1]); 661bf215546Sopenharmony_ci dest->elems[1]->def = nir_usub_borrow(&b->nb, src[0], src[1]); 662bf215546Sopenharmony_ci break; 663bf215546Sopenharmony_ci 664bf215546Sopenharmony_ci case SpvOpUMulExtended: { 665bf215546Sopenharmony_ci vtn_assert(glsl_type_is_struct_or_ifc(dest_type)); 666bf215546Sopenharmony_ci if (src[0]->bit_size == 32) { 667bf215546Sopenharmony_ci nir_ssa_def *umul = nir_umul_2x32_64(&b->nb, src[0], src[1]); 668bf215546Sopenharmony_ci dest->elems[0]->def = nir_unpack_64_2x32_split_x(&b->nb, umul); 669bf215546Sopenharmony_ci dest->elems[1]->def = nir_unpack_64_2x32_split_y(&b->nb, umul); 670bf215546Sopenharmony_ci } else { 671bf215546Sopenharmony_ci dest->elems[0]->def = nir_imul(&b->nb, src[0], src[1]); 672bf215546Sopenharmony_ci dest->elems[1]->def = nir_umul_high(&b->nb, src[0], src[1]); 673bf215546Sopenharmony_ci } 674bf215546Sopenharmony_ci break; 675bf215546Sopenharmony_ci } 676bf215546Sopenharmony_ci 677bf215546Sopenharmony_ci case SpvOpSMulExtended: { 678bf215546Sopenharmony_ci vtn_assert(glsl_type_is_struct_or_ifc(dest_type)); 679bf215546Sopenharmony_ci if (src[0]->bit_size == 32) { 680bf215546Sopenharmony_ci nir_ssa_def *umul = nir_imul_2x32_64(&b->nb, src[0], src[1]); 681bf215546Sopenharmony_ci dest->elems[0]->def = nir_unpack_64_2x32_split_x(&b->nb, umul); 682bf215546Sopenharmony_ci dest->elems[1]->def = nir_unpack_64_2x32_split_y(&b->nb, umul); 683bf215546Sopenharmony_ci } else { 684bf215546Sopenharmony_ci dest->elems[0]->def = nir_imul(&b->nb, src[0], src[1]); 685bf215546Sopenharmony_ci dest->elems[1]->def = nir_imul_high(&b->nb, src[0], src[1]); 686bf215546Sopenharmony_ci } 687bf215546Sopenharmony_ci break; 688bf215546Sopenharmony_ci } 689bf215546Sopenharmony_ci 690bf215546Sopenharmony_ci case SpvOpFwidth: 691bf215546Sopenharmony_ci dest->def = nir_fadd(&b->nb, 692bf215546Sopenharmony_ci nir_fabs(&b->nb, nir_fddx(&b->nb, src[0])), 693bf215546Sopenharmony_ci nir_fabs(&b->nb, nir_fddy(&b->nb, src[0]))); 694bf215546Sopenharmony_ci break; 695bf215546Sopenharmony_ci case SpvOpFwidthFine: 696bf215546Sopenharmony_ci dest->def = nir_fadd(&b->nb, 697bf215546Sopenharmony_ci nir_fabs(&b->nb, nir_fddx_fine(&b->nb, src[0])), 698bf215546Sopenharmony_ci nir_fabs(&b->nb, nir_fddy_fine(&b->nb, src[0]))); 699bf215546Sopenharmony_ci break; 700bf215546Sopenharmony_ci case SpvOpFwidthCoarse: 701bf215546Sopenharmony_ci dest->def = nir_fadd(&b->nb, 702bf215546Sopenharmony_ci nir_fabs(&b->nb, nir_fddx_coarse(&b->nb, src[0])), 703bf215546Sopenharmony_ci nir_fabs(&b->nb, nir_fddy_coarse(&b->nb, src[0]))); 704bf215546Sopenharmony_ci break; 705bf215546Sopenharmony_ci 706bf215546Sopenharmony_ci case SpvOpVectorTimesScalar: 707bf215546Sopenharmony_ci /* The builder will take care of splatting for us. */ 708bf215546Sopenharmony_ci dest->def = nir_fmul(&b->nb, src[0], src[1]); 709bf215546Sopenharmony_ci break; 710bf215546Sopenharmony_ci 711bf215546Sopenharmony_ci case SpvOpIsNan: { 712bf215546Sopenharmony_ci const bool save_exact = b->nb.exact; 713bf215546Sopenharmony_ci 714bf215546Sopenharmony_ci b->nb.exact = true; 715bf215546Sopenharmony_ci dest->def = nir_fneu(&b->nb, src[0], src[0]); 716bf215546Sopenharmony_ci b->nb.exact = save_exact; 717bf215546Sopenharmony_ci break; 718bf215546Sopenharmony_ci } 719bf215546Sopenharmony_ci 720bf215546Sopenharmony_ci case SpvOpOrdered: { 721bf215546Sopenharmony_ci const bool save_exact = b->nb.exact; 722bf215546Sopenharmony_ci 723bf215546Sopenharmony_ci b->nb.exact = true; 724bf215546Sopenharmony_ci dest->def = nir_iand(&b->nb, nir_feq(&b->nb, src[0], src[0]), 725bf215546Sopenharmony_ci nir_feq(&b->nb, src[1], src[1])); 726bf215546Sopenharmony_ci b->nb.exact = save_exact; 727bf215546Sopenharmony_ci break; 728bf215546Sopenharmony_ci } 729bf215546Sopenharmony_ci 730bf215546Sopenharmony_ci case SpvOpUnordered: { 731bf215546Sopenharmony_ci const bool save_exact = b->nb.exact; 732bf215546Sopenharmony_ci 733bf215546Sopenharmony_ci b->nb.exact = true; 734bf215546Sopenharmony_ci dest->def = nir_ior(&b->nb, nir_fneu(&b->nb, src[0], src[0]), 735bf215546Sopenharmony_ci nir_fneu(&b->nb, src[1], src[1])); 736bf215546Sopenharmony_ci b->nb.exact = save_exact; 737bf215546Sopenharmony_ci break; 738bf215546Sopenharmony_ci } 739bf215546Sopenharmony_ci 740bf215546Sopenharmony_ci case SpvOpIsInf: { 741bf215546Sopenharmony_ci nir_ssa_def *inf = nir_imm_floatN_t(&b->nb, INFINITY, src[0]->bit_size); 742bf215546Sopenharmony_ci dest->def = nir_ieq(&b->nb, nir_fabs(&b->nb, src[0]), inf); 743bf215546Sopenharmony_ci break; 744bf215546Sopenharmony_ci } 745bf215546Sopenharmony_ci 746bf215546Sopenharmony_ci case SpvOpFUnordEqual: { 747bf215546Sopenharmony_ci const bool save_exact = b->nb.exact; 748bf215546Sopenharmony_ci 749bf215546Sopenharmony_ci b->nb.exact = true; 750bf215546Sopenharmony_ci 751bf215546Sopenharmony_ci /* This could also be implemented as !(a < b || b < a). If one or both 752bf215546Sopenharmony_ci * of the source are numbers, later optimization passes can easily 753bf215546Sopenharmony_ci * eliminate the isnan() checks. This may trim the sequence down to a 754bf215546Sopenharmony_ci * single (a == b) operation. Otherwise, the optimizer can transform 755bf215546Sopenharmony_ci * whatever is left to !(a < b || b < a). Since some applications will 756bf215546Sopenharmony_ci * open-code this sequence, these optimizations are needed anyway. 757bf215546Sopenharmony_ci */ 758bf215546Sopenharmony_ci dest->def = 759bf215546Sopenharmony_ci nir_ior(&b->nb, 760bf215546Sopenharmony_ci nir_feq(&b->nb, src[0], src[1]), 761bf215546Sopenharmony_ci nir_ior(&b->nb, 762bf215546Sopenharmony_ci nir_fneu(&b->nb, src[0], src[0]), 763bf215546Sopenharmony_ci nir_fneu(&b->nb, src[1], src[1]))); 764bf215546Sopenharmony_ci 765bf215546Sopenharmony_ci b->nb.exact = save_exact; 766bf215546Sopenharmony_ci break; 767bf215546Sopenharmony_ci } 768bf215546Sopenharmony_ci 769bf215546Sopenharmony_ci case SpvOpFUnordLessThan: 770bf215546Sopenharmony_ci case SpvOpFUnordGreaterThan: 771bf215546Sopenharmony_ci case SpvOpFUnordLessThanEqual: 772bf215546Sopenharmony_ci case SpvOpFUnordGreaterThanEqual: { 773bf215546Sopenharmony_ci bool swap; 774bf215546Sopenharmony_ci bool unused_exact; 775bf215546Sopenharmony_ci unsigned src_bit_size = glsl_get_bit_size(vtn_src[0]->type); 776bf215546Sopenharmony_ci unsigned dst_bit_size = glsl_get_bit_size(dest_type); 777bf215546Sopenharmony_ci nir_op op = vtn_nir_alu_op_for_spirv_opcode(b, opcode, &swap, 778bf215546Sopenharmony_ci &unused_exact, 779bf215546Sopenharmony_ci src_bit_size, dst_bit_size); 780bf215546Sopenharmony_ci 781bf215546Sopenharmony_ci if (swap) { 782bf215546Sopenharmony_ci nir_ssa_def *tmp = src[0]; 783bf215546Sopenharmony_ci src[0] = src[1]; 784bf215546Sopenharmony_ci src[1] = tmp; 785bf215546Sopenharmony_ci } 786bf215546Sopenharmony_ci 787bf215546Sopenharmony_ci const bool save_exact = b->nb.exact; 788bf215546Sopenharmony_ci 789bf215546Sopenharmony_ci b->nb.exact = true; 790bf215546Sopenharmony_ci 791bf215546Sopenharmony_ci /* Use the property FUnordLessThan(a, b) ≡ !FOrdGreaterThanEqual(a, b). */ 792bf215546Sopenharmony_ci switch (op) { 793bf215546Sopenharmony_ci case nir_op_fge: op = nir_op_flt; break; 794bf215546Sopenharmony_ci case nir_op_flt: op = nir_op_fge; break; 795bf215546Sopenharmony_ci default: unreachable("Impossible opcode."); 796bf215546Sopenharmony_ci } 797bf215546Sopenharmony_ci 798bf215546Sopenharmony_ci dest->def = 799bf215546Sopenharmony_ci nir_inot(&b->nb, 800bf215546Sopenharmony_ci nir_build_alu(&b->nb, op, src[0], src[1], NULL, NULL)); 801bf215546Sopenharmony_ci 802bf215546Sopenharmony_ci b->nb.exact = save_exact; 803bf215546Sopenharmony_ci break; 804bf215546Sopenharmony_ci } 805bf215546Sopenharmony_ci 806bf215546Sopenharmony_ci case SpvOpLessOrGreater: 807bf215546Sopenharmony_ci case SpvOpFOrdNotEqual: { 808bf215546Sopenharmony_ci /* For all the SpvOpFOrd* comparisons apart from NotEqual, the value 809bf215546Sopenharmony_ci * from the ALU will probably already be false if the operands are not 810bf215546Sopenharmony_ci * ordered so we don’t need to handle it specially. 811bf215546Sopenharmony_ci */ 812bf215546Sopenharmony_ci const bool save_exact = b->nb.exact; 813bf215546Sopenharmony_ci 814bf215546Sopenharmony_ci b->nb.exact = true; 815bf215546Sopenharmony_ci 816bf215546Sopenharmony_ci /* This could also be implemented as (a < b || b < a). If one or both 817bf215546Sopenharmony_ci * of the source are numbers, later optimization passes can easily 818bf215546Sopenharmony_ci * eliminate the isnan() checks. This may trim the sequence down to a 819bf215546Sopenharmony_ci * single (a != b) operation. Otherwise, the optimizer can transform 820bf215546Sopenharmony_ci * whatever is left to (a < b || b < a). Since some applications will 821bf215546Sopenharmony_ci * open-code this sequence, these optimizations are needed anyway. 822bf215546Sopenharmony_ci */ 823bf215546Sopenharmony_ci dest->def = 824bf215546Sopenharmony_ci nir_iand(&b->nb, 825bf215546Sopenharmony_ci nir_fneu(&b->nb, src[0], src[1]), 826bf215546Sopenharmony_ci nir_iand(&b->nb, 827bf215546Sopenharmony_ci nir_feq(&b->nb, src[0], src[0]), 828bf215546Sopenharmony_ci nir_feq(&b->nb, src[1], src[1]))); 829bf215546Sopenharmony_ci 830bf215546Sopenharmony_ci b->nb.exact = save_exact; 831bf215546Sopenharmony_ci break; 832bf215546Sopenharmony_ci } 833bf215546Sopenharmony_ci 834bf215546Sopenharmony_ci case SpvOpUConvert: 835bf215546Sopenharmony_ci case SpvOpConvertFToU: 836bf215546Sopenharmony_ci case SpvOpConvertFToS: 837bf215546Sopenharmony_ci case SpvOpConvertSToF: 838bf215546Sopenharmony_ci case SpvOpConvertUToF: 839bf215546Sopenharmony_ci case SpvOpSConvert: 840bf215546Sopenharmony_ci case SpvOpFConvert: 841bf215546Sopenharmony_ci case SpvOpSatConvertSToU: 842bf215546Sopenharmony_ci case SpvOpSatConvertUToS: { 843bf215546Sopenharmony_ci unsigned src_bit_size = glsl_get_bit_size(vtn_src[0]->type); 844bf215546Sopenharmony_ci unsigned dst_bit_size = glsl_get_bit_size(dest_type); 845bf215546Sopenharmony_ci nir_alu_type src_type = convert_op_src_type(opcode) | src_bit_size; 846bf215546Sopenharmony_ci nir_alu_type dst_type = convert_op_dst_type(opcode) | dst_bit_size; 847bf215546Sopenharmony_ci 848bf215546Sopenharmony_ci struct conversion_opts opts = { 849bf215546Sopenharmony_ci .rounding_mode = nir_rounding_mode_undef, 850bf215546Sopenharmony_ci .saturate = false, 851bf215546Sopenharmony_ci }; 852bf215546Sopenharmony_ci vtn_foreach_decoration(b, dest_val, handle_conversion_opts, &opts); 853bf215546Sopenharmony_ci 854bf215546Sopenharmony_ci if (opcode == SpvOpSatConvertSToU || opcode == SpvOpSatConvertUToS) 855bf215546Sopenharmony_ci opts.saturate = true; 856bf215546Sopenharmony_ci 857bf215546Sopenharmony_ci if (b->shader->info.stage == MESA_SHADER_KERNEL) { 858bf215546Sopenharmony_ci if (opts.rounding_mode == nir_rounding_mode_undef && !opts.saturate) { 859bf215546Sopenharmony_ci nir_op op = nir_type_conversion_op(src_type, dst_type, 860bf215546Sopenharmony_ci nir_rounding_mode_undef); 861bf215546Sopenharmony_ci dest->def = nir_build_alu(&b->nb, op, src[0], NULL, NULL, NULL); 862bf215546Sopenharmony_ci } else { 863bf215546Sopenharmony_ci dest->def = nir_convert_alu_types(&b->nb, dst_bit_size, src[0], 864bf215546Sopenharmony_ci src_type, dst_type, 865bf215546Sopenharmony_ci opts.rounding_mode, opts.saturate); 866bf215546Sopenharmony_ci } 867bf215546Sopenharmony_ci } else { 868bf215546Sopenharmony_ci vtn_fail_if(opts.rounding_mode != nir_rounding_mode_undef && 869bf215546Sopenharmony_ci dst_type != nir_type_float16, 870bf215546Sopenharmony_ci "Rounding modes are only allowed on conversions to " 871bf215546Sopenharmony_ci "16-bit float types"); 872bf215546Sopenharmony_ci nir_op op = nir_type_conversion_op(src_type, dst_type, 873bf215546Sopenharmony_ci opts.rounding_mode); 874bf215546Sopenharmony_ci dest->def = nir_build_alu(&b->nb, op, src[0], NULL, NULL, NULL); 875bf215546Sopenharmony_ci } 876bf215546Sopenharmony_ci break; 877bf215546Sopenharmony_ci } 878bf215546Sopenharmony_ci 879bf215546Sopenharmony_ci case SpvOpBitFieldInsert: 880bf215546Sopenharmony_ci case SpvOpBitFieldSExtract: 881bf215546Sopenharmony_ci case SpvOpBitFieldUExtract: 882bf215546Sopenharmony_ci case SpvOpShiftLeftLogical: 883bf215546Sopenharmony_ci case SpvOpShiftRightArithmetic: 884bf215546Sopenharmony_ci case SpvOpShiftRightLogical: { 885bf215546Sopenharmony_ci bool swap; 886bf215546Sopenharmony_ci bool exact; 887bf215546Sopenharmony_ci unsigned src0_bit_size = glsl_get_bit_size(vtn_src[0]->type); 888bf215546Sopenharmony_ci unsigned dst_bit_size = glsl_get_bit_size(dest_type); 889bf215546Sopenharmony_ci nir_op op = vtn_nir_alu_op_for_spirv_opcode(b, opcode, &swap, &exact, 890bf215546Sopenharmony_ci src0_bit_size, dst_bit_size); 891bf215546Sopenharmony_ci 892bf215546Sopenharmony_ci assert(!exact); 893bf215546Sopenharmony_ci 894bf215546Sopenharmony_ci assert (op == nir_op_ushr || op == nir_op_ishr || op == nir_op_ishl || 895bf215546Sopenharmony_ci op == nir_op_bitfield_insert || op == nir_op_ubitfield_extract || 896bf215546Sopenharmony_ci op == nir_op_ibitfield_extract); 897bf215546Sopenharmony_ci 898bf215546Sopenharmony_ci for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++) { 899bf215546Sopenharmony_ci unsigned src_bit_size = 900bf215546Sopenharmony_ci nir_alu_type_get_type_size(nir_op_infos[op].input_types[i]); 901bf215546Sopenharmony_ci if (src_bit_size == 0) 902bf215546Sopenharmony_ci continue; 903bf215546Sopenharmony_ci if (src_bit_size != src[i]->bit_size) { 904bf215546Sopenharmony_ci assert(src_bit_size == 32); 905bf215546Sopenharmony_ci /* Convert the Shift, Offset and Count operands to 32 bits, which is the bitsize 906bf215546Sopenharmony_ci * supported by the NIR instructions. See discussion here: 907bf215546Sopenharmony_ci * 908bf215546Sopenharmony_ci * https://lists.freedesktop.org/archives/mesa-dev/2018-April/193026.html 909bf215546Sopenharmony_ci */ 910bf215546Sopenharmony_ci src[i] = nir_u2u32(&b->nb, src[i]); 911bf215546Sopenharmony_ci } 912bf215546Sopenharmony_ci } 913bf215546Sopenharmony_ci dest->def = nir_build_alu(&b->nb, op, src[0], src[1], src[2], src[3]); 914bf215546Sopenharmony_ci break; 915bf215546Sopenharmony_ci } 916bf215546Sopenharmony_ci 917bf215546Sopenharmony_ci case SpvOpSignBitSet: 918bf215546Sopenharmony_ci dest->def = nir_i2b(&b->nb, 919bf215546Sopenharmony_ci nir_ushr(&b->nb, src[0], nir_imm_int(&b->nb, src[0]->bit_size - 1))); 920bf215546Sopenharmony_ci break; 921bf215546Sopenharmony_ci 922bf215546Sopenharmony_ci case SpvOpUCountTrailingZerosINTEL: 923bf215546Sopenharmony_ci dest->def = nir_umin(&b->nb, 924bf215546Sopenharmony_ci nir_find_lsb(&b->nb, src[0]), 925bf215546Sopenharmony_ci nir_imm_int(&b->nb, 32u)); 926bf215546Sopenharmony_ci break; 927bf215546Sopenharmony_ci 928bf215546Sopenharmony_ci case SpvOpBitCount: { 929bf215546Sopenharmony_ci /* bit_count always returns int32, but the SPIR-V opcode just says the return 930bf215546Sopenharmony_ci * value needs to be big enough to store the number of bits. 931bf215546Sopenharmony_ci */ 932bf215546Sopenharmony_ci dest->def = nir_u2u(&b->nb, nir_bit_count(&b->nb, src[0]), glsl_get_bit_size(dest_type)); 933bf215546Sopenharmony_ci break; 934bf215546Sopenharmony_ci } 935bf215546Sopenharmony_ci 936bf215546Sopenharmony_ci case SpvOpSDotKHR: 937bf215546Sopenharmony_ci case SpvOpUDotKHR: 938bf215546Sopenharmony_ci case SpvOpSUDotKHR: 939bf215546Sopenharmony_ci case SpvOpSDotAccSatKHR: 940bf215546Sopenharmony_ci case SpvOpUDotAccSatKHR: 941bf215546Sopenharmony_ci case SpvOpSUDotAccSatKHR: 942bf215546Sopenharmony_ci unreachable("Should have called vtn_handle_integer_dot instead."); 943bf215546Sopenharmony_ci 944bf215546Sopenharmony_ci default: { 945bf215546Sopenharmony_ci bool swap; 946bf215546Sopenharmony_ci bool exact; 947bf215546Sopenharmony_ci unsigned src_bit_size = glsl_get_bit_size(vtn_src[0]->type); 948bf215546Sopenharmony_ci unsigned dst_bit_size = glsl_get_bit_size(dest_type); 949bf215546Sopenharmony_ci nir_op op = vtn_nir_alu_op_for_spirv_opcode(b, opcode, &swap, 950bf215546Sopenharmony_ci &exact, 951bf215546Sopenharmony_ci src_bit_size, dst_bit_size); 952bf215546Sopenharmony_ci 953bf215546Sopenharmony_ci if (swap) { 954bf215546Sopenharmony_ci nir_ssa_def *tmp = src[0]; 955bf215546Sopenharmony_ci src[0] = src[1]; 956bf215546Sopenharmony_ci src[1] = tmp; 957bf215546Sopenharmony_ci } 958bf215546Sopenharmony_ci 959bf215546Sopenharmony_ci switch (op) { 960bf215546Sopenharmony_ci case nir_op_ishl: 961bf215546Sopenharmony_ci case nir_op_ishr: 962bf215546Sopenharmony_ci case nir_op_ushr: 963bf215546Sopenharmony_ci if (src[1]->bit_size != 32) 964bf215546Sopenharmony_ci src[1] = nir_u2u32(&b->nb, src[1]); 965bf215546Sopenharmony_ci break; 966bf215546Sopenharmony_ci default: 967bf215546Sopenharmony_ci break; 968bf215546Sopenharmony_ci } 969bf215546Sopenharmony_ci 970bf215546Sopenharmony_ci const bool save_exact = b->nb.exact; 971bf215546Sopenharmony_ci 972bf215546Sopenharmony_ci if (exact) 973bf215546Sopenharmony_ci b->nb.exact = true; 974bf215546Sopenharmony_ci 975bf215546Sopenharmony_ci dest->def = nir_build_alu(&b->nb, op, src[0], src[1], src[2], src[3]); 976bf215546Sopenharmony_ci 977bf215546Sopenharmony_ci b->nb.exact = save_exact; 978bf215546Sopenharmony_ci break; 979bf215546Sopenharmony_ci } /* default */ 980bf215546Sopenharmony_ci } 981bf215546Sopenharmony_ci 982bf215546Sopenharmony_ci switch (opcode) { 983bf215546Sopenharmony_ci case SpvOpIAdd: 984bf215546Sopenharmony_ci case SpvOpIMul: 985bf215546Sopenharmony_ci case SpvOpISub: 986bf215546Sopenharmony_ci case SpvOpShiftLeftLogical: 987bf215546Sopenharmony_ci case SpvOpSNegate: { 988bf215546Sopenharmony_ci nir_alu_instr *alu = nir_instr_as_alu(dest->def->parent_instr); 989bf215546Sopenharmony_ci vtn_foreach_decoration(b, dest_val, handle_no_wrap, alu); 990bf215546Sopenharmony_ci break; 991bf215546Sopenharmony_ci } 992bf215546Sopenharmony_ci default: 993bf215546Sopenharmony_ci /* Do nothing. */ 994bf215546Sopenharmony_ci break; 995bf215546Sopenharmony_ci } 996bf215546Sopenharmony_ci 997bf215546Sopenharmony_ci if (mediump_16bit) 998bf215546Sopenharmony_ci vtn_mediump_upconvert_value(b, dest); 999bf215546Sopenharmony_ci vtn_push_ssa_value(b, w[2], dest); 1000bf215546Sopenharmony_ci 1001bf215546Sopenharmony_ci b->nb.exact = b->exact; 1002bf215546Sopenharmony_ci} 1003bf215546Sopenharmony_ci 1004bf215546Sopenharmony_civoid 1005bf215546Sopenharmony_civtn_handle_integer_dot(struct vtn_builder *b, SpvOp opcode, 1006bf215546Sopenharmony_ci const uint32_t *w, unsigned count) 1007bf215546Sopenharmony_ci{ 1008bf215546Sopenharmony_ci struct vtn_value *dest_val = vtn_untyped_value(b, w[2]); 1009bf215546Sopenharmony_ci const struct glsl_type *dest_type = vtn_get_type(b, w[1])->type; 1010bf215546Sopenharmony_ci const unsigned dest_size = glsl_get_bit_size(dest_type); 1011bf215546Sopenharmony_ci 1012bf215546Sopenharmony_ci vtn_handle_no_contraction(b, dest_val); 1013bf215546Sopenharmony_ci 1014bf215546Sopenharmony_ci /* Collect the various SSA sources. 1015bf215546Sopenharmony_ci * 1016bf215546Sopenharmony_ci * Due to the optional "Packed Vector Format" field, determine number of 1017bf215546Sopenharmony_ci * inputs from the opcode. This differs from vtn_handle_alu. 1018bf215546Sopenharmony_ci */ 1019bf215546Sopenharmony_ci const unsigned num_inputs = (opcode == SpvOpSDotAccSatKHR || 1020bf215546Sopenharmony_ci opcode == SpvOpUDotAccSatKHR || 1021bf215546Sopenharmony_ci opcode == SpvOpSUDotAccSatKHR) ? 3 : 2; 1022bf215546Sopenharmony_ci 1023bf215546Sopenharmony_ci vtn_assert(count >= num_inputs + 3); 1024bf215546Sopenharmony_ci 1025bf215546Sopenharmony_ci struct vtn_ssa_value *vtn_src[3] = { NULL, }; 1026bf215546Sopenharmony_ci nir_ssa_def *src[3] = { NULL, }; 1027bf215546Sopenharmony_ci 1028bf215546Sopenharmony_ci for (unsigned i = 0; i < num_inputs; i++) { 1029bf215546Sopenharmony_ci vtn_src[i] = vtn_ssa_value(b, w[i + 3]); 1030bf215546Sopenharmony_ci src[i] = vtn_src[i]->def; 1031bf215546Sopenharmony_ci 1032bf215546Sopenharmony_ci vtn_assert(glsl_type_is_vector_or_scalar(vtn_src[i]->type)); 1033bf215546Sopenharmony_ci } 1034bf215546Sopenharmony_ci 1035bf215546Sopenharmony_ci /* For all of the opcodes *except* SpvOpSUDotKHR and SpvOpSUDotAccSatKHR, 1036bf215546Sopenharmony_ci * the SPV_KHR_integer_dot_product spec says: 1037bf215546Sopenharmony_ci * 1038bf215546Sopenharmony_ci * _Vector 1_ and _Vector 2_ must have the same type. 1039bf215546Sopenharmony_ci * 1040bf215546Sopenharmony_ci * The practical requirement is the same bit-size and the same number of 1041bf215546Sopenharmony_ci * components. 1042bf215546Sopenharmony_ci */ 1043bf215546Sopenharmony_ci vtn_fail_if(glsl_get_bit_size(vtn_src[0]->type) != 1044bf215546Sopenharmony_ci glsl_get_bit_size(vtn_src[1]->type) || 1045bf215546Sopenharmony_ci glsl_get_vector_elements(vtn_src[0]->type) != 1046bf215546Sopenharmony_ci glsl_get_vector_elements(vtn_src[1]->type), 1047bf215546Sopenharmony_ci "Vector 1 and vector 2 source of opcode %s must have the same " 1048bf215546Sopenharmony_ci "type", 1049bf215546Sopenharmony_ci spirv_op_to_string(opcode)); 1050bf215546Sopenharmony_ci 1051bf215546Sopenharmony_ci if (num_inputs == 3) { 1052bf215546Sopenharmony_ci /* The SPV_KHR_integer_dot_product spec says: 1053bf215546Sopenharmony_ci * 1054bf215546Sopenharmony_ci * The type of Accumulator must be the same as Result Type. 1055bf215546Sopenharmony_ci * 1056bf215546Sopenharmony_ci * The handling of SpvOpSDotAccSatKHR and friends with the packed 4x8 1057bf215546Sopenharmony_ci * types (far below) assumes these types have the same size. 1058bf215546Sopenharmony_ci */ 1059bf215546Sopenharmony_ci vtn_fail_if(dest_type != vtn_src[2]->type, 1060bf215546Sopenharmony_ci "Accumulator type must be the same as Result Type for " 1061bf215546Sopenharmony_ci "opcode %s", 1062bf215546Sopenharmony_ci spirv_op_to_string(opcode)); 1063bf215546Sopenharmony_ci } 1064bf215546Sopenharmony_ci 1065bf215546Sopenharmony_ci unsigned packed_bit_size = 8; 1066bf215546Sopenharmony_ci if (glsl_type_is_vector(vtn_src[0]->type)) { 1067bf215546Sopenharmony_ci /* FINISHME: Is this actually as good or better for platforms that don't 1068bf215546Sopenharmony_ci * have the special instructions (i.e., one or both of has_dot_4x8 or 1069bf215546Sopenharmony_ci * has_sudot_4x8 is false)? 1070bf215546Sopenharmony_ci */ 1071bf215546Sopenharmony_ci if (glsl_get_vector_elements(vtn_src[0]->type) == 4 && 1072bf215546Sopenharmony_ci glsl_get_bit_size(vtn_src[0]->type) == 8 && 1073bf215546Sopenharmony_ci glsl_get_bit_size(dest_type) <= 32) { 1074bf215546Sopenharmony_ci src[0] = nir_pack_32_4x8(&b->nb, src[0]); 1075bf215546Sopenharmony_ci src[1] = nir_pack_32_4x8(&b->nb, src[1]); 1076bf215546Sopenharmony_ci } else if (glsl_get_vector_elements(vtn_src[0]->type) == 2 && 1077bf215546Sopenharmony_ci glsl_get_bit_size(vtn_src[0]->type) == 16 && 1078bf215546Sopenharmony_ci glsl_get_bit_size(dest_type) <= 32 && 1079bf215546Sopenharmony_ci opcode != SpvOpSUDotKHR && 1080bf215546Sopenharmony_ci opcode != SpvOpSUDotAccSatKHR) { 1081bf215546Sopenharmony_ci src[0] = nir_pack_32_2x16(&b->nb, src[0]); 1082bf215546Sopenharmony_ci src[1] = nir_pack_32_2x16(&b->nb, src[1]); 1083bf215546Sopenharmony_ci packed_bit_size = 16; 1084bf215546Sopenharmony_ci } 1085bf215546Sopenharmony_ci } else if (glsl_type_is_scalar(vtn_src[0]->type) && 1086bf215546Sopenharmony_ci glsl_type_is_32bit(vtn_src[0]->type)) { 1087bf215546Sopenharmony_ci /* The SPV_KHR_integer_dot_product spec says: 1088bf215546Sopenharmony_ci * 1089bf215546Sopenharmony_ci * When _Vector 1_ and _Vector 2_ are scalar integer types, _Packed 1090bf215546Sopenharmony_ci * Vector Format_ must be specified to select how the integers are to 1091bf215546Sopenharmony_ci * be interpreted as vectors. 1092bf215546Sopenharmony_ci * 1093bf215546Sopenharmony_ci * The "Packed Vector Format" value follows the last input. 1094bf215546Sopenharmony_ci */ 1095bf215546Sopenharmony_ci vtn_assert(count == (num_inputs + 4)); 1096bf215546Sopenharmony_ci const SpvPackedVectorFormat pack_format = w[num_inputs + 3]; 1097bf215546Sopenharmony_ci vtn_fail_if(pack_format != SpvPackedVectorFormatPackedVectorFormat4x8BitKHR, 1098bf215546Sopenharmony_ci "Unsupported vector packing format %d for opcode %s", 1099bf215546Sopenharmony_ci pack_format, spirv_op_to_string(opcode)); 1100bf215546Sopenharmony_ci } else { 1101bf215546Sopenharmony_ci vtn_fail_with_opcode("Invalid source types.", opcode); 1102bf215546Sopenharmony_ci } 1103bf215546Sopenharmony_ci 1104bf215546Sopenharmony_ci nir_ssa_def *dest = NULL; 1105bf215546Sopenharmony_ci 1106bf215546Sopenharmony_ci if (src[0]->num_components > 1) { 1107bf215546Sopenharmony_ci const nir_op s_conversion_op = 1108bf215546Sopenharmony_ci nir_type_conversion_op(nir_type_int, nir_type_int | dest_size, 1109bf215546Sopenharmony_ci nir_rounding_mode_undef); 1110bf215546Sopenharmony_ci 1111bf215546Sopenharmony_ci const nir_op u_conversion_op = 1112bf215546Sopenharmony_ci nir_type_conversion_op(nir_type_uint, nir_type_uint | dest_size, 1113bf215546Sopenharmony_ci nir_rounding_mode_undef); 1114bf215546Sopenharmony_ci 1115bf215546Sopenharmony_ci nir_op src0_conversion_op; 1116bf215546Sopenharmony_ci nir_op src1_conversion_op; 1117bf215546Sopenharmony_ci 1118bf215546Sopenharmony_ci switch (opcode) { 1119bf215546Sopenharmony_ci case SpvOpSDotKHR: 1120bf215546Sopenharmony_ci case SpvOpSDotAccSatKHR: 1121bf215546Sopenharmony_ci src0_conversion_op = s_conversion_op; 1122bf215546Sopenharmony_ci src1_conversion_op = s_conversion_op; 1123bf215546Sopenharmony_ci break; 1124bf215546Sopenharmony_ci 1125bf215546Sopenharmony_ci case SpvOpUDotKHR: 1126bf215546Sopenharmony_ci case SpvOpUDotAccSatKHR: 1127bf215546Sopenharmony_ci src0_conversion_op = u_conversion_op; 1128bf215546Sopenharmony_ci src1_conversion_op = u_conversion_op; 1129bf215546Sopenharmony_ci break; 1130bf215546Sopenharmony_ci 1131bf215546Sopenharmony_ci case SpvOpSUDotKHR: 1132bf215546Sopenharmony_ci case SpvOpSUDotAccSatKHR: 1133bf215546Sopenharmony_ci src0_conversion_op = s_conversion_op; 1134bf215546Sopenharmony_ci src1_conversion_op = u_conversion_op; 1135bf215546Sopenharmony_ci break; 1136bf215546Sopenharmony_ci 1137bf215546Sopenharmony_ci default: 1138bf215546Sopenharmony_ci unreachable("Invalid opcode."); 1139bf215546Sopenharmony_ci } 1140bf215546Sopenharmony_ci 1141bf215546Sopenharmony_ci /* The SPV_KHR_integer_dot_product spec says: 1142bf215546Sopenharmony_ci * 1143bf215546Sopenharmony_ci * All components of the input vectors are sign-extended to the bit 1144bf215546Sopenharmony_ci * width of the result's type. The sign-extended input vectors are 1145bf215546Sopenharmony_ci * then multiplied component-wise and all components of the vector 1146bf215546Sopenharmony_ci * resulting from the component-wise multiplication are added 1147bf215546Sopenharmony_ci * together. The resulting value will equal the low-order N bits of 1148bf215546Sopenharmony_ci * the correct result R, where N is the result width and R is 1149bf215546Sopenharmony_ci * computed with enough precision to avoid overflow and underflow. 1150bf215546Sopenharmony_ci */ 1151bf215546Sopenharmony_ci const unsigned vector_components = 1152bf215546Sopenharmony_ci glsl_get_vector_elements(vtn_src[0]->type); 1153bf215546Sopenharmony_ci 1154bf215546Sopenharmony_ci for (unsigned i = 0; i < vector_components; i++) { 1155bf215546Sopenharmony_ci nir_ssa_def *const src0 = 1156bf215546Sopenharmony_ci nir_build_alu(&b->nb, src0_conversion_op, 1157bf215546Sopenharmony_ci nir_channel(&b->nb, src[0], i), NULL, NULL, NULL); 1158bf215546Sopenharmony_ci 1159bf215546Sopenharmony_ci nir_ssa_def *const src1 = 1160bf215546Sopenharmony_ci nir_build_alu(&b->nb, src1_conversion_op, 1161bf215546Sopenharmony_ci nir_channel(&b->nb, src[1], i), NULL, NULL, NULL); 1162bf215546Sopenharmony_ci 1163bf215546Sopenharmony_ci nir_ssa_def *const mul_result = nir_imul(&b->nb, src0, src1); 1164bf215546Sopenharmony_ci 1165bf215546Sopenharmony_ci dest = (i == 0) ? mul_result : nir_iadd(&b->nb, dest, mul_result); 1166bf215546Sopenharmony_ci } 1167bf215546Sopenharmony_ci 1168bf215546Sopenharmony_ci if (num_inputs == 3) { 1169bf215546Sopenharmony_ci /* For SpvOpSDotAccSatKHR, the SPV_KHR_integer_dot_product spec says: 1170bf215546Sopenharmony_ci * 1171bf215546Sopenharmony_ci * Signed integer dot product of _Vector 1_ and _Vector 2_ and 1172bf215546Sopenharmony_ci * signed saturating addition of the result with _Accumulator_. 1173bf215546Sopenharmony_ci * 1174bf215546Sopenharmony_ci * For SpvOpUDotAccSatKHR, the SPV_KHR_integer_dot_product spec says: 1175bf215546Sopenharmony_ci * 1176bf215546Sopenharmony_ci * Unsigned integer dot product of _Vector 1_ and _Vector 2_ and 1177bf215546Sopenharmony_ci * unsigned saturating addition of the result with _Accumulator_. 1178bf215546Sopenharmony_ci * 1179bf215546Sopenharmony_ci * For SpvOpSUDotAccSatKHR, the SPV_KHR_integer_dot_product spec says: 1180bf215546Sopenharmony_ci * 1181bf215546Sopenharmony_ci * Mixed-signedness integer dot product of _Vector 1_ and _Vector 1182bf215546Sopenharmony_ci * 2_ and signed saturating addition of the result with 1183bf215546Sopenharmony_ci * _Accumulator_. 1184bf215546Sopenharmony_ci */ 1185bf215546Sopenharmony_ci dest = (opcode == SpvOpUDotAccSatKHR) 1186bf215546Sopenharmony_ci ? nir_uadd_sat(&b->nb, dest, src[2]) 1187bf215546Sopenharmony_ci : nir_iadd_sat(&b->nb, dest, src[2]); 1188bf215546Sopenharmony_ci } 1189bf215546Sopenharmony_ci } else { 1190bf215546Sopenharmony_ci assert(src[0]->num_components == 1 && src[1]->num_components == 1); 1191bf215546Sopenharmony_ci assert(src[0]->bit_size == 32 && src[1]->bit_size == 32); 1192bf215546Sopenharmony_ci 1193bf215546Sopenharmony_ci nir_ssa_def *const zero = nir_imm_zero(&b->nb, 1, 32); 1194bf215546Sopenharmony_ci bool is_signed = opcode == SpvOpSDotKHR || opcode == SpvOpSUDotKHR || 1195bf215546Sopenharmony_ci opcode == SpvOpSDotAccSatKHR || opcode == SpvOpSUDotAccSatKHR; 1196bf215546Sopenharmony_ci 1197bf215546Sopenharmony_ci if (packed_bit_size == 16) { 1198bf215546Sopenharmony_ci switch (opcode) { 1199bf215546Sopenharmony_ci case SpvOpSDotKHR: 1200bf215546Sopenharmony_ci dest = nir_sdot_2x16_iadd(&b->nb, src[0], src[1], zero); 1201bf215546Sopenharmony_ci break; 1202bf215546Sopenharmony_ci case SpvOpUDotKHR: 1203bf215546Sopenharmony_ci dest = nir_udot_2x16_uadd(&b->nb, src[0], src[1], zero); 1204bf215546Sopenharmony_ci break; 1205bf215546Sopenharmony_ci case SpvOpSDotAccSatKHR: 1206bf215546Sopenharmony_ci if (dest_size == 32) 1207bf215546Sopenharmony_ci dest = nir_sdot_2x16_iadd_sat(&b->nb, src[0], src[1], src[2]); 1208bf215546Sopenharmony_ci else 1209bf215546Sopenharmony_ci dest = nir_sdot_2x16_iadd(&b->nb, src[0], src[1], zero); 1210bf215546Sopenharmony_ci break; 1211bf215546Sopenharmony_ci case SpvOpUDotAccSatKHR: 1212bf215546Sopenharmony_ci if (dest_size == 32) 1213bf215546Sopenharmony_ci dest = nir_udot_2x16_uadd_sat(&b->nb, src[0], src[1], src[2]); 1214bf215546Sopenharmony_ci else 1215bf215546Sopenharmony_ci dest = nir_udot_2x16_uadd(&b->nb, src[0], src[1], zero); 1216bf215546Sopenharmony_ci break; 1217bf215546Sopenharmony_ci default: 1218bf215546Sopenharmony_ci unreachable("Invalid opcode."); 1219bf215546Sopenharmony_ci } 1220bf215546Sopenharmony_ci } else { 1221bf215546Sopenharmony_ci switch (opcode) { 1222bf215546Sopenharmony_ci case SpvOpSDotKHR: 1223bf215546Sopenharmony_ci dest = nir_sdot_4x8_iadd(&b->nb, src[0], src[1], zero); 1224bf215546Sopenharmony_ci break; 1225bf215546Sopenharmony_ci case SpvOpUDotKHR: 1226bf215546Sopenharmony_ci dest = nir_udot_4x8_uadd(&b->nb, src[0], src[1], zero); 1227bf215546Sopenharmony_ci break; 1228bf215546Sopenharmony_ci case SpvOpSUDotKHR: 1229bf215546Sopenharmony_ci dest = nir_sudot_4x8_iadd(&b->nb, src[0], src[1], zero); 1230bf215546Sopenharmony_ci break; 1231bf215546Sopenharmony_ci case SpvOpSDotAccSatKHR: 1232bf215546Sopenharmony_ci if (dest_size == 32) 1233bf215546Sopenharmony_ci dest = nir_sdot_4x8_iadd_sat(&b->nb, src[0], src[1], src[2]); 1234bf215546Sopenharmony_ci else 1235bf215546Sopenharmony_ci dest = nir_sdot_4x8_iadd(&b->nb, src[0], src[1], zero); 1236bf215546Sopenharmony_ci break; 1237bf215546Sopenharmony_ci case SpvOpUDotAccSatKHR: 1238bf215546Sopenharmony_ci if (dest_size == 32) 1239bf215546Sopenharmony_ci dest = nir_udot_4x8_uadd_sat(&b->nb, src[0], src[1], src[2]); 1240bf215546Sopenharmony_ci else 1241bf215546Sopenharmony_ci dest = nir_udot_4x8_uadd(&b->nb, src[0], src[1], zero); 1242bf215546Sopenharmony_ci break; 1243bf215546Sopenharmony_ci case SpvOpSUDotAccSatKHR: 1244bf215546Sopenharmony_ci if (dest_size == 32) 1245bf215546Sopenharmony_ci dest = nir_sudot_4x8_iadd_sat(&b->nb, src[0], src[1], src[2]); 1246bf215546Sopenharmony_ci else 1247bf215546Sopenharmony_ci dest = nir_sudot_4x8_iadd(&b->nb, src[0], src[1], zero); 1248bf215546Sopenharmony_ci break; 1249bf215546Sopenharmony_ci default: 1250bf215546Sopenharmony_ci unreachable("Invalid opcode."); 1251bf215546Sopenharmony_ci } 1252bf215546Sopenharmony_ci } 1253bf215546Sopenharmony_ci 1254bf215546Sopenharmony_ci if (dest_size != 32) { 1255bf215546Sopenharmony_ci /* When the accumulator is 32-bits, a NIR dot-product with saturate 1256bf215546Sopenharmony_ci * is generated above. In all other cases a regular dot-product is 1257bf215546Sopenharmony_ci * generated above, and separate addition with saturate is generated 1258bf215546Sopenharmony_ci * here. 1259bf215546Sopenharmony_ci * 1260bf215546Sopenharmony_ci * The SPV_KHR_integer_dot_product spec says: 1261bf215546Sopenharmony_ci * 1262bf215546Sopenharmony_ci * If any of the multiplications or additions, with the exception 1263bf215546Sopenharmony_ci * of the final accumulation, overflow or underflow, the result of 1264bf215546Sopenharmony_ci * the instruction is undefined. 1265bf215546Sopenharmony_ci * 1266bf215546Sopenharmony_ci * Therefore it is safe to cast the dot-product result down to the 1267bf215546Sopenharmony_ci * size of the accumulator before doing the addition. Since the 1268bf215546Sopenharmony_ci * result of the dot-product cannot overflow 32-bits, this is also 1269bf215546Sopenharmony_ci * safe to cast up. 1270bf215546Sopenharmony_ci */ 1271bf215546Sopenharmony_ci if (num_inputs == 3) { 1272bf215546Sopenharmony_ci dest = is_signed 1273bf215546Sopenharmony_ci ? nir_iadd_sat(&b->nb, nir_i2i(&b->nb, dest, dest_size), src[2]) 1274bf215546Sopenharmony_ci : nir_uadd_sat(&b->nb, nir_u2u(&b->nb, dest, dest_size), src[2]); 1275bf215546Sopenharmony_ci } else { 1276bf215546Sopenharmony_ci dest = is_signed 1277bf215546Sopenharmony_ci ? nir_i2i(&b->nb, dest, dest_size) 1278bf215546Sopenharmony_ci : nir_u2u(&b->nb, dest, dest_size); 1279bf215546Sopenharmony_ci } 1280bf215546Sopenharmony_ci } 1281bf215546Sopenharmony_ci } 1282bf215546Sopenharmony_ci 1283bf215546Sopenharmony_ci vtn_push_nir_ssa(b, w[2], dest); 1284bf215546Sopenharmony_ci 1285bf215546Sopenharmony_ci b->nb.exact = b->exact; 1286bf215546Sopenharmony_ci} 1287bf215546Sopenharmony_ci 1288bf215546Sopenharmony_civoid 1289bf215546Sopenharmony_civtn_handle_bitcast(struct vtn_builder *b, const uint32_t *w, unsigned count) 1290bf215546Sopenharmony_ci{ 1291bf215546Sopenharmony_ci vtn_assert(count == 4); 1292bf215546Sopenharmony_ci /* From the definition of OpBitcast in the SPIR-V 1.2 spec: 1293bf215546Sopenharmony_ci * 1294bf215546Sopenharmony_ci * "If Result Type has the same number of components as Operand, they 1295bf215546Sopenharmony_ci * must also have the same component width, and results are computed per 1296bf215546Sopenharmony_ci * component. 1297bf215546Sopenharmony_ci * 1298bf215546Sopenharmony_ci * If Result Type has a different number of components than Operand, the 1299bf215546Sopenharmony_ci * total number of bits in Result Type must equal the total number of 1300bf215546Sopenharmony_ci * bits in Operand. Let L be the type, either Result Type or Operand’s 1301bf215546Sopenharmony_ci * type, that has the larger number of components. Let S be the other 1302bf215546Sopenharmony_ci * type, with the smaller number of components. The number of components 1303bf215546Sopenharmony_ci * in L must be an integer multiple of the number of components in S. 1304bf215546Sopenharmony_ci * The first component (that is, the only or lowest-numbered component) 1305bf215546Sopenharmony_ci * of S maps to the first components of L, and so on, up to the last 1306bf215546Sopenharmony_ci * component of S mapping to the last components of L. Within this 1307bf215546Sopenharmony_ci * mapping, any single component of S (mapping to multiple components of 1308bf215546Sopenharmony_ci * L) maps its lower-ordered bits to the lower-numbered components of L." 1309bf215546Sopenharmony_ci */ 1310bf215546Sopenharmony_ci 1311bf215546Sopenharmony_ci struct vtn_type *type = vtn_get_type(b, w[1]); 1312bf215546Sopenharmony_ci struct nir_ssa_def *src = vtn_get_nir_ssa(b, w[3]); 1313bf215546Sopenharmony_ci 1314bf215546Sopenharmony_ci vtn_fail_if(src->num_components * src->bit_size != 1315bf215546Sopenharmony_ci glsl_get_vector_elements(type->type) * glsl_get_bit_size(type->type), 1316bf215546Sopenharmony_ci "Source and destination of OpBitcast must have the same " 1317bf215546Sopenharmony_ci "total number of bits"); 1318bf215546Sopenharmony_ci nir_ssa_def *val = 1319bf215546Sopenharmony_ci nir_bitcast_vector(&b->nb, src, glsl_get_bit_size(type->type)); 1320bf215546Sopenharmony_ci vtn_push_nir_ssa(b, w[2], val); 1321bf215546Sopenharmony_ci} 1322