/*
 * Copyright © 2018 Valve Corporation
 * Copyright © 2017 Red Hat
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include "vtn_private.h"
#include "GLSL.ext.AMD.h"

bool
vtn_handle_amd_gcn_shader_instruction(struct vtn_builder *b, SpvOp ext_opcode,
                                      const uint32_t *w, unsigned count)
{
   nir_ssa_def *def;
   switch ((enum GcnShaderAMD)ext_opcode) {
   case CubeFaceIndexAMD:
      def = nir_cube_face_index_amd(&b->nb, vtn_get_nir_ssa(b, w[5]));
      break;
   case CubeFaceCoordAMD:
      def = nir_cube_face_coord_amd(&b->nb, vtn_get_nir_ssa(b, w[5]));
      break;
   case TimeAMD: {
      def = nir_pack_64_2x32(&b->nb, nir_shader_clock(&b->nb, NIR_SCOPE_SUBGROUP));
      break;
   }
   default:
      unreachable("Invalid opcode");
   }

   vtn_push_nir_ssa(b, w[2], def);

   return true;
}

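/* Handler for the SPV_AMD_shader_ballot extended instruction set: each opcode
 * maps to a NIR intrinsic, and the constant swizzle/mask operands are packed
 * into the intrinsic's swizzle_mask index.
 */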
bool
vtn_handle_amd_shader_ballot_instruction(struct vtn_builder *b, SpvOp ext_opcode,
                                         const uint32_t *w, unsigned count)
{
   unsigned num_args;
   nir_intrinsic_op op;
   switch ((enum ShaderBallotAMD)ext_opcode) {
   case SwizzleInvocationsAMD:
      num_args = 1;
      op = nir_intrinsic_quad_swizzle_amd;
      break;
   case SwizzleInvocationsMaskedAMD:
      num_args = 1;
      op = nir_intrinsic_masked_swizzle_amd;
      break;
   case WriteInvocationAMD:
      num_args = 3;
      op = nir_intrinsic_write_invocation_amd;
      break;
   case MbcntAMD:
      num_args = 1;
      op = nir_intrinsic_mbcnt_amd;
      break;
   default:
      unreachable("Invalid opcode");
   }

   const struct glsl_type *dest_type = vtn_get_type(b, w[1])->type;
   nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->nb.shader, op);
   nir_ssa_dest_init_for_type(&intrin->instr, &intrin->dest, dest_type, NULL);
   if (nir_intrinsic_infos[op].src_components[0] == 0)
      intrin->num_components = intrin->dest.ssa.num_components;

   for (unsigned i = 0; i < num_args; i++)
      intrin->src[i] = nir_src_for_ssa(vtn_get_nir_ssa(b, w[i + 5]));

   if (intrin->intrinsic == nir_intrinsic_quad_swizzle_amd) {
      struct vtn_value *val = vtn_value(b, w[6], vtn_value_type_constant);
      unsigned mask = val->constant->values[0].u32 |
                      val->constant->values[1].u32 << 2 |
                      val->constant->values[2].u32 << 4 |
                      val->constant->values[3].u32 << 6;
      nir_intrinsic_set_swizzle_mask(intrin, mask);

   } else if (intrin->intrinsic == nir_intrinsic_masked_swizzle_amd) {
      struct vtn_value *val = vtn_value(b, w[6], vtn_value_type_constant);
      unsigned mask = val->constant->values[0].u32 |
                      val->constant->values[1].u32 << 5 |
                      val->constant->values[2].u32 << 10;
      nir_intrinsic_set_swizzle_mask(intrin, mask);
   } else if (intrin->intrinsic == nir_intrinsic_mbcnt_amd) {
      /* The v_mbcnt instruction has an additional source that is added to the result.
       * This is exposed by the NIR intrinsic but not by SPIR-V, so we add zero here.
       */
      intrin->src[1] = nir_src_for_ssa(nir_imm_int(&b->nb, 0));
   }

   nir_builder_instr_insert(&b->nb, &intrin->instr);
   vtn_push_nir_ssa(b, w[2], &intrin->dest.ssa);

   return true;
}

bool
vtn_handle_amd_shader_trinary_minmax_instruction(struct vtn_builder *b, SpvOp ext_opcode,
                                                 const uint32_t *w, unsigned count)
{
   struct nir_builder *nb = &b->nb;

   unsigned num_inputs = count - 5;
   assert(num_inputs == 3);
   nir_ssa_def *src[3] = { NULL, };
   for (unsigned i = 0; i < num_inputs; i++)
      src[i] = vtn_get_nir_ssa(b, w[i + 5]);

   /* place constants at src[1-2] for easier constant-folding */
   for (unsigned i = 1; i <= 2; i++) {
      if (nir_src_as_const_value(nir_src_for_ssa(src[0]))) {
         nir_ssa_def* tmp = src[i];
         src[i] = src[0];
         src[0] = tmp;
      }
   }
   nir_ssa_def *def;
   switch ((enum ShaderTrinaryMinMaxAMD)ext_opcode) {
   case FMin3AMD:
      def = nir_fmin(nb, src[0], nir_fmin(nb, src[1], src[2]));
      break;
   case UMin3AMD:
      def = nir_umin(nb, src[0], nir_umin(nb, src[1], src[2]));
      break;
   case SMin3AMD:
      def = nir_imin(nb, src[0], nir_imin(nb, src[1], src[2]));
      break;
   case FMax3AMD:
      def = nir_fmax(nb, src[0], nir_fmax(nb, src[1], src[2]));
      break;
   case UMax3AMD:
      def = nir_umax(nb, src[0], nir_umax(nb, src[1], src[2]));
      break;
   case SMax3AMD:
      def = nir_imax(nb, src[0], nir_imax(nb, src[1], src[2]));
      break;
   case FMid3AMD:
      def = nir_fmin(nb, nir_fmax(nb, src[0], nir_fmin(nb, src[1], src[2])),
                     nir_fmax(nb, src[1], src[2]));
      break;
   case UMid3AMD:
      def = nir_umin(nb, nir_umax(nb, src[0], nir_umin(nb, src[1], src[2])),
                     nir_umax(nb, src[1], src[2]));
      break;
   case SMid3AMD:
      def = nir_imin(nb, nir_imax(nb, src[0], nir_imin(nb, src[1], src[2])),
                     nir_imax(nb, src[1], src[2]));
      break;
   default:
      unreachable("unknown opcode\n");
      break;
   }

   vtn_push_nir_ssa(b, w[2], def);

   return true;
}

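/* Handler for the SPV_AMD_shader_explicit_vertex_parameter extended
 * instruction set: InterpolateAtVertexAMD is lowered to the
 * nir_intrinsic_interp_deref_at_vertex intrinsic on the input deref.
 */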
bool
vtn_handle_amd_shader_explicit_vertex_parameter_instruction(struct vtn_builder *b, SpvOp ext_opcode,
                                                            const uint32_t *w, unsigned count)
{
   nir_intrinsic_op op;
   switch ((enum ShaderExplicitVertexParameterAMD)ext_opcode) {
   case InterpolateAtVertexAMD:
      op = nir_intrinsic_interp_deref_at_vertex;
      break;
   default:
      unreachable("unknown opcode");
   }

   nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->nb.shader, op);

   struct vtn_pointer *ptr =
      vtn_value(b, w[5], vtn_value_type_pointer)->pointer;
   nir_deref_instr *deref = vtn_pointer_to_deref(b, ptr);

   /* If the value we are interpolating has an index into a vector then
    * interpolate the vector and index the result of that instead. This is
    * necessary because the index will get generated as a series of nir_bcsel
    * instructions so it would no longer be an input variable.
    */
   const bool vec_array_deref = deref->deref_type == nir_deref_type_array &&
      glsl_type_is_vector(nir_deref_instr_parent(deref)->type);

   nir_deref_instr *vec_deref = NULL;
   if (vec_array_deref) {
      vec_deref = deref;
      deref = nir_deref_instr_parent(deref);
   }
   intrin->src[0] = nir_src_for_ssa(&deref->dest.ssa);
   intrin->src[1] = nir_src_for_ssa(vtn_get_nir_ssa(b, w[6]));

   intrin->num_components = glsl_get_vector_elements(deref->type);
   nir_ssa_dest_init(&intrin->instr, &intrin->dest,
                     glsl_get_vector_elements(deref->type),
                     glsl_get_bit_size(deref->type), NULL);

   nir_builder_instr_insert(&b->nb, &intrin->instr);

   nir_ssa_def *def;
   if (vec_array_deref) {
      assert(vec_deref);
      def = nir_vector_extract(&b->nb, &intrin->dest.ssa,
                               vec_deref->arr.index.ssa);
   } else {
      def = &intrin->dest.ssa;
   }
   vtn_push_nir_ssa(b, w[2], def);

   return true;
}