Lines Matching refs:ctx
56 * The caller is responsible for initializing ctx::module and ctx::builder.
58 void ac_llvm_context_init(struct ac_llvm_context *ctx, struct ac_llvm_compiler *compiler,
64 ctx->context = LLVMContextCreate();
66 LLVMContextSetOpaquePointers(ctx->context, false);
69 ctx->gfx_level = gfx_level;
70 ctx->family = family;
71 ctx->has_3d_cube_border_color_mipmap = has_3d_cube_border_color_mipmap;
72 ctx->wave_size = wave_size;
73 ctx->ballot_mask_bits = ballot_mask_bits;
74 ctx->float_mode = float_mode;
75 ctx->module = ac_create_module(compiler->tm, ctx->context);
76 ctx->builder = ac_create_builder(ctx->context, float_mode);
78 ctx->voidt = LLVMVoidTypeInContext(ctx->context);
79 ctx->i1 = LLVMInt1TypeInContext(ctx->context);
80 ctx->i8 = LLVMInt8TypeInContext(ctx->context);
81 ctx->i16 = LLVMIntTypeInContext(ctx->context, 16);
82 ctx->i32 = LLVMIntTypeInContext(ctx->context, 32);
83 ctx->i64 = LLVMIntTypeInContext(ctx->context, 64);
84 ctx->i128 = LLVMIntTypeInContext(ctx->context, 128);
85 ctx->intptr = ctx->i32;
86 ctx->f16 = LLVMHalfTypeInContext(ctx->context);
87 ctx->f32 = LLVMFloatTypeInContext(ctx->context);
88 ctx->f64 = LLVMDoubleTypeInContext(ctx->context);
89 ctx->v2i16 = LLVMVectorType(ctx->i16, 2);
90 ctx->v4i16 = LLVMVectorType(ctx->i16, 4);
91 ctx->v2f16 = LLVMVectorType(ctx->f16, 2);
92 ctx->v4f16 = LLVMVectorType(ctx->f16, 4);
93 ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
94 ctx->v3i32 = LLVMVectorType(ctx->i32, 3);
95 ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
96 ctx->v2f32 = LLVMVectorType(ctx->f32, 2);
97 ctx->v3f32 = LLVMVectorType(ctx->f32, 3);
98 ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
99 ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
100 ctx->iN_wavemask = LLVMIntTypeInContext(ctx->context, ctx->wave_size);
101 ctx->iN_ballotmask = LLVMIntTypeInContext(ctx->context, ballot_mask_bits);
103 ctx->i8_0 = LLVMConstInt(ctx->i8, 0, false);
104 ctx->i8_1 = LLVMConstInt(ctx->i8, 1, false);
105 ctx->i16_0 = LLVMConstInt(ctx->i16, 0, false);
106 ctx->i16_1 = LLVMConstInt(ctx->i16, 1, false);
107 ctx->i32_0 = LLVMConstInt(ctx->i32, 0, false);
108 ctx->i32_1 = LLVMConstInt(ctx->i32, 1, false);
109 ctx->i64_0 = LLVMConstInt(ctx->i64, 0, false);
110 ctx->i64_1 = LLVMConstInt(ctx->i64, 1, false);
111 ctx->i128_0 = LLVMConstInt(ctx->i128, 0, false);
112 ctx->i128_1 = LLVMConstInt(ctx->i128, 1, false);
113 ctx->f16_0 = LLVMConstReal(ctx->f16, 0.0);
114 ctx->f16_1 = LLVMConstReal(ctx->f16, 1.0);
115 ctx->f32_0 = LLVMConstReal(ctx->f32, 0.0);
116 ctx->f32_1 = LLVMConstReal(ctx->f32, 1.0);
117 ctx->f64_0 = LLVMConstReal(ctx->f64, 0.0);
118 ctx->f64_1 = LLVMConstReal(ctx->f64, 1.0);
120 ctx->i1false = LLVMConstInt(ctx->i1, 0, false);
121 ctx->i1true = LLVMConstInt(ctx->i1, 1, false);
123 ctx->range_md_kind = LLVMGetMDKindIDInContext(ctx->context, "range", 5);
125 ctx->invariant_load_md_kind = LLVMGetMDKindIDInContext(ctx->context, "invariant.load", 14);
127 ctx->uniform_md_kind = LLVMGetMDKindIDInContext(ctx->context, "amdgpu.uniform", 14);
129 ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0);
130 ctx->flow = calloc(1, sizeof(*ctx->flow));
133 void ac_llvm_context_dispose(struct ac_llvm_context *ctx)
135 free(ctx->flow->stack);
136 free(ctx->flow);
137 ctx->flow = NULL;
158 int ac_get_elem_bits(struct ac_llvm_context *ctx, LLVMTypeRef type)
171 if (type == ctx->f16)
173 if (type == ctx->f32)
175 if (type == ctx->f64)
208 static LLVMTypeRef to_integer_type_scalar(struct ac_llvm_context *ctx, LLVMTypeRef t)
210 if (t == ctx->i1)
211 return ctx->i1;
212 else if (t == ctx->i8)
213 return ctx->i8;
214 else if (t == ctx->f16 || t == ctx->i16)
215 return ctx->i16;
216 else if (t == ctx->f32 || t == ctx->i32)
217 return ctx->i32;
218 else if (t == ctx->f64 || t == ctx->i64)
219 return ctx->i64;
224 LLVMTypeRef ac_to_integer_type(struct ac_llvm_context *ctx, LLVMTypeRef t)
228 return LLVMVectorType(to_integer_type_scalar(ctx, elem_type), LLVMGetVectorSize(t));
233 return ctx->i64;
236 return ctx->i32;
241 return to_integer_type_scalar(ctx, t);
244 LLVMValueRef ac_to_integer(struct ac_llvm_context *ctx, LLVMValueRef v)
248 return LLVMBuildPtrToInt(ctx->builder, v, ac_to_integer_type(ctx, type), "");
250 return LLVMBuildBitCast(ctx->builder, v, ac_to_integer_type(ctx, type), "");
253 LLVMValueRef ac_to_integer_or_pointer(struct ac_llvm_context *ctx, LLVMValueRef v)
258 return ac_to_integer(ctx, v);
261 static LLVMTypeRef to_float_type_scalar(struct ac_llvm_context *ctx, LLVMTypeRef t)
263 if (t == ctx->i8)
264 return ctx->i8;
265 else if (t == ctx->i16 || t == ctx->f16)
266 return ctx->f16;
267 else if (t == ctx->i32 || t == ctx->f32)
268 return ctx->f32;
269 else if (t == ctx->i64 || t == ctx->f64)
270 return ctx->f64;
275 LLVMTypeRef ac_to_float_type(struct ac_llvm_context *ctx, LLVMTypeRef t)
279 return LLVMVectorType(to_float_type_scalar(ctx, elem_type), LLVMGetVectorSize(t));
281 return to_float_type_scalar(ctx, t);
284 LLVMValueRef ac_to_float(struct ac_llvm_context *ctx, LLVMValueRef v)
287 return LLVMBuildBitCast(ctx->builder, v, ac_to_float_type(ctx, type), "");
290 LLVMValueRef ac_build_intrinsic(struct ac_llvm_context *ctx, const char *name,
305 LLVMValueRef function = LLVMGetNamedFunction(ctx->module, name);
308 function = LLVMAddFunction(ctx->module, name, function_type);
314 ac_add_func_attributes(ctx->context, function, attrib_mask);
317 call = LLVMBuildCall2(ctx->builder, function_type, function, params, param_count, "");
319 ac_add_func_attributes(ctx->context, call, attrib_mask);
386 LLVMValueRef ac_build_phi(struct ac_llvm_context *ctx, LLVMTypeRef type, unsigned count_incoming,
389 LLVMValueRef phi = LLVMBuildPhi(ctx->builder, type, "");
394 void ac_build_s_barrier(struct ac_llvm_context *ctx, gl_shader_stage stage)
399 if (ctx->gfx_level == GFX6 && stage == MESA_SHADER_TESS_CTRL)
402 ac_build_intrinsic(ctx, "llvm.amdgcn.s.barrier", ctx->voidt, NULL, 0, AC_FUNC_ATTR_CONVERGENT);
412 void ac_build_optimization_barrier(struct ac_llvm_context *ctx, LLVMValueRef *pgpr, bool sgpr)
416 LLVMBuilderRef builder = ctx->builder;
423 LLVMTypeRef ftype = LLVMFunctionType(ctx->voidt, NULL, 0, false);
426 } else if (LLVMTypeOf(*pgpr) == ctx->i32) {
429 LLVMTypeRef ftype = LLVMFunctionType(ctx->i32, &ctx->i32, 1, false);
433 } else if (LLVMTypeOf(*pgpr) == ctx->i16) {
436 LLVMTypeRef ftype = LLVMFunctionType(ctx->i16, &ctx->i16, 1, false);
447 LLVMTypeRef ftype = LLVMFunctionType(ctx->i32, &ctx->i32, 1, false);
450 unsigned bitsize = ac_get_elem_bits(ctx, type);
457 vgpr = LLVMBuildZExt(ctx->builder, vgpr, ctx->i32, "");
464 vgpr = LLVMBuildBitCast(builder, vgpr, LLVMVectorType(ctx->i32, vgpr_size / 4), "");
465 vgpr0 = LLVMBuildExtractElement(builder, vgpr, ctx->i32_0, "");
467 vgpr = LLVMBuildInsertElement(builder, vgpr, vgpr0, ctx->i32_0, "");
477 LLVMValueRef ac_build_shader_clock(struct ac_llvm_context *ctx, nir_scope scope)
482 LLVMValueRef tmp = ac_build_intrinsic(ctx, name, ctx->i64, NULL, 0, 0);
483 return LLVMBuildBitCast(ctx->builder, tmp, ctx->v2i32, "");
486 LLVMValueRef ac_build_ballot(struct ac_llvm_context *ctx, LLVMValueRef value)
490 if (LLVMTypeOf(value) == ctx->i1)
491 value = LLVMBuildZExt(ctx->builder, value, ctx->i32, "");
493 if (ctx->wave_size == 64)
498 LLVMValueRef args[3] = {value, ctx->i32_0, LLVMConstInt(ctx->i32, LLVMIntNE, 0)};
503 ac_build_optimization_barrier(ctx, &args[0], false);
505 args[0] = ac_to_integer(ctx, args[0]);
508 ctx, name, ctx->iN_wavemask, args, 3,
512 LLVMValueRef ac_get_i1_sgpr_mask(struct ac_llvm_context *ctx, LLVMValueRef value)
516 if (ctx->wave_size == 64)
523 ctx->i1false,
524 LLVMConstInt(ctx->i32, LLVMIntNE, 0),
528 ctx, name, ctx->iN_wavemask, args, 3,
532 LLVMValueRef ac_build_vote_all(struct ac_llvm_context *ctx, LLVMValueRef value)
534 LLVMValueRef active_set = ac_build_ballot(ctx, ctx->i32_1);
535 LLVMValueRef vote_set = ac_build_ballot(ctx, value);
536 return LLVMBuildICmp(ctx->builder, LLVMIntEQ, vote_set, active_set, "");
539 LLVMValueRef ac_build_vote_any(struct ac_llvm_context *ctx, LLVMValueRef value)
541 LLVMValueRef vote_set = ac_build_ballot(ctx, value);
542 return LLVMBuildICmp(ctx->builder, LLVMIntNE, vote_set, LLVMConstInt(ctx->iN_wavemask, 0, 0),
546 LLVMValueRef ac_build_vote_eq(struct ac_llvm_context *ctx, LLVMValueRef value)
548 LLVMValueRef active_set = ac_build_ballot(ctx, ctx->i32_1);
549 LLVMValueRef vote_set = ac_build_ballot(ctx, value);
551 LLVMValueRef all = LLVMBuildICmp(ctx->builder, LLVMIntEQ, vote_set, active_set, "");
553 LLVMBuildICmp(ctx->builder, LLVMIntEQ, vote_set, LLVMConstInt(ctx->iN_wavemask, 0, 0), "");
554 return LLVMBuildOr(ctx->builder, all, none, "");
557 LLVMValueRef ac_build_varying_gather_values(struct ac_llvm_context *ctx, LLVMValueRef *values,
572 LLVMValueRef index = LLVMConstInt(ctx->i32, i - component, false);
573 vec = LLVMBuildInsertElement(ctx->builder, vec, value, index, "");
578 LLVMValueRef ac_build_gather_values_extended(struct ac_llvm_context *ctx, LLVMValueRef *values,
582 LLVMBuilderRef builder = ctx->builder;
596 LLVMValueRef index = LLVMConstInt(ctx->i32, i, false);
602 LLVMValueRef ac_build_gather_values(struct ac_llvm_context *ctx, LLVMValueRef *values,
605 return ac_build_gather_values_extended(ctx, values, value_count, 1, false);
608 LLVMValueRef ac_build_concat(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b)
615 elems[i] = ac_llvm_extract_elem(ctx, a, i);
617 elems[a_size + i] = ac_llvm_extract_elem(ctx, b, i);
619 return ac_build_gather_values(ctx, elems, a_size + b_size);
625 LLVMValueRef ac_build_expand(struct ac_llvm_context *ctx, LLVMValueRef value,
640 chan[i] = ac_llvm_extract_elem(ctx, value, i);
654 return ac_build_gather_values(ctx, chan, dst_channels);
659 LLVMValueRef ac_extract_components(struct ac_llvm_context *ctx, LLVMValueRef value, unsigned start,
665 chan[i] = ac_llvm_extract_elem(ctx, value, i + start);
667 return ac_build_gather_values(ctx, chan, channels);
673 LLVMValueRef ac_build_expand_to_vec4(struct ac_llvm_context *ctx, LLVMValueRef value,
676 return ac_build_expand(ctx, value, num_channels, 4);
679 LLVMValueRef ac_build_round(struct ac_llvm_context *ctx, LLVMValueRef value)
691 return ac_build_intrinsic(ctx, name, LLVMTypeOf(value), &value, 1, AC_FUNC_ATTR_READNONE);
694 LLVMValueRef ac_build_fdiv(struct ac_llvm_context *ctx, LLVMValueRef num, LLVMValueRef den)
700 if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL && type_size == 8)
701 return LLVMBuildFDiv(ctx->builder, num, den, "");
711 ac_build_intrinsic(ctx, name, LLVMTypeOf(den), &den, 1, AC_FUNC_ATTR_READNONE);
713 return LLVMBuildFMul(ctx->builder, num, rcp, "");
718 LLVMValueRef ac_build_fast_udiv(struct ac_llvm_context *ctx, LLVMValueRef num,
722 LLVMBuilderRef builder = ctx->builder;
725 num = LLVMBuildMul(builder, LLVMBuildZExt(builder, num, ctx->i64, ""),
726 LLVMBuildZExt(builder, multiplier, ctx->i64, ""), "");
727 num = LLVMBuildAdd(builder, num, LLVMBuildZExt(builder, increment, ctx->i64, ""), "");
728 num = LLVMBuildLShr(builder, num, LLVMConstInt(ctx->i64, 32, 0), "");
729 num = LLVMBuildTrunc(builder, num, ctx->i32, "");
736 LLVMValueRef ac_build_fast_udiv_nuw(struct ac_llvm_context *ctx, LLVMValueRef num,
740 LLVMBuilderRef builder = ctx->builder;
744 num = LLVMBuildMul(builder, LLVMBuildZExt(builder, num, ctx->i64, ""),
745 LLVMBuildZExt(builder, multiplier, ctx->i64, ""), "");
746 num = LLVMBuildLShr(builder, num, LLVMConstInt(ctx->i64, 32, 0), "");
747 num = LLVMBuildTrunc(builder, num, ctx->i32, "");
753 LLVMValueRef ac_build_fast_udiv_u31_d_not_one(struct ac_llvm_context *ctx, LLVMValueRef num,
756 LLVMBuilderRef builder = ctx->builder;
758 num = LLVMBuildMul(builder, LLVMBuildZExt(builder, num, ctx->i64, ""),
759 LLVMBuildZExt(builder, multiplier, ctx->i64, ""), "");
760 num = LLVMBuildLShr(builder, num, LLVMConstInt(ctx->i64, 32, 0), "");
761 num = LLVMBuildTrunc(builder, num, ctx->i32, "");
775 static void build_cube_intrinsic(struct ac_llvm_context *ctx, LLVMValueRef in[3],
778 LLVMTypeRef f32 = ctx->f32;
780 out->stc[1] = ac_build_intrinsic(ctx, "llvm.amdgcn.cubetc", f32, in, 3, AC_FUNC_ATTR_READNONE);
781 out->stc[0] = ac_build_intrinsic(ctx, "llvm.amdgcn.cubesc", f32, in, 3, AC_FUNC_ATTR_READNONE);
782 out->ma = ac_build_intrinsic(ctx, "llvm.amdgcn.cubema", f32, in, 3, AC_FUNC_ATTR_READNONE);
783 out->id = ac_build_intrinsic(ctx, "llvm.amdgcn.cubeid", f32, in, 3, AC_FUNC_ATTR_READNONE);
795 static void build_cube_select(struct ac_llvm_context *ctx,
800 LLVMBuilderRef builder = ctx->builder;
836 tmp = ac_build_intrinsic(ctx, "llvm.fabs.f32", ctx->f32, &tmp, 1, AC_FUNC_ATTR_READNONE);
840 void ac_prepare_cube_coords(struct ac_llvm_context *ctx, bool is_deriv, bool is_array, bool is_lod,
844 LLVMBuilderRef builder = ctx->builder;
850 LLVMValueRef tmp = ac_build_round(ctx, coords_arg[3]);
871 if (ctx->gfx_level <= GFX8) {
873 ge0 = LLVMBuildFCmp(builder, LLVMRealOGE, tmp, ctx->f32_0, "");
874 tmp = LLVMBuildSelect(builder, ge0, tmp, ctx->f32_0, "");
880 build_cube_intrinsic(ctx, coords_arg, &selcoords);
883 ac_build_intrinsic(ctx, "llvm.fabs.f32", ctx->f32, &selcoords.ma, 1, AC_FUNC_ATTR_READNONE);
884 invma = ac_build_fdiv(ctx, LLVMConstReal(ctx->f32, 1.0), invma);
921 build_cube_select(ctx, &selcoords, &derivs_arg[axis * 3], deriv_st, &deriv_ma);
938 coords[i] = LLVMBuildFAdd(builder, coords[i], LLVMConstReal(ctx->f32, 1.5), "");
943 coords[2] = ac_build_fmad(ctx, coords_arg[3], LLVMConstReal(ctx->f32, 8.0), coords[2]);
949 LLVMValueRef ac_build_fs_interp(struct ac_llvm_context *ctx, LLVMValueRef llvm_chan,
955 if (ctx->gfx_level >= GFX11) {
963 p = ac_build_intrinsic(ctx, "llvm.amdgcn.lds.param.load",
964 ctx->f32, args, 3, AC_FUNC_ATTR_READNONE);
970 p10 = ac_build_intrinsic(ctx, "llvm.amdgcn.interp.inreg.p10",
971 ctx->f32, args, 3, AC_FUNC_ATTR_READNONE);
977 return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.inreg.p2",
978 ctx->f32, args, 3, AC_FUNC_ATTR_READNONE);
988 p1 = ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p1",
989 ctx->f32, args, 4, AC_FUNC_ATTR_READNONE);
997 return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p2",
998 ctx->f32, args, 5, AC_FUNC_ATTR_READNONE);
1002 LLVMValueRef ac_build_fs_interp_f16(struct ac_llvm_context *ctx, LLVMValueRef llvm_chan,
1008 if (ctx->gfx_level >= GFX11) {
1016 p = ac_build_intrinsic(ctx, "llvm.amdgcn.lds.param.load",
1017 ctx->f32, args, 3, AC_FUNC_ATTR_READNONE);
1022 args[3] = high_16bits ? ctx->i1true : ctx->i1false;
1024 p10 = ac_build_intrinsic(ctx, "llvm.amdgcn.interp.inreg.p10.f16",
1025 ctx->f32, args, 4, AC_FUNC_ATTR_READNONE);
1030 args[3] = high_16bits ? ctx->i1true : ctx->i1false;
1032 return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.inreg.p2.f16",
1033 ctx->f16, args, 4, AC_FUNC_ATTR_READNONE);
1041 args[3] = high_16bits ? ctx->i1true : ctx->i1false;
1044 p1 = ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p1.f16", ctx->f32, args, 5,
1051 args[4] = high_16bits ? ctx->i1true : ctx->i1false;
1054 return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p2.f16", ctx->f16, args, 6,
1059 LLVMValueRef ac_build_fs_interp_mov(struct ac_llvm_context *ctx, LLVMValueRef parameter,
1065 if (ctx->gfx_level >= GFX11) {
1072 p = ac_build_intrinsic(ctx, "llvm.amdgcn.lds.param.load",
1073 ctx->f32, args, 3, AC_FUNC_ATTR_READNONE);
1074 p = ac_build_quad_swizzle(ctx, p, 0, 0, 0 ,0);
1075 return ac_build_intrinsic(ctx, "llvm.amdgcn.wqm.f32", ctx->f32, &p, 1, AC_FUNC_ATTR_READNONE);
1082 return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.mov", ctx->f32, args, 4,
1087 LLVMValueRef ac_build_gep_ptr(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
1090 return LLVMBuildGEP(ctx->builder, base_ptr, &index, 1, "");
1093 LLVMValueRef ac_build_gep0(struct ac_llvm_context *ctx, LLVMValueRef base_ptr, LLVMValueRef index)
1096 ctx->i32_0,
1099 return LLVMBuildGEP(ctx->builder, base_ptr, indices, 2, "");
1102 LLVMValueRef ac_build_pointer_add(struct ac_llvm_context *ctx, LLVMValueRef ptr, LLVMValueRef index)
1104 LLVMValueRef offset_ptr = LLVMBuildGEP(ctx->builder, ptr, &index, 1, "");
1105 return LLVMBuildPointerCast(ctx->builder, offset_ptr, LLVMTypeOf(ptr), "");
1108 void ac_build_indexed_store(struct ac_llvm_context *ctx, LLVMValueRef base_ptr, LLVMValueRef index,
1111 LLVMBuildStore(ctx->builder, value, ac_build_gep0(ctx, base_ptr, index));
1142 static LLVMValueRef ac_build_load_custom(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
1150 pointer = LLVMBuildInBoundsGEP(ctx->builder, base_ptr, &index, 1, "");
1152 pointer = LLVMBuildGEP(ctx->builder, base_ptr, &index, 1, "");
1155 LLVMSetMetadata(pointer, ctx->uniform_md_kind, ctx->empty_md);
1156 result = LLVMBuildLoad(ctx->builder, pointer, "");
1158 LLVMSetMetadata(result, ctx->invariant_load_md_kind, ctx->empty_md);
1163 LLVMValueRef ac_build_load(struct ac_llvm_context *ctx, LLVMValueRef base_ptr, LLVMValueRef index)
1165 return ac_build_load_custom(ctx, base_ptr, index, false, false, false);
1168 LLVMValueRef ac_build_load_invariant(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
1171 return ac_build_load_custom(ctx, base_ptr, index, false, true, false);
1176 LLVMValueRef ac_build_load_to_sgpr(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
1179 return ac_build_load_custom(ctx, base_ptr, index, true, true, true);
1183 LLVMValueRef ac_build_load_to_sgpr_uint_wraparound(struct ac_llvm_context *ctx,
1186 return ac_build_load_custom(ctx, base_ptr, index, true, true, false);
1189 static unsigned get_load_cache_policy(struct ac_llvm_context *ctx, unsigned cache_policy)
1192 (ctx->gfx_level >= GFX10 && ctx->gfx_level < GFX11 && cache_policy & ac_glc ? ac_dlc : 0);
1195 static unsigned get_store_cache_policy(struct ac_llvm_context *ctx, unsigned cache_policy)
1197 if (ctx->gfx_level >= GFX11)
1202 static void ac_build_buffer_store_common(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
1210 args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, "");
1212 args[idx++] = vindex ? vindex : ctx->i32_0;
1213 args[idx++] = voffset ? voffset : ctx->i32_0;
1214 args[idx++] = soffset ? soffset : ctx->i32_0;
1215 args[idx++] = LLVMConstInt(ctx->i32, get_store_cache_policy(ctx, cache_policy), 0);
1228 ac_build_intrinsic(ctx, name, ctx->voidt, args, idx, AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY);
1231 void ac_build_buffer_store_format(struct ac_llvm_context *ctx, LLVMValueRef rsrc, LLVMValueRef data,
1234 ac_build_buffer_store_common(ctx, rsrc, data, vindex, voffset, NULL, cache_policy, true);
1238 void ac_build_buffer_store_dword(struct ac_llvm_context *ctx, LLVMValueRef rsrc, LLVMValueRef vdata,
1245 if (num_channels == 3 && !ac_has_vec3_support(ctx->gfx_level, false)) {
1249 v[i] = LLVMBuildExtractElement(ctx->builder, vdata, LLVMConstInt(ctx->i32, i, 0), "");
1251 v01 = ac_build_gather_values(ctx, v, 2);
1253 voffset2 = LLVMBuildAdd(ctx->builder, voffset ? voffset : ctx->i32_0,
1254 LLVMConstInt(ctx->i32, 8, 0), "");
1256 ac_build_buffer_store_dword(ctx, rsrc, v01, vindex, voffset, soffset, cache_policy);
1257 ac_build_buffer_store_dword(ctx, rsrc, v[2], vindex, voffset2, soffset, cache_policy);
1261 ac_build_buffer_store_common(ctx, rsrc, ac_to_float(ctx, vdata), vindex, voffset, soffset,
1265 static LLVMValueRef ac_build_buffer_load_common(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
1274 args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, "");
1276 args[idx++] = vindex ? vindex : ctx->i32_0;
1277 args[idx++] = voffset ? voffset : ctx->i32_0;
1278 args[idx++] = soffset ? soffset : ctx->i32_0;
1279 args[idx++] = LLVMConstInt(ctx->i32, get_load_cache_policy(ctx, cache_policy), 0);
1281 !ac_has_vec3_support(ctx->gfx_level, use_format) && num_channels == 3 ? 4 : num_channels;
1286 assert(!use_format || (channel_type != ctx->f16 && channel_type != ctx->i16) ||
1287 ctx->gfx_level >= GFX8);
1299 return ac_build_intrinsic(ctx, name, type, args, idx, ac_get_load_intr_attribs(can_speculate));
1302 LLVMValueRef ac_build_buffer_load(struct ac_llvm_context *ctx, LLVMValueRef rsrc, int num_channels,
1308 (!(cache_policy & ac_glc) || ctx->gfx_level >= GFX8)) {
1313 LLVMValueRef offset = voffset ? voffset : ctx->i32_0;
1315 offset = LLVMBuildAdd(ctx->builder, offset, soffset, "");
1319 offset = LLVMBuildAdd(ctx->builder, offset, LLVMConstInt(ctx->i32, 4, 0), "");
1324 LLVMConstInt(ctx->i32, get_load_cache_policy(ctx, cache_policy), 0),
1326 result[i] = ac_build_intrinsic(ctx, "llvm.amdgcn.s.buffer.load.f32", ctx->f32, args, 3,
1332 if (num_channels == 3 && !ac_has_vec3_support(ctx->gfx_level, false))
1333 result[num_channels++] = LLVMGetUndef(ctx->f32);
1334 return ac_build_gather_values(ctx, result, num_channels);
1337 return ac_build_buffer_load_common(ctx, rsrc, vindex, voffset, soffset, num_channels,
1341 LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
1349 cache_policy = get_load_cache_policy(ctx, cache_policy);
1367 LLVMTypeRef param_types[] = {ctx->v2i32, ctx->v4i32};
1368 LLVMTypeRef calltype = LLVMFunctionType(LLVMVectorType(ctx->f32, 5), param_types, 2, false);
1371 LLVMValueRef addr_comp[2] = {vindex ? vindex : ctx->i32_0,
1372 voffset ? voffset : ctx->i32_0};
1374 LLVMValueRef args[] = {ac_build_gather_values(ctx, addr_comp, 2),
1375 LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, "")};
1376 LLVMValueRef res = LLVMBuildCall2(ctx->builder, calltype, inlineasm, args, 2, "");
1378 return ac_build_concat(ctx, ac_trim_vector(ctx, res, num_channels),
1379 ac_llvm_extract_elem(ctx, res, 4));
1382 return ac_build_buffer_load_common(ctx, rsrc, vindex, voffset, ctx->i32_0, num_channels,
1383 d16 ? ctx->f16 : ctx->f32, cache_policy, can_speculate, true,
1387 static LLVMValueRef ac_build_tbuffer_load(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
1395 args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, "");
1397 args[idx++] = vindex ? vindex : ctx->i32_0;
1398 args[idx++] = voffset ? voffset : ctx->i32_0;
1399 args[idx++] = soffset ? soffset : ctx->i32_0;
1400 args[idx++] = LLVMConstInt(ctx->i32, ac_get_tbuffer_format(ctx->gfx_level, dfmt, nfmt), 0);
1401 args[idx++] = LLVMConstInt(ctx->i32, get_load_cache_policy(ctx, cache_policy), 0);
1403 !ac_has_vec3_support(ctx->gfx_level, true) && num_channels == 3 ? 4 : num_channels;
1407 LLVMTypeRef type = func > 1 ? LLVMVectorType(ctx->i32, func) : ctx->i32;
1412 return ac_build_intrinsic(ctx, name, type, args, idx, ac_get_load_intr_attribs(can_speculate));
1415 LLVMValueRef ac_build_struct_tbuffer_load(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
1421 return ac_build_tbuffer_load(ctx, rsrc, vindex, voffset, soffset, num_channels, dfmt,
1425 LLVMValueRef ac_build_buffer_load_short(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
1429 return ac_build_buffer_load_common(ctx, rsrc, NULL, voffset, soffset, 1, ctx->i16,
1433 LLVMValueRef ac_build_buffer_load_byte(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
1437 return ac_build_buffer_load_common(ctx, rsrc, NULL, voffset, soffset, 1, ctx->i8, cache_policy,
1447 static LLVMValueRef ac_ufN_to_float(struct ac_llvm_context *ctx, LLVMValueRef src,
1450 assert(LLVMTypeOf(src) == ctx->i32);
1455 LLVMBuildAnd(ctx->builder, src, LLVMConstInt(ctx->i32, (1 << mant_bits) - 1, false), "");
1462 shifted = LLVMBuildShl(ctx->builder, src, LLVMConstInt(ctx->i32, normal_shift, false), "");
1464 LLVMBuildAdd(ctx->builder, shifted, LLVMConstInt(ctx->i32, bias_shift << 23, false), "");
1468 naninf = LLVMBuildOr(ctx->builder, normal, LLVMConstInt(ctx->i32, 0xff << 23, false), "");
1475 mantissa, ctx->i1true, /* result can be undef when arg is 0 */
1478 ac_build_intrinsic(ctx, "llvm.ctlz.i32", ctx->i32, params, 2, AC_FUNC_ATTR_READNONE);
1481 tmp = LLVMBuildSub(ctx->builder, ctlz, LLVMConstInt(ctx->i32, 8, false), "");
1482 denormal = LLVMBuildShl(ctx->builder, mantissa, tmp, "");
1485 tmp = LLVMBuildSub(ctx->builder, LLVMConstInt(ctx->i32, denormal_exp, false), ctlz, "");
1486 tmp = LLVMBuildShl(ctx->builder, tmp, LLVMConstInt(ctx->i32, 23, false), "");
1487 denormal = LLVMBuildAdd(ctx->builder, denormal, tmp, "");
1492 tmp = LLVMBuildICmp(ctx->builder, LLVMIntUGE, src,
1493 LLVMConstInt(ctx->i32, ((1ULL << exp_bits) - 1) << mant_bits, false), "");
1494 result = LLVMBuildSelect(ctx->builder, tmp, naninf, normal, "");
1496 tmp = LLVMBuildICmp(ctx->builder, LLVMIntUGE, src,
1497 LLVMConstInt(ctx->i32, 1ULL << mant_bits, false), "");
1498 result = LLVMBuildSelect(ctx->builder, tmp, result, denormal, "");
1500 tmp = LLVMBuildICmp(ctx->builder, LLVMIntNE, src, ctx->i32_0, "");
1501 result = LLVMBuildSelect(ctx->builder, tmp, result, ctx->i32_0, "");
1503 return ac_to_float(ctx, result);
1524 LLVMValueRef ac_build_opencoded_load_format(struct ac_llvm_context *ctx, unsigned log_size,
1544 if ((ctx->gfx_level == GFX6 || ctx->gfx_level >= GFX10) && !known_aligned) {
1558 LLVMBuildAdd(ctx->builder, soffset, LLVMConstInt(ctx->i32, i << load_log_size, false), "");
1560 load_log_size == 0 ? ctx->i8 : load_log_size == 1 ? ctx->i16 : ctx->i32;
1563 ac_build_buffer_load_common(ctx, rsrc, vindex, voffset, tmp, num_channels, channel_type,
1566 loads[i] = ac_to_integer(ctx, loads[i]);
1571 LLVMTypeRef dst_type = log_recombine == 2 ? ctx->i32 : ctx->i16;
1576 tmp = LLVMBuildZExt(ctx->builder, loads[src], dst_type, "");
1580 tmp = LLVMBuildShl(ctx->builder, tmp, LLVMConstInt(dst_type, 8 * i, false), "");
1581 accum = LLVMBuildOr(ctx->builder, accum, tmp, "");
1596 tmp = LLVMConstInt(ctx->i32, i, false);
1597 loads[i] = LLVMBuildExtractElement(ctx->builder, loaded, tmp, "");
1606 LLVMTypeRef dst_type = LLVMIntTypeInContext(ctx->context, dst_bits);
1611 tmp = LLVMBuildLShr(ctx->builder, loaded, tmp, "");
1612 loads[dst - 1] = LLVMBuildTrunc(ctx->builder, tmp, dst_type, "");
1621 tmp = ac_build_gather_values(ctx, &loads[2 * i], 2);
1622 loads[i] = LLVMBuildBitCast(ctx->builder, tmp, ctx->f64, "");
1627 LLVMValueRef i32_2047 = LLVMConstInt(ctx->i32, 2047, false);
1628 LLVMValueRef r = LLVMBuildAnd(ctx->builder, data, i32_2047, "");
1629 tmp = LLVMBuildLShr(ctx->builder, data, LLVMConstInt(ctx->i32, 11, false), "");
1630 LLVMValueRef g = LLVMBuildAnd(ctx->builder, tmp, i32_2047, "");
1631 LLVMValueRef b = LLVMBuildLShr(ctx->builder, data, LLVMConstInt(ctx->i32, 22, false), "");
1633 loads[0] = ac_to_integer(ctx, ac_ufN_to_float(ctx, r, 5, 6));
1634 loads[1] = ac_to_integer(ctx, ac_ufN_to_float(ctx, g, 5, 6));
1635 loads[2] = ac_to_integer(ctx, ac_ufN_to_float(ctx, b, 5, 5));
1643 LLVMTypeRef i10 = LLVMIntTypeInContext(ctx->context, 10);
1644 LLVMTypeRef i2 = LLVMIntTypeInContext(ctx->context, 2);
1645 loads[0] = LLVMBuildTrunc(ctx->builder, data, i10, "");
1646 tmp = LLVMBuildLShr(ctx->builder, data, LLVMConstInt(ctx->i32, 10, false), "");
1647 loads[1] = LLVMBuildTrunc(ctx->builder, tmp, i10, "");
1648 tmp = LLVMBuildLShr(ctx->builder, data, LLVMConstInt(ctx->i32, 20, false), "");
1649 loads[2] = LLVMBuildTrunc(ctx->builder, tmp, i10, "");
1650 tmp = LLVMBuildLShr(ctx->builder, data, LLVMConstInt(ctx->i32, 30, false), "");
1651 loads[3] = LLVMBuildTrunc(ctx->builder, tmp, i2, "");
1660 tmp = ac_to_float(ctx, loads[chan]);
1662 tmp = LLVMBuildFPTrunc(ctx->builder, tmp, ctx->f32, "");
1664 tmp = LLVMBuildFPExt(ctx->builder, tmp, ctx->f32, "");
1665 loads[chan] = ac_to_integer(ctx, tmp);
1671 loads[chan] = LLVMBuildZExt(ctx->builder, loads[chan], ctx->i32, "");
1676 loads[chan] = LLVMBuildSExt(ctx->builder, loads[chan], ctx->i32, "");
1684 tmp = LLVMBuildUIToFP(ctx->builder, loads[chan], ctx->f32, "");
1686 tmp = LLVMBuildSIToFP(ctx->builder, loads[chan], ctx->f32, "");
1692 scale = LLVMConstReal(ctx->f32, 1.0 / 0x10000);
1695 scale = LLVMConstReal(ctx->f32, 1.0 / (((uint64_t)1 << bits) - 1));
1698 scale = LLVMConstReal(ctx->f32, 1.0 / (((uint64_t)1 << (bits - 1)) - 1));
1701 tmp = LLVMBuildFMul(ctx->builder, tmp, scale, "");
1705 LLVMValueRef neg_one = LLVMConstReal(ctx->f32, -1.0);
1706 LLVMValueRef clamp = LLVMBuildFCmp(ctx->builder, LLVMRealULT, tmp, neg_one, "");
1707 tmp = LLVMBuildSelect(ctx->builder, clamp, neg_one, tmp, "");
1710 loads[chan] = ac_to_integer(ctx, tmp);
1716 loads[num_channels] = num_channels == 3 ? ctx->i32_1 : ctx->i32_0;
1718 loads[num_channels] = ac_to_integer(ctx, num_channels == 3 ? ctx->f32_1 : ctx->f32_0);
1729 return ac_build_gather_values(ctx, loads, 4);
1732 void ac_build_buffer_store_short(struct ac_llvm_context *ctx, LLVMValueRef rsrc,
1736 vdata = LLVMBuildBitCast(ctx->builder, vdata, ctx->i16, "");
1738 ac_build_buffer_store_common(ctx, rsrc, vdata, NULL, voffset, soffset, cache_policy, false);
1741 void ac_build_buffer_store_byte(struct ac_llvm_context *ctx, LLVMValueRef rsrc, LLVMValueRef vdata,
1744 vdata = LLVMBuildBitCast(ctx->builder, vdata, ctx->i8, "");
1746 ac_build_buffer_store_common(ctx, rsrc, vdata, NULL, voffset, soffset, cache_policy, false);
1756 void ac_set_range_metadata(struct ac_llvm_context *ctx, LLVMValueRef value, unsigned lo,
1766 LLVMSetMetadata(value, ctx->range_md_kind, range_md);
1769 LLVMValueRef ac_get_thread_id(struct ac_llvm_context *ctx)
1771 return ac_build_mbcnt(ctx, LLVMConstInt(ctx->iN_wavemask, ~0ull, 0));
1798 LLVMValueRef ac_build_ddxy(struct ac_llvm_context *ctx, uint32_t mask, int idx, LLVMValueRef val)
1806 result_type = ac_to_float_type(ctx, LLVMTypeOf(val));
1808 if (result_type == ctx->f16)
1809 val = LLVMBuildZExt(ctx->builder, val, ctx->i32, "");
1810 else if (result_type == ctx->v2f16)
1811 val = LLVMBuildBitCast(ctx->builder, val, ctx->i32, "");
1818 tl = ac_build_quad_swizzle(ctx, val, tl_lanes[0], tl_lanes[1], tl_lanes[2], tl_lanes[3]);
1820 ac_build_quad_swizzle(ctx, val, trbl_lanes[0], trbl_lanes[1], trbl_lanes[2], trbl_lanes[3]);
1822 if (result_type == ctx->f16) {
1823 tl = LLVMBuildTrunc(ctx->builder, tl, ctx->i16, "");
1824 trbl = LLVMBuildTrunc(ctx->builder, trbl, ctx->i16, "");
1827 tl = LLVMBuildBitCast(ctx->builder, tl, result_type, "");
1828 trbl = LLVMBuildBitCast(ctx->builder, trbl, result_type, "");
1829 result = LLVMBuildFSub(ctx->builder, trbl, tl, "");
1834 return ac_build_intrinsic(ctx, name, result_type, &result, 1, 0);
1837 void ac_build_sendmsg(struct ac_llvm_context *ctx, uint32_t msg, LLVMValueRef wave_id)
1840 args[0] = LLVMConstInt(ctx->i32, msg, false);
1842 ac_build_intrinsic(ctx, "llvm.amdgcn.s.sendmsg", ctx->voidt, args, 2, 0);
1845 LLVMValueRef ac_build_imsb(struct ac_llvm_context *ctx, LLVMValueRef arg, LLVMTypeRef dst_type)
1848 ac_build_intrinsic(ctx, "llvm.amdgcn.sffbh.i32", dst_type, &arg, 1, AC_FUNC_ATTR_READNONE);
1852 msb = LLVMBuildSub(ctx->builder, LLVMConstInt(ctx->i32, 31, false), msb, "");
1854 LLVMValueRef all_ones = LLVMConstInt(ctx->i32, -1, true);
1856 LLVMBuildOr(ctx->builder, LLVMBuildICmp(ctx->builder, LLVMIntEQ, arg, ctx->i32_0, ""),
1857 LLVMBuildICmp(ctx->builder, LLVMIntEQ, arg, all_ones, ""), "");
1859 return LLVMBuildSelect(ctx->builder, cond, all_ones, msb, "");
1862 LLVMValueRef ac_build_umsb(struct ac_llvm_context *ctx, LLVMValueRef arg, LLVMTypeRef dst_type)
1870 bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(arg));
1874 type = ctx->i64;
1875 highest_bit = LLVMConstInt(ctx->i64, 63, false);
1876 zero = ctx->i64_0;
1880 type = ctx->i32;
1881 highest_bit = LLVMConstInt(ctx->i32, 31, false);
1882 zero = ctx->i32_0;
1886 type = ctx->i16;
1887 highest_bit = LLVMConstInt(ctx->i16, 15, false);
1888 zero = ctx->i16_0;
1892 type = ctx->i8;
1893 highest_bit = LLVMConstInt(ctx->i8, 7, false);
1894 zero = ctx->i8_0;
1903 ctx->i1true,
1906 LLVMValueRef msb = ac_build_intrinsic(ctx, intrin_name, type, params, 2, AC_FUNC_ATTR_READNONE);
1910 msb = LLVMBuildSub(ctx->builder, highest_bit, msb, "");
1913 msb = LLVMBuildTrunc(ctx->builder, msb, ctx->i32, "");
1915 msb = LLVMBuildSExt(ctx->builder, msb, ctx->i32, "");
1919 return LLVMBuildSelect(ctx->builder, LLVMBuildICmp(ctx->builder, LLVMIntEQ, arg, zero, ""),
1920 LLVMConstInt(ctx->i32, -1, true), msb, "");
1923 LLVMValueRef ac_build_fmin(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b)
1930 return ac_build_intrinsic(ctx, name, LLVMTypeOf(a), args, 2, AC_FUNC_ATTR_READNONE);
1933 LLVMValueRef ac_build_fmax(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b)
1940 return ac_build_intrinsic(ctx, name, LLVMTypeOf(a), args, 2, AC_FUNC_ATTR_READNONE);
1943 LLVMValueRef ac_build_imin(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b)
1945 LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntSLE, a, b, "");
1946 return LLVMBuildSelect(ctx->builder, cmp, a, b, "");
1949 LLVMValueRef ac_build_imax(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b)
1951 LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGT, a, b, "");
1952 return LLVMBuildSelect(ctx->builder, cmp, a, b, "");
1955 LLVMValueRef ac_build_umin(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b)
1957 LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntULE, a, b, "");
1958 return LLVMBuildSelect(ctx->builder, cmp, a, b, "");
1961 LLVMValueRef ac_build_umax(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b)
1963 LLVMValueRef cmp = LLVMBuildICmp(ctx->builder, LLVMIntUGE, a, b, "");
1964 return LLVMBuildSelect(ctx->builder, cmp, a, b, "");
1967 LLVMValueRef ac_build_clamp(struct ac_llvm_context *ctx, LLVMValueRef value)
1970 return ac_build_fmin(ctx, ac_build_fmax(ctx, value, LLVMConstReal(t, 0.0)),
1974 void ac_build_export(struct ac_llvm_context *ctx, struct ac_export_args *a)
1978 args[0] = LLVMConstInt(ctx->i32, a->target, 0);
1979 args[1] = LLVMConstInt(ctx->i32, a->enabled_channels, 0);
1982 assert(ctx->gfx_level < GFX11);
1984 args[2] = LLVMBuildBitCast(ctx->builder, a->out[0], ctx->v2i16, "");
1985 args[3] = LLVMBuildBitCast(ctx->builder, a->out[1], ctx->v2i16, "");
1986 args[4] = LLVMConstInt(ctx->i1, a->done, 0);
1987 args[5] = LLVMConstInt(ctx->i1, a->valid_mask, 0);
1989 ac_build_intrinsic(ctx, "llvm.amdgcn.exp.compr.v2i16", ctx->voidt, args, 6, 0);
1991 args[2] = LLVMBuildBitCast(ctx->builder, a->out[0], ctx->f32, "");
1992 args[3] = LLVMBuildBitCast(ctx->builder, a->out[1], ctx->f32, "");
1993 args[4] = LLVMBuildBitCast(ctx->builder, a->out[2], ctx->f32, "");
1994 args[5] = LLVMBuildBitCast(ctx->builder, a->out[3], ctx->f32, "");
1995 args[6] = LLVMConstInt(ctx->i1, a->done, 0);
1996 args[7] = LLVMConstInt(ctx->i1, a->valid_mask, 0);
1998 ac_build_intrinsic(ctx, "llvm.amdgcn.exp.f32", ctx->voidt, args, 8, 0);
2002 void ac_build_export_null(struct ac_llvm_context *ctx, bool uses_discard)
2009 if (ctx->gfx_level >= GFX10 && !uses_discard)
2016 args.target = ctx->gfx_level >= GFX11 ? V_008DFC_SQ_EXP_MRT : V_008DFC_SQ_EXP_NULL;
2018 args.out[0] = LLVMGetUndef(ctx->f32); /* R */
2019 args.out[1] = LLVMGetUndef(ctx->f32); /* G */
2020 args.out[2] = LLVMGetUndef(ctx->f32); /* B */
2021 args.out[3] = LLVMGetUndef(ctx->f32); /* A */
2023 ac_build_export(ctx, &args);
2100 LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx, struct ac_image_args *a)
2108 assert(!a->lod || a->lod == ctx->i32_0 || a->lod == ctx->f32_0 || !a->level_zero);
2120 assert(!a->d16 || (ctx->gfx_level >= GFX8 && a->opcode != ac_image_atomic &&
2123 assert(!a->a16 || ctx->gfx_level >= GFX9);
2124 assert(a->g16 == a->a16 || ctx->gfx_level >= GFX10);
2127 ac_get_elem_bits(ctx, LLVMTypeOf(a->offset)) == 32);
2129 ac_get_elem_bits(ctx, LLVMTypeOf(a->bias)) == 32);
2131 ac_get_elem_bits(ctx, LLVMTypeOf(a->compare)) == 32);
2133 ((!a->g16 || ac_get_elem_bits(ctx, LLVMTypeOf(a->derivs[0])) == 16) &&
2134 (a->g16 || ac_get_elem_bits(ctx, LLVMTypeOf(a->derivs[0])) == 32)));
2136 ((!a->a16 || ac_get_elem_bits(ctx, LLVMTypeOf(a->coords[0])) == 16) &&
2137 (a->a16 || ac_get_elem_bits(ctx, LLVMTypeOf(a->coords[0])) == 32)));
2139 ((a->opcode != ac_image_get_resinfo || ac_get_elem_bits(ctx, LLVMTypeOf(a->lod))) &&
2141 ac_get_elem_bits(ctx, LLVMTypeOf(a->lod)) ==
2142 ac_get_elem_bits(ctx, LLVMTypeOf(a->coords[0])))));
2144 ac_get_elem_bits(ctx, LLVMTypeOf(a->min_lod)) ==
2145 ac_get_elem_bits(ctx, LLVMTypeOf(a->coords[0])));
2166 LLVMTypeRef coord_type = sample ? (a->a16 ? ctx->f16 : ctx->f32) : (a->a16 ? ctx->i16 : ctx->i32);
2178 data_type = a->d16 ? ctx->v4f16 : ctx->v4f32;
2183 ctx->context, (LLVMTypeRef[]){data_type, ctx->i32}, 2, false);
2193 args[num_args++] = LLVMConstInt(ctx->i32, dmask, false);
2196 args[num_args++] = ac_to_integer(ctx, a->offset);
2198 args[num_args++] = ac_to_float(ctx, a->bias);
2202 args[num_args++] = ac_to_float(ctx, a->compare);
2206 args[num_args++] = ac_to_float(ctx, a->derivs[i]);
2211 args[num_args++] = LLVMBuildBitCast(ctx->builder, a->coords[i], coord_type, "");
2213 args[num_args++] = LLVMBuildBitCast(ctx->builder, a->lod, coord_type, "");
2215 args[num_args++] = LLVMBuildBitCast(ctx->builder, a->min_lod, coord_type, "");
2222 args[num_args++] = LLVMConstInt(ctx->i1, a->unorm, false);
2225 args[num_args++] = a->tfe ? ctx->i32_1 : ctx->i32_0; /* texfailctrl */
2227 ctx->i32, load ? get_load_cache_policy(ctx, a->cache_policy) : a->cache_policy, false);
2313 retty = ctx->voidt;
2317 LLVMValueRef result = ac_build_intrinsic(ctx, intr_name, retty, args, num_args, a->attributes);
2319 LLVMValueRef texel = LLVMBuildExtractValue(ctx->builder, result, 0, "");
2320 LLVMValueRef code = LLVMBuildExtractValue(ctx->builder, result, 1, "");
2321 result = ac_build_concat(ctx, texel, ac_to_float(ctx, code));
2324 if (!sample && !atomic && retty != ctx->voidt)
2325 result = ac_to_integer(ctx, result);
2330 LLVMValueRef ac_build_image_get_sample_count(struct ac_llvm_context *ctx, LLVMValueRef rsrc)
2337 samples = LLVMBuildExtractElement(ctx->builder, rsrc, LLVMConstInt(ctx->i32, 3, 0), "");
2338 samples = LLVMBuildLShr(ctx->builder, samples, LLVMConstInt(ctx->i32, 16, 0), "");
2339 samples = LLVMBuildAnd(ctx->builder, samples, LLVMConstInt(ctx->i32, 0xf, 0), "");
2340 samples = LLVMBuildShl(ctx->builder, ctx->i32_1, samples, "");
2344 LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx, LLVMValueRef args[2])
2346 return ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pkrtz", ctx->v2f16, args, 2,
2350 LLVMValueRef ac_build_cvt_pknorm_i16(struct ac_llvm_context *ctx, LLVMValueRef args[2])
2352 LLVMValueRef res = ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pknorm.i16", ctx->v2i16, args, 2,
2354 return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
2357 LLVMValueRef ac_build_cvt_pknorm_u16(struct ac_llvm_context *ctx, LLVMValueRef args[2])
2359 LLVMValueRef res = ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pknorm.u16", ctx->v2i16, args, 2,
2361 return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
2364 LLVMValueRef ac_build_cvt_pknorm_i16_f16(struct ac_llvm_context *ctx,
2367 LLVMTypeRef param_types[] = {ctx->f16, ctx->f16};
2368 LLVMTypeRef calltype = LLVMFunctionType(ctx->i32, param_types, 2, false);
2370 ctx->gfx_level >= GFX11 ?
2374 return LLVMBuildCall2(ctx->builder, calltype, code, args, 2, "");
2377 LLVMValueRef ac_build_cvt_pknorm_u16_f16(struct ac_llvm_context *ctx,
2380 LLVMTypeRef param_types[] = {ctx->f16, ctx->f16};
2381 LLVMTypeRef calltype = LLVMFunctionType(ctx->i32, param_types, 2, false);
2383 ctx->gfx_level >= GFX11 ?
2387 return LLVMBuildCall2(ctx->builder, calltype, code, args, 2, "");
2391 LLVMValueRef ac_build_cvt_pk_i16(struct ac_llvm_context *ctx, LLVMValueRef args[2], unsigned bits,
2396 LLVMValueRef max_rgb = LLVMConstInt(ctx->i32, bits == 8 ? 127 : bits == 10 ? 511 : 32767, 0);
2397 LLVMValueRef min_rgb = LLVMConstInt(ctx->i32, bits == 8 ? -128 : bits == 10 ? -512 : -32768, 0);
2398 LLVMValueRef max_alpha = bits != 10 ? max_rgb : ctx->i32_1;
2399 LLVMValueRef min_alpha = bits != 10 ? min_rgb : LLVMConstInt(ctx->i32, -2, 0);
2405 args[i] = ac_build_imin(ctx, args[i], alpha ? max_alpha : max_rgb);
2406 args[i] = ac_build_imax(ctx, args[i], alpha ? min_alpha : min_rgb);
2411 ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pk.i16", ctx->v2i16, args, 2, AC_FUNC_ATTR_READNONE);
2412 return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
2416 LLVMValueRef ac_build_cvt_pk_u16(struct ac_llvm_context *ctx, LLVMValueRef args[2], unsigned bits,
2421 LLVMValueRef max_rgb = LLVMConstInt(ctx->i32, bits == 8 ? 255 : bits == 10 ? 1023 : 65535, 0);
2422 LLVMValueRef max_alpha = bits != 10 ? max_rgb : LLVMConstInt(ctx->i32, 3, 0);
2428 args[i] = ac_build_umin(ctx, args[i], alpha ? max_alpha : max_rgb);
2433 ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pk.u16", ctx->v2i16, args, 2, AC_FUNC_ATTR_READNONE);
2434 return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
2437 LLVMValueRef ac_build_wqm_vote(struct ac_llvm_context *ctx, LLVMValueRef i1)
2439 return ac_build_intrinsic(ctx, "llvm.amdgcn.wqm.vote", ctx->i1, &i1, 1, AC_FUNC_ATTR_READNONE);
2442 void ac_build_kill_if_false(struct ac_llvm_context *ctx, LLVMValueRef i1)
2444 ac_build_intrinsic(ctx, "llvm.amdgcn.kill", ctx->voidt, &i1, 1, 0);
2447 LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, LLVMValueRef input, LLVMValueRef offset,
2456 return ac_build_intrinsic(ctx, is_signed ? "llvm.amdgcn.sbfe.i32" : "llvm.amdgcn.ubfe.i32",
2457 ctx->i32, args, 3, AC_FUNC_ATTR_READNONE);
2460 LLVMValueRef ac_build_imad(struct ac_llvm_context *ctx, LLVMValueRef s0, LLVMValueRef s1,
2463 return LLVMBuildAdd(ctx->builder, LLVMBuildMul(ctx->builder, s0, s1, ""), s2, "");
2466 LLVMValueRef ac_build_fmad(struct ac_llvm_context *ctx, LLVMValueRef s0, LLVMValueRef s1,
2470 if (ctx->gfx_level >= GFX10) {
2471 return ac_build_intrinsic(ctx, "llvm.fma.f32", ctx->f32, (LLVMValueRef[]){s0, s1, s2}, 3,
2475 return LLVMBuildFAdd(ctx->builder, LLVMBuildFMul(ctx->builder, s0, s1, ""), s2, "");
2478 void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned wait_flags)
2485 unsigned vmcnt = ctx->gfx_level >= GFX9 ? 63 : 15;
2496 if (ctx->gfx_level >= GFX10)
2506 LLVMBuildFence(ctx->builder, LLVMAtomicOrderingRelease, false, "");
2512 if (ctx->gfx_level >= GFX11)
2518 LLVMConstInt(ctx->i32, simm16, false),
2520 ac_build_intrinsic(ctx, "llvm.amdgcn.s.waitcnt", ctx->voidt, args, 1, 0);
2523 LLVMValueRef ac_build_fsat(struct ac_llvm_context *ctx, LLVMValueRef src,
2526 unsigned bitsize = ac_get_elem_bits(ctx, type);
2531 if (bitsize == 64 || (bitsize == 16 && ctx->gfx_level <= GFX8) || type == ctx->v2f16) {
2535 result = ac_build_fmin(ctx, ac_build_fmax(ctx, src, zero), one);
2542 type = ctx->f16;
2546 type = ctx->f32;
2555 result = ac_build_intrinsic(ctx, intr, type, params, 3,
2559 if (ctx->gfx_level < GFX9 && bitsize == 32) {
2561 result = ac_build_canonicalize(ctx, result, bitsize);
2567 LLVMValueRef ac_build_fract(struct ac_llvm_context *ctx, LLVMValueRef src0, unsigned bitsize)
2574 type = ctx->f16;
2577 type = ctx->f32;
2580 type = ctx->f64;
2586 return ac_build_intrinsic(ctx, intr, type, params, 1, AC_FUNC_ATTR_READNONE);
2589 LLVMValueRef ac_const_uint_vec(struct ac_llvm_context *ctx, LLVMTypeRef type, uint64_t value)
2604 LLVMValueRef ac_build_isign(struct ac_llvm_context *ctx, LLVMValueRef src0)
2610 val = ac_build_imax(ctx, src0, ac_const_uint_vec(ctx, type, -1));
2611 return ac_build_imin(ctx, val, ac_const_uint_vec(ctx, type, 1));
2614 static LLVMValueRef ac_eliminate_negative_zero(struct ac_llvm_context *ctx, LLVMValueRef val)
2616 ac_enable_signed_zeros(ctx);
2618 val = LLVMBuildFAdd(ctx->builder, val, LLVMConstNull(LLVMTypeOf(val)), "");
2619 ac_disable_signed_zeros(ctx);
2623 LLVMValueRef ac_build_fsign(struct ac_llvm_context *ctx, LLVMValueRef src)
2627 unsigned bitsize = ac_get_elem_bits(ctx, type);
2647 val = ac_to_integer(ctx, ac_eliminate_negative_zero(ctx, src));
2648 val = ac_build_isign(ctx, val);
2649 return LLVMBuildSIToFP(ctx->builder, val, type, "");
2653 pos = LLVMBuildFCmp(ctx->builder, LLVMRealOGT, src, ctx->f64_0, "");
2654 neg = LLVMBuildFCmp(ctx->builder, LLVMRealOLT, src, ctx->f64_0, "");
2655 dw[0] = ctx->i32_0;
2657 ctx->builder, pos, LLVMConstInt(ctx->i32, 0x3FF00000, 0),
2658 LLVMBuildSelect(ctx->builder, neg, LLVMConstInt(ctx->i32, 0xBFF00000, 0), ctx->i32_0, ""),
2660 return LLVMBuildBitCast(ctx->builder, ac_build_gather_values(ctx, dw, 2), ctx->f64, "");
2663 LLVMValueRef ac_build_bit_count(struct ac_llvm_context *ctx, LLVMValueRef src0)
2668 bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(src0));
2672 result = ac_build_intrinsic(ctx, "llvm.ctpop.i128", ctx->i128, (LLVMValueRef[]){src0}, 1,
2674 result = LLVMBuildTrunc(ctx->builder, result, ctx->i32, "");
2677 result = ac_build_intrinsic(ctx, "llvm.ctpop.i64", ctx->i64, (LLVMValueRef[]){src0}, 1,
2680 result = LLVMBuildTrunc(ctx->builder, result, ctx->i32, "");
2683 result = ac_build_intrinsic(ctx, "llvm.ctpop.i32", ctx->i32, (LLVMValueRef[]){src0}, 1,
2687 result = ac_build_intrinsic(ctx, "llvm.ctpop.i16", ctx->i16, (LLVMValueRef[]){src0}, 1,
2690 result = LLVMBuildZExt(ctx->builder, result, ctx->i32, "");
2693 result = ac_build_intrinsic(ctx, "llvm.ctpop.i8", ctx->i8, (LLVMValueRef[]){src0}, 1,
2696 result = LLVMBuildZExt(ctx->builder, result, ctx->i32, "");
2706 LLVMValueRef ac_build_bitfield_reverse(struct ac_llvm_context *ctx, LLVMValueRef src0)
2711 bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(src0));
2715 result = ac_build_intrinsic(ctx, "llvm.bitreverse.i64", ctx->i64, (LLVMValueRef[]){src0}, 1,
2718 result = LLVMBuildTrunc(ctx->builder, result, ctx->i32, "");
2721 result = ac_build_intrinsic(ctx, "llvm.bitreverse.i32", ctx->i32, (LLVMValueRef[]){src0}, 1,
2725 result = ac_build_intrinsic(ctx, "llvm.bitreverse.i16", ctx->i16, (LLVMValueRef[]){src0}, 1,
2728 result = LLVMBuildZExt(ctx->builder, result, ctx->i32, "");
2731 result = ac_build_intrinsic(ctx, "llvm.bitreverse.i8", ctx->i8, (LLVMValueRef[]){src0}, 1,
2734 result = LLVMBuildZExt(ctx->builder, result, ctx->i32, "");
2744 void ac_init_exec_full_mask(struct ac_llvm_context *ctx)
2746 LLVMValueRef full_mask = LLVMConstInt(ctx->i64, ~0ull, 0);
2747 ac_build_intrinsic(ctx, "llvm.amdgcn.init.exec", ctx->voidt, &full_mask, 1,
2751 void ac_declare_lds_as_pointer(struct ac_llvm_context *ctx)
2753 unsigned lds_size = ctx->gfx_level >= GFX7 ? 65536 : 32768;
2754 ctx->lds = LLVMBuildIntToPtr(
2755 ctx->builder, ctx->i32_0,
2756 LLVMPointerType(LLVMArrayType(ctx->i32, lds_size / 4), AC_ADDR_SPACE_LDS), "lds");
2759 LLVMValueRef ac_lds_load(struct ac_llvm_context *ctx, LLVMValueRef dw_addr)
2761 return LLVMBuildLoad2(ctx->builder, ctx->i32, ac_build_gep0(ctx, ctx->lds, dw_addr), "");
2764 void ac_lds_store(struct ac_llvm_context *ctx, LLVMValueRef dw_addr, LLVMValueRef value)
2766 value = ac_to_integer(ctx, value);
2767 ac_build_indexed_store(ctx, ctx->lds, dw_addr, value);
2770 LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx, LLVMTypeRef dst_type, LLVMValueRef src0)
2772 unsigned src0_bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(src0));
2780 type = ctx->i64;
2781 zero = ctx->i64_0;
2785 type = ctx->i32;
2786 zero = ctx->i32_0;
2790 type = ctx->i16;
2791 zero = ctx->i16_0;
2795 type = ctx->i8;
2796 zero = ctx->i8_0;
2814 ctx->i1true,
2817 LLVMValueRef lsb = ac_build_intrinsic(ctx, intrin_name, type, params, 2, AC_FUNC_ATTR_READNONE);
2820 lsb = LLVMBuildTrunc(ctx->builder, lsb, ctx->i32, "");
2822 lsb = LLVMBuildSExt(ctx->builder, lsb, ctx->i32, "");
2827 return LLVMBuildSelect(ctx->builder, LLVMBuildICmp(ctx->builder, LLVMIntEQ, src0, zero, ""),
2828 LLVMConstInt(ctx->i32, -1, 0), lsb, "");
2841 static struct ac_llvm_flow *get_current_flow(struct ac_llvm_context *ctx)
2843 if (ctx->flow->depth > 0)
2844 return &ctx->flow->stack[ctx->flow->depth - 1];
2848 static struct ac_llvm_flow *get_innermost_loop(struct ac_llvm_context *ctx)
2850 for (unsigned i = ctx->flow->depth; i > 0; --i) {
2851 if (ctx->flow->stack[i - 1].loop_entry_block)
2852 return &ctx->flow->stack[i - 1];
2857 static struct ac_llvm_flow *push_flow(struct ac_llvm_context *ctx)
2861 if (ctx->flow->depth >= ctx->flow->depth_max) {
2862 unsigned new_max = MAX2(ctx->flow->depth << 1, AC_LLVM_INITIAL_CF_DEPTH);
2864 ctx->flow->stack = realloc(ctx->flow->stack, new_max * sizeof(*ctx->flow->stack));
2865 ctx->flow->depth_max = new_max;
2868 flow = &ctx->flow->stack[ctx->flow->depth];
2869 ctx->flow->depth++;
2885 static LLVMBasicBlockRef append_basic_block(struct ac_llvm_context *ctx, const char *name)
2887 assert(ctx->flow->depth >= 1);
2889 if (ctx->flow->depth >= 2) {
2890 struct ac_llvm_flow *flow = &ctx->flow->stack[ctx->flow->depth - 2];
2892 return LLVMInsertBasicBlockInContext(ctx->context, flow->next_block, name);
2895 LLVMValueRef main_fn = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx->builder));
2896 return LLVMAppendBasicBlockInContext(ctx->context, main_fn, name);
2909 void ac_build_bgnloop(struct ac_llvm_context *ctx, int label_id)
2911 struct ac_llvm_flow *flow = push_flow(ctx);
2912 flow->loop_entry_block = append_basic_block(ctx, "LOOP");
2913 flow->next_block = append_basic_block(ctx, "ENDLOOP");
2915 LLVMBuildBr(ctx->builder, flow->loop_entry_block);
2916 LLVMPositionBuilderAtEnd(ctx->builder, flow->loop_entry_block);
2919 void ac_build_break(struct ac_llvm_context *ctx)
2921 struct ac_llvm_flow *flow = get_innermost_loop(ctx);
2922 LLVMBuildBr(ctx->builder, flow->next_block);
2925 void ac_build_continue(struct ac_llvm_context *ctx)
2927 struct ac_llvm_flow *flow = get_innermost_loop(ctx);
2928 LLVMBuildBr(ctx->builder, flow->loop_entry_block);
2931 void ac_build_else(struct ac_llvm_context *ctx, int label_id)
2933 struct ac_llvm_flow *current_branch = get_current_flow(ctx);
2938 endif_block = append_basic_block(ctx, "ENDIF");
2939 emit_default_branch(ctx->builder, endif_block);
2941 LLVMPositionBuilderAtEnd(ctx->builder, current_branch->next_block);
2948 static void ac_branch_exited(struct ac_llvm_context *ctx)
2950 if (ctx->flow->depth == 0 && ctx->conditional_demote_seen) {
2957 LLVMValueRef cond = LLVMBuildLoad2(ctx->builder, ctx->i1, ctx->postponed_kill, "");
2958 ac_build_kill_if_false(ctx, ac_build_wqm_vote(ctx, cond));
2959 ctx->conditional_demote_seen = false;
2963 void ac_build_endif(struct ac_llvm_context *ctx, int label_id)
2965 struct ac_llvm_flow *current_branch = get_current_flow(ctx);
2969 emit_default_branch(ctx->builder, current_branch->next_block);
2970 LLVMPositionBuilderAtEnd(ctx->builder, current_branch->next_block);
2973 ctx->flow->depth--;
2974 ac_branch_exited(ctx);
2977 void ac_build_endloop(struct ac_llvm_context *ctx, int label_id)
2979 struct ac_llvm_flow *current_loop = get_current_flow(ctx);
2983 emit_default_branch(ctx->builder, current_loop->loop_entry_block);
2985 LLVMPositionBuilderAtEnd(ctx->builder, current_loop->next_block);
2987 ctx->flow->depth--;
2988 ac_branch_exited(ctx);
2991 void ac_build_ifcc(struct ac_llvm_context *ctx, LLVMValueRef cond, int label_id)
2993 struct ac_llvm_flow *flow = push_flow(ctx);
2996 if_block = append_basic_block(ctx, "IF");
2997 flow->next_block = append_basic_block(ctx, "ELSE");
2999 LLVMBuildCondBr(ctx->builder, cond, if_block, flow->next_block);
3000 LLVMPositionBuilderAtEnd(ctx->builder, if_block);
3038 LLVMValueRef ac_cast_ptr(struct ac_llvm_context *ctx, LLVMValueRef ptr, LLVMTypeRef type)
3041 return LLVMBuildBitCast(ctx->builder, ptr, LLVMPointerType(type, addr_space), "");
3044 LLVMValueRef ac_trim_vector(struct ac_llvm_context *ctx, LLVMValueRef value, unsigned count)
3051 masks[0] = ctx->i32_0;
3052 masks[1] = ctx->i32_1;
3054 masks[i] = LLVMConstInt(ctx->i32, i, false);
3057 return LLVMBuildExtractElement(ctx->builder, value, masks[0], "");
3060 return LLVMBuildShuffleVector(ctx->builder, value, value, swizzle, "");
3064 LLVMValueRef ac_unpack_param(struct ac_llvm_context *ctx, LLVMValueRef param, unsigned rshift,
3069 value = LLVMBuildLShr(ctx->builder, value, LLVMConstInt(LLVMTypeOf(param), rshift, false), "");
3073 value = LLVMBuildAnd(ctx->builder, value, LLVMConstInt(LLVMTypeOf(param), mask, false), "");
3076 if (bitwidth <= 32 && LLVMTypeOf(param) == ctx->i64)
3077 value = LLVMBuildTrunc(ctx->builder, value, ctx->i32, "");
3138 static LLVMValueRef _ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src,
3145 ac_build_optimization_barrier(ctx, &src, false);
3147 src = LLVMBuildZExt(ctx->builder, src, ctx->i32, "");
3149 lane = LLVMBuildZExt(ctx->builder, lane, ctx->i32, "");
3152 ac_build_intrinsic(ctx, lane == NULL ? "llvm.amdgcn.readfirstlane" : "llvm.amdgcn.readlane",
3153 ctx->i32, (LLVMValueRef[]){src, lane}, lane == NULL ? 1 : 2,
3156 return LLVMBuildTrunc(ctx->builder, result, type, "");
3159 static LLVMValueRef ac_build_readlane_common(struct ac_llvm_context *ctx, LLVMValueRef src,
3163 src = ac_to_integer(ctx, src);
3169 LLVMTypeRef vec_type = LLVMVectorType(ctx->i32, bits / 32);
3170 LLVMValueRef src_vector = LLVMBuildBitCast(ctx->builder, src, vec_type, "");
3175 src = LLVMBuildExtractElement(ctx->builder, src_vector, LLVMConstInt(ctx->i32, i, 0), "");
3177 ret_comp = _ac_build_readlane(ctx, src, lane, with_opt_barrier);
3180 LLVMBuildInsertElement(ctx->builder, ret, ret_comp, LLVMConstInt(ctx->i32, i, 0), "");
3183 ret = _ac_build_readlane(ctx, src, lane, with_opt_barrier);
3187 return LLVMBuildIntToPtr(ctx->builder, ret, src_type, "");
3188 return LLVMBuildBitCast(ctx->builder, ret, src_type, "");
3197 * @param ctx
3202 LLVMValueRef ac_build_readlane_no_opt_barrier(struct ac_llvm_context *ctx, LLVMValueRef src,
3205 return ac_build_readlane_common(ctx, src, lane, false);
3208 LLVMValueRef ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef lane)
3210 return ac_build_readlane_common(ctx, src, lane, true);
3213 LLVMValueRef ac_build_writelane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef value,
3216 return ac_build_intrinsic(ctx, "llvm.amdgcn.writelane", ctx->i32,
3221 LLVMValueRef ac_build_mbcnt_add(struct ac_llvm_context *ctx, LLVMValueRef mask, LLVMValueRef add_src)
3225 if (ctx->wave_size == 32) {
3226 val = ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.lo", ctx->i32,
3227 (LLVMValueRef[]){mask, ctx->i32_0}, 2, AC_FUNC_ATTR_READNONE);
3229 LLVMValueRef mask_vec = LLVMBuildBitCast(ctx->builder, mask, ctx->v2i32, "");
3230 LLVMValueRef mask_lo = LLVMBuildExtractElement(ctx->builder, mask_vec, ctx->i32_0, "");
3231 LLVMValueRef mask_hi = LLVMBuildExtractElement(ctx->builder, mask_vec, ctx->i32_1, "");
3232 val = ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.lo", ctx->i32,
3233 (LLVMValueRef[]){mask_lo, ctx->i32_0}, 2, AC_FUNC_ATTR_READNONE);
3234 val = ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.hi", ctx->i32, (LLVMValueRef[]){mask_hi, val},
3241 if (add_src != NULL && add_src != ctx->i32_0) {
3242 return LLVMBuildAdd(ctx->builder, val, add_src, "");
3248 LLVMValueRef ac_build_mbcnt(struct ac_llvm_context *ctx, LLVMValueRef mask)
3250 return ac_build_mbcnt_add(ctx, mask, ctx->i32_0);
3282 static LLVMValueRef _ac_build_dpp(struct ac_llvm_context *ctx, LLVMValueRef old, LLVMValueRef src,
3289 old = LLVMBuildZExt(ctx->builder, old, ctx->i32, "");
3290 src = LLVMBuildZExt(ctx->builder, src, ctx->i32, "");
3293 ctx, "llvm.amdgcn.update.dpp.i32", ctx->i32,
3294 (LLVMValueRef[]){old, src, LLVMConstInt(ctx->i32, dpp_ctrl, 0),
3295 LLVMConstInt(ctx->i32, row_mask, 0), LLVMConstInt(ctx->i32, bank_mask, 0),
3296 LLVMConstInt(ctx->i1, bound_ctrl, 0)},
3299 return LLVMBuildTrunc(ctx->builder, res, type, "");
3302 static LLVMValueRef ac_build_dpp(struct ac_llvm_context *ctx, LLVMValueRef old, LLVMValueRef src,
3307 src = ac_to_integer(ctx, src);
3308 old = ac_to_integer(ctx, old);
3313 LLVMTypeRef vec_type = LLVMVectorType(ctx->i32, bits / 32);
3314 LLVMValueRef src_vector = LLVMBuildBitCast(ctx->builder, src, vec_type, "");
3315 LLVMValueRef old_vector = LLVMBuildBitCast(ctx->builder, old, vec_type, "");
3318 src = LLVMBuildExtractElement(ctx->builder, src_vector, LLVMConstInt(ctx->i32, i, 0), "");
3319 old = LLVMBuildExtractElement(ctx->builder, old_vector, LLVMConstInt(ctx->i32, i, 0), "");
3321 _ac_build_dpp(ctx, old, src, dpp_ctrl, row_mask, bank_mask, bound_ctrl);
3323 LLVMBuildInsertElement(ctx->builder, ret, ret_comp, LLVMConstInt(ctx->i32, i, 0), "");
3326 ret = _ac_build_dpp(ctx, old, src, dpp_ctrl, row_mask, bank_mask, bound_ctrl);
3328 return LLVMBuildBitCast(ctx->builder, ret, src_type, "");
3331 static LLVMValueRef _ac_build_permlane16(struct ac_llvm_context *ctx, LLVMValueRef src,
3337 src = LLVMBuildZExt(ctx->builder, src, ctx->i32, "");
3342 LLVMConstInt(ctx->i32, sel, false),
3343 LLVMConstInt(ctx->i32, sel >> 32, false),
3344 ctx->i1true, /* fi */
3345 bound_ctrl ? ctx->i1true : ctx->i1false,
3349 ac_build_intrinsic(ctx, exchange_rows ? "llvm.amdgcn.permlanex16" : "llvm.amdgcn.permlane16",
3350 ctx->i32, args, 6, AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
3352 return LLVMBuildTrunc(ctx->builder, result, type, "");
3355 static LLVMValueRef ac_build_permlane16(struct ac_llvm_context *ctx, LLVMValueRef src, uint64_t sel,
3359 src = ac_to_integer(ctx, src);
3364 LLVMTypeRef vec_type = LLVMVectorType(ctx->i32, bits / 32);
3365 LLVMValueRef src_vector = LLVMBuildBitCast(ctx->builder, src, vec_type, "");
3368 src = LLVMBuildExtractElement(ctx->builder, src_vector, LLVMConstInt(ctx->i32, i, 0), "");
3369 LLVMValueRef ret_comp = _ac_build_permlane16(ctx, src, sel, exchange_rows, bound_ctrl);
3371 LLVMBuildInsertElement(ctx->builder, ret, ret_comp, LLVMConstInt(ctx->i32, i, 0), "");
3374 ret = _ac_build_permlane16(ctx, src, sel, exchange_rows, bound_ctrl);
3376 return LLVMBuildBitCast(ctx->builder, ret, src_type, "");
3385 static LLVMValueRef _ac_build_ds_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src,
3391 src = LLVMBuildZExt(ctx->builder, src, ctx->i32, "");
3393 ret = ac_build_intrinsic(ctx, "llvm.amdgcn.ds.swizzle", ctx->i32,
3394 (LLVMValueRef[]){src, LLVMConstInt(ctx->i32, mask, 0)}, 2,
3397 return LLVMBuildTrunc(ctx->builder, ret, src_type, "");
3400 LLVMValueRef ac_build_ds_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned mask)
3403 src = ac_to_integer(ctx, src);
3408 LLVMTypeRef vec_type = LLVMVectorType(ctx->i32, bits / 32);
3409 LLVMValueRef src_vector = LLVMBuildBitCast(ctx->builder, src, vec_type, "");
3412 src = LLVMBuildExtractElement(ctx->builder, src_vector, LLVMConstInt(ctx->i32, i, 0), "");
3413 LLVMValueRef ret_comp = _ac_build_ds_swizzle(ctx, src, mask);
3415 LLVMBuildInsertElement(ctx->builder, ret, ret_comp, LLVMConstInt(ctx->i32, i, 0), "");
3418 ret = _ac_build_ds_swizzle(ctx, src, mask);
3420 return LLVMBuildBitCast(ctx->builder, ret, src_type, "");
3423 static LLVMValueRef ac_build_wwm(struct ac_llvm_context *ctx, LLVMValueRef src)
3426 unsigned bitsize = ac_get_elem_bits(ctx, src_type);
3430 src = ac_to_integer(ctx, src);
3433 src = LLVMBuildZExt(ctx->builder, src, ctx->i32, "");
3437 ret = ac_build_intrinsic(ctx, name, LLVMTypeOf(src), (LLVMValueRef[]){src}, 1,
3441 ret = LLVMBuildTrunc(ctx->builder, ret, ac_to_integer_type(ctx, src_type), "");
3443 return LLVMBuildBitCast(ctx->builder, ret, src_type, "");
3446 static LLVMValueRef ac_build_set_inactive(struct ac_llvm_context *ctx, LLVMValueRef src,
3451 unsigned bitsize = ac_get_elem_bits(ctx, src_type);
3452 src = ac_to_integer(ctx, src);
3453 inactive = ac_to_integer(ctx, inactive);
3456 src = LLVMBuildZExt(ctx->builder, src, ctx->i32, "");
3457 inactive = LLVMBuildZExt(ctx->builder, inactive, ctx->i32, "");
3463 ac_build_intrinsic(ctx, name, LLVMTypeOf(src), (LLVMValueRef[]){src, inactive}, 2,
3466 ret = LLVMBuildTrunc(ctx->builder, ret, src_type, "");
3471 static LLVMValueRef get_reduction_identity(struct ac_llvm_context *ctx, nir_op op,
3479 return LLVMConstInt(ctx->i1, 0, 0);
3481 return LLVMConstInt(ctx->i1, 1, 0);
3488 return ctx->i8_0;
3490 return ctx->i8_1;
3492 return LLVMConstInt(ctx->i8, INT8_MAX, 0);
3494 return LLVMConstInt(ctx->i8, UINT8_MAX, 0);
3496 return LLVMConstInt(ctx->i8, INT8_MIN, 0);
3498 return ctx->i8_0;
3500 return LLVMConstInt(ctx->i8, -1, 0);
3502 return ctx->i8_0;
3504 return ctx->i8_0;
3511 return ctx->i16_0;
3513 return ctx->f16_0;
3515 return ctx->i16_1;
3517 return ctx->f16_1;
3519 return LLVMConstInt(ctx->i16, INT16_MAX, 0);
3521 return LLVMConstInt(ctx->i16, UINT16_MAX, 0);
3523 return LLVMConstReal(ctx->f16, INFINITY);
3525 return LLVMConstInt(ctx->i16, INT16_MIN, 0);
3527 return ctx->i16_0;
3529 return LLVMConstReal(ctx->f16, -INFINITY);
3531 return LLVMConstInt(ctx->i16, -1, 0);
3533 return ctx->i16_0;
3535 return ctx->i16_0;
3542 return ctx->i32_0;
3544 return ctx->f32_0;
3546 return ctx->i32_1;
3548 return ctx->f32_1;
3550 return LLVMConstInt(ctx->i32, INT32_MAX, 0);
3552 return LLVMConstInt(ctx->i32, UINT32_MAX, 0);
3554 return LLVMConstReal(ctx->f32, INFINITY);
3556 return LLVMConstInt(ctx->i32, INT32_MIN, 0);
3558 return ctx->i32_0;
3560 return LLVMConstReal(ctx->f32, -INFINITY);
3562 return LLVMConstInt(ctx->i32, -1, 0);
3564 return ctx->i32_0;
3566 return ctx->i32_0;
3573 return ctx->i64_0;
3575 return ctx->f64_0;
3577 return ctx->i64_1;
3579 return ctx->f64_1;
3581 return LLVMConstInt(ctx->i64, INT64_MAX, 0);
3583 return LLVMConstInt(ctx->i64, UINT64_MAX, 0);
3585 return LLVMConstReal(ctx->f64, INFINITY);
3587 return LLVMConstInt(ctx->i64, INT64_MIN, 0);
3589 return ctx->i64_0;
3591 return LLVMConstReal(ctx->f64, -INFINITY);
3593 return LLVMConstInt(ctx->i64, -1, 0);
3595 return ctx->i64_0;
3597 return ctx->i64_0;
3604 static LLVMValueRef ac_build_alu_op(struct ac_llvm_context *ctx, LLVMValueRef lhs, LLVMValueRef rhs,
3611 return LLVMBuildAdd(ctx->builder, lhs, rhs, "");
3613 return LLVMBuildFAdd(ctx->builder, lhs, rhs, "");
3615 return LLVMBuildMul(ctx->builder, lhs, rhs, "");
3617 return LLVMBuildFMul(ctx->builder, lhs, rhs, "");
3619 return LLVMBuildSelect(ctx->builder, LLVMBuildICmp(ctx->builder, LLVMIntSLT, lhs, rhs, ""),
3622 return LLVMBuildSelect(ctx->builder, LLVMBuildICmp(ctx->builder, LLVMIntULT, lhs, rhs, ""),
3626 ctx, _64bit ? "llvm.minnum.f64" : _32bit ? "llvm.minnum.f32" : "llvm.minnum.f16",
3627 _64bit ? ctx->f64 : _32bit ? ctx->f32 : ctx->f16, (LLVMValueRef[]){lhs, rhs}, 2,
3630 return LLVMBuildSelect(ctx->builder, LLVMBuildICmp(ctx->builder, LLVMIntSGT, lhs, rhs, ""),
3633 return LLVMBuildSelect(ctx->builder, LLVMBuildICmp(ctx->builder, LLVMIntUGT, lhs, rhs, ""),
3637 ctx, _64bit ? "llvm.maxnum.f64" : _32bit ? "llvm.maxnum.f32" : "llvm.maxnum.f16",
3638 _64bit ? ctx->f64 : _32bit ? ctx->f32 : ctx->f16, (LLVMValueRef[]){lhs, rhs}, 2,
3641 return LLVMBuildAnd(ctx->builder, lhs, rhs, "");
3643 return LLVMBuildOr(ctx->builder, lhs, rhs, "");
3645 return LLVMBuildXor(ctx->builder, lhs, rhs, "");
3658 static LLVMValueRef ac_wavefront_shift_right_1(struct ac_llvm_context *ctx, LLVMValueRef src,
3661 if (ctx->gfx_level >= GFX10) {
3664 LLVMValueRef tid = ac_get_thread_id(ctx);
3666 tmp1 = ac_build_dpp(ctx, identity, src, dpp_row_sr(1), 0xf, 0xf, false);
3668 tmp2 = ac_build_permlane16(ctx, src, (uint64_t)~0, true, false);
3672 LLVMBuildICmp(ctx->builder, LLVMIntEQ, tid, LLVMConstInt(ctx->i32, 32, false), "");
3674 tmp2 = LLVMBuildSelect(ctx->builder, active,
3675 ac_build_readlane(ctx, src, LLVMConstInt(ctx->i32, 31, false)),
3679 ctx->builder, active,
3680 LLVMBuildICmp(ctx->builder, LLVMIntEQ,
3681 LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 0x1f, false), ""),
3682 LLVMConstInt(ctx->i32, 0x10, false), ""),
3684 return LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, "");
3687 LLVMBuildICmp(ctx->builder, LLVMIntEQ, tid, LLVMConstInt(ctx->i32, 16, false), "");
3689 return LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, "");
3691 } else if (ctx->gfx_level >= GFX8) {
3692 return ac_build_dpp(ctx, identity, src, dpp_wf_sr1, 0xf, 0xf, false);
3697 LLVMValueRef tid = ac_get_thread_id(ctx);
3698 tmp1 = ac_build_ds_swizzle(ctx, src, (1 << 15) | dpp_quad_perm(0, 0, 1, 2));
3699 tmp2 = ac_build_ds_swizzle(ctx, src, ds_pattern_bitmode(0x18, 0x03, 0x00));
3700 active = LLVMBuildICmp(ctx->builder, LLVMIntEQ,
3701 LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 0x7, 0), ""),
3702 LLVMConstInt(ctx->i32, 0x4, 0), "");
3703 tmp1 = LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, "");
3704 tmp2 = ac_build_ds_swizzle(ctx, src, ds_pattern_bitmode(0x10, 0x07, 0x00));
3705 active = LLVMBuildICmp(ctx->builder, LLVMIntEQ,
3706 LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 0xf, 0), ""),
3707 LLVMConstInt(ctx->i32, 0x8, 0), "");
3708 tmp1 = LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, "");
3709 tmp2 = ac_build_ds_swizzle(ctx, src, ds_pattern_bitmode(0x00, 0x0f, 0x00));
3710 active = LLVMBuildICmp(ctx->builder, LLVMIntEQ,
3711 LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 0x1f, 0), ""),
3712 LLVMConstInt(ctx->i32, 0x10, 0), "");
3713 tmp1 = LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, "");
3714 tmp2 = ac_build_readlane(ctx, src, LLVMConstInt(ctx->i32, 31, 0));
3715 active = LLVMBuildICmp(ctx->builder, LLVMIntEQ, tid, LLVMConstInt(ctx->i32, 32, 0), "");
3716 tmp1 = LLVMBuildSelect(ctx->builder, active, tmp2, tmp1, "");
3717 active = LLVMBuildICmp(ctx->builder, LLVMIntEQ, tid, LLVMConstInt(ctx->i32, 0, 0), "");
3718 return LLVMBuildSelect(ctx->builder, active, identity, tmp1, "");
3725 static LLVMValueRef ac_build_scan(struct ac_llvm_context *ctx, nir_op op, LLVMValueRef src,
3731 src = ac_wavefront_shift_right_1(ctx, src, identity, maxprefix);
3735 if (ctx->gfx_level <= GFX7) {
3737 LLVMValueRef tid = ac_get_thread_id(ctx);
3739 tmp = ac_build_ds_swizzle(ctx, src, ds_pattern_bitmode(0x1e, 0x00, 0x00));
3740 active = LLVMBuildICmp(ctx->builder, LLVMIntNE,
3741 LLVMBuildAnd(ctx->builder, tid, ctx->i32_1, ""), ctx->i32_0, "");
3742 tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, "");
3743 result = ac_build_alu_op(ctx, result, tmp, op);
3744 tmp = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x1c, 0x01, 0x00));
3745 active = LLVMBuildICmp(ctx->builder, LLVMIntNE,
3746 LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 2, 0), ""),
3747 ctx->i32_0, "");
3748 tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, "");
3749 result = ac_build_alu_op(ctx, result, tmp, op);
3750 tmp = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x18, 0x03, 0x00));
3751 active = LLVMBuildICmp(ctx->builder, LLVMIntNE,
3752 LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 4, 0), ""),
3753 ctx->i32_0, "");
3754 tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, "");
3755 result = ac_build_alu_op(ctx, result, tmp, op);
3756 tmp = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x10, 0x07, 0x00));
3757 active = LLVMBuildICmp(ctx->builder, LLVMIntNE,
3758 LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 8, 0), ""),
3759 ctx->i32_0, "");
3760 tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, "");
3761 result = ac_build_alu_op(ctx, result, tmp, op);
3762 tmp = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x00, 0x0f, 0x00));
3763 active = LLVMBuildICmp(ctx->builder, LLVMIntNE,
3764 LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 16, 0), ""),
3765 ctx->i32_0, "");
3766 tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, "");
3767 result = ac_build_alu_op(ctx, result, tmp, op);
3768 tmp = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 31, 0));
3769 active = LLVMBuildICmp(ctx->builder, LLVMIntNE,
3770 LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 32, 0), ""),
3771 ctx->i32_0, "");
3772 tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, "");
3773 result = ac_build_alu_op(ctx, result, tmp, op);
3779 tmp = ac_build_dpp(ctx, identity, src, dpp_row_sr(1), 0xf, 0xf, false);
3780 result = ac_build_alu_op(ctx, result, tmp, op);
3783 tmp = ac_build_dpp(ctx, identity, src, dpp_row_sr(2), 0xf, 0xf, false);
3784 result = ac_build_alu_op(ctx, result, tmp, op);
3787 tmp = ac_build_dpp(ctx, identity, src, dpp_row_sr(3), 0xf, 0xf, false);
3788 result = ac_build_alu_op(ctx, result, tmp, op);
3791 tmp = ac_build_dpp(ctx, identity, result, dpp_row_sr(4), 0xf, 0xe, false);
3792 result = ac_build_alu_op(ctx, result, tmp, op);
3795 tmp = ac_build_dpp(ctx, identity, result, dpp_row_sr(8), 0xf, 0xc, false);
3796 result = ac_build_alu_op(ctx, result, tmp, op);
3800 if (ctx->gfx_level >= GFX10) {
3801 LLVMValueRef tid = ac_get_thread_id(ctx);
3804 tmp = ac_build_permlane16(ctx, result, ~(uint64_t)0, true, false);
3806 active = LLVMBuildICmp(ctx->builder, LLVMIntNE,
3807 LLVMBuildAnd(ctx->builder, tid, LLVMConstInt(ctx->i32, 16, false), ""),
3808 ctx->i32_0, "");
3810 tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, "");
3812 result = ac_build_alu_op(ctx, result, tmp, op);
3817 tmp = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 31, false));
3819 active = LLVMBuildICmp(ctx->builder, LLVMIntUGE, tid, LLVMConstInt(ctx->i32, 32, false), "");
3821 tmp = LLVMBuildSelect(ctx->builder, active, tmp, identity, "");
3823 result = ac_build_alu_op(ctx, result, tmp, op);
3827 tmp = ac_build_dpp(ctx, identity, result, dpp_row_bcast15, 0xa, 0xf, false);
3828 result = ac_build_alu_op(ctx, result, tmp, op);
3831 tmp = ac_build_dpp(ctx, identity, result, dpp_row_bcast31, 0xc, 0xf, false);
3832 result = ac_build_alu_op(ctx, result, tmp, op);
3836 LLVMValueRef ac_build_inclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op)
3840 if (LLVMTypeOf(src) == ctx->i1 && op == nir_op_iadd) {
3841 LLVMBuilderRef builder = ctx->builder;
3842 src = LLVMBuildZExt(builder, src, ctx->i32, "");
3843 result = ac_build_ballot(ctx, src);
3844 result = ac_build_mbcnt(ctx, result);
3849 ac_build_optimization_barrier(ctx, &src, false);
3851 LLVMValueRef identity = get_reduction_identity(ctx, op, ac_get_type_size(LLVMTypeOf(src)));
3852 result = LLVMBuildBitCast(ctx->builder, ac_build_set_inactive(ctx, src, identity),
3854 result = ac_build_scan(ctx, op, result, identity, ctx->wave_size, true);
3856 return ac_build_wwm(ctx, result);
3859 LLVMValueRef ac_build_exclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op)
3863 if (LLVMTypeOf(src) == ctx->i1 && op == nir_op_iadd) {
3864 LLVMBuilderRef builder = ctx->builder;
3865 src = LLVMBuildZExt(builder, src, ctx->i32, "");
3866 result = ac_build_ballot(ctx, src);
3867 result = ac_build_mbcnt(ctx, result);
3871 ac_build_optimization_barrier(ctx, &src, false);
3873 LLVMValueRef identity = get_reduction_identity(ctx, op, ac_get_type_size(LLVMTypeOf(src)));
3874 result = LLVMBuildBitCast(ctx->builder, ac_build_set_inactive(ctx, src, identity),
3876 result = ac_build_scan(ctx, op, result, identity, ctx->wave_size, false);
3878 return ac_build_wwm(ctx, result);
3881 LLVMValueRef ac_build_reduce(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op,
3886 ac_build_optimization_barrier(ctx, &src, false);
3888 LLVMValueRef identity = get_reduction_identity(ctx, op, ac_get_type_size(LLVMTypeOf(src)));
3889 result = LLVMBuildBitCast(ctx->builder, ac_build_set_inactive(ctx, src, identity),
3891 swap = ac_build_quad_swizzle(ctx, result, 1, 0, 3, 2);
3892 result = ac_build_alu_op(ctx, result, swap, op);
3894 return ac_build_wwm(ctx, result);
3896 swap = ac_build_quad_swizzle(ctx, result, 2, 3, 0, 1);
3897 result = ac_build_alu_op(ctx, result, swap, op);
3899 return ac_build_wwm(ctx, result);
3901 if (ctx->gfx_level >= GFX8)
3902 swap = ac_build_dpp(ctx, identity, result, dpp_row_half_mirror, 0xf, 0xf, false);
3904 swap = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x1f, 0, 0x04));
3905 result = ac_build_alu_op(ctx, result, swap, op);
3907 return ac_build_wwm(ctx, result);
3909 if (ctx->gfx_level >= GFX8)
3910 swap = ac_build_dpp(ctx, identity, result, dpp_row_mirror, 0xf, 0xf, false);
3912 swap = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x1f, 0, 0x08));
3913 result = ac_build_alu_op(ctx, result, swap, op);
3915 return ac_build_wwm(ctx, result);
3917 if (ctx->gfx_level >= GFX10)
3918 swap = ac_build_permlane16(ctx, result, 0, true, false);
3919 else if (ctx->gfx_level >= GFX8 && cluster_size != 32)
3920 swap = ac_build_dpp(ctx, identity, result, dpp_row_bcast15, 0xa, 0xf, false);
3922 swap = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x1f, 0, 0x10));
3923 result = ac_build_alu_op(ctx, result, swap, op);
3925 return ac_build_wwm(ctx, result);
3927 if (ctx->gfx_level >= GFX8) {
3928 if (ctx->wave_size == 64) {
3929 if (ctx->gfx_level >= GFX10)
3930 swap = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 31, false));
3932 swap = ac_build_dpp(ctx, identity, result, dpp_row_bcast31, 0xc, 0xf, false);
3933 result = ac_build_alu_op(ctx, result, swap, op);
3934 result = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 63, 0));
3937 return ac_build_wwm(ctx, result);
3939 swap = ac_build_readlane(ctx, result, ctx->i32_0);
3940 result = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 32, 0));
3941 result = ac_build_alu_op(ctx, result, swap, op);
3942 return ac_build_wwm(ctx, result);
3953 void ac_build_wg_wavescan_top(struct ac_llvm_context *ctx, struct ac_wg_scan *ws)
3958 const LLVMValueRef last_lane = LLVMConstInt(ctx->i32, ctx->wave_size - 1, false);
3959 LLVMBuilderRef builder = ctx->builder;
3960 LLVMValueRef tid = ac_get_thread_id(ctx);
3964 ac_build_ifcc(ctx, tmp, 1000);
3967 ac_build_endif(ctx, 1000);
3976 void ac_build_wg_wavescan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws)
3979 const LLVMValueRef identity = get_reduction_identity(ctx, ws->op, ac_get_type_size(type));
3989 LLVMBuilderRef builder = ctx->builder;
3990 LLVMValueRef tid = ac_get_thread_id(ctx);
4004 ac_build_ifcc(ctx, tmp, 1001);
4009 ac_build_optimization_barrier(ctx, &tmp, false);
4012 phivalues_scan[1] = ac_build_scan(ctx, ws->op, tmp, identity, ws->maxwaves, true);
4014 ac_build_endif(ctx, 1001);
4016 const LLVMValueRef scan = ac_build_phi(ctx, type, 2, phivalues_scan, bbs);
4019 tmp = LLVMBuildSub(builder, ws->numwaves, ctx->i32_1, "");
4020 ws->result_reduce = ac_build_readlane(ctx, scan, tmp);
4023 ws->result_inclusive = ac_build_readlane(ctx, scan, ws->waveidx);
4025 tmp = LLVMBuildSub(builder, ws->waveidx, ctx->i32_1, "");
4026 tmp = ac_build_readlane(ctx, scan, tmp);
4027 tmp2 = LLVMBuildICmp(builder, LLVMIntEQ, ws->waveidx, ctx->i32_0, "");
4041 void ac_build_wg_wavescan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws)
4043 ac_build_wg_wavescan_top(ctx, ws);
4044 ac_build_waitcnt(ctx, AC_WAIT_LGKM);
4045 ac_build_s_barrier(ctx, ws->stage);
4046 ac_build_wg_wavescan_bottom(ctx, ws);
4055 void ac_build_wg_scan_top(struct ac_llvm_context *ctx, struct ac_wg_scan *ws)
4058 ws->extra = ac_build_exclusive_scan(ctx, ws->src, ws->op);
4059 if (LLVMTypeOf(ws->src) == ctx->i1 && ws->op == nir_op_iadd)
4060 ws->src = LLVMBuildZExt(ctx->builder, ws->src, ctx->i32, "");
4061 ws->src = ac_build_alu_op(ctx, ws->extra, ws->src, ws->op);
4063 ws->src = ac_build_inclusive_scan(ctx, ws->src, ws->op);
4070 ac_build_wg_wavescan_top(ctx, ws);
4081 void ac_build_wg_scan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws)
4087 ac_build_wg_wavescan_bottom(ctx, ws);
4093 ws->result_inclusive = ac_build_alu_op(ctx, ws->result_inclusive, ws->src, ws->op);
4095 ws->result_exclusive = ac_build_alu_op(ctx, ws->result_exclusive, ws->extra, ws->op);
4104 void ac_build_wg_scan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws)
4106 ac_build_wg_scan_top(ctx, ws);
4107 ac_build_waitcnt(ctx, AC_WAIT_LGKM);
4108 ac_build_s_barrier(ctx, ws->stage);
4109 ac_build_wg_scan_bottom(ctx, ws);
4112 static void _ac_build_dual_src_blend_swizzle(struct ac_llvm_context *ctx,
4121 src0 = LLVMBuildBitCast(ctx->builder, *arg0, ctx->i32, "");
4122 src1 = LLVMBuildBitCast(ctx->builder, *arg1, ctx->i32, "");
4126 params[1] = LLVMConstInt(ctx->i32, 0xde54c1, 0);
4127 src0 = ac_build_intrinsic(ctx, "llvm.amdgcn.mov.dpp8.i32",
4128 ctx->i32, params, 2, AC_FUNC_ATTR_CONVERGENT);
4131 tid = ac_get_thread_id(ctx);
4132 is_even = LLVMBuildICmp(ctx->builder, LLVMIntEQ,
4133 LLVMBuildAnd(ctx->builder, tid, ctx->i32_1, ""),
4134 ctx->i32_0, "");
4136 src0 = LLVMBuildSelect(ctx->builder, is_even, src1, src0, "");
4137 src1 = LLVMBuildSelect(ctx->builder, is_even, tmp0, src1, "");
4141 params[1] = LLVMConstInt(ctx->i32, 0xde54c1, 0);
4142 src0 = ac_build_intrinsic(ctx, "llvm.amdgcn.mov.dpp8.i32",
4143 ctx->i32, params, 2, AC_FUNC_ATTR_CONVERGENT);
4149 void ac_build_dual_src_blend_swizzle(struct ac_llvm_context *ctx,
4153 assert(ctx->gfx_level >= GFX11);
4158 _ac_build_dual_src_blend_swizzle(ctx, &mrt0->out[i], &mrt1->out[i]);
4162 LLVMValueRef ac_build_quad_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned lane0,
4166 if (ctx->gfx_level >= GFX8) {
4167 return ac_build_dpp(ctx, src, src, mask, 0xf, 0xf, false);
4169 return ac_build_ds_swizzle(ctx, src, (1 << 15) | mask);
4173 LLVMValueRef ac_build_shuffle(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef index)
4178 index = LLVMBuildMul(ctx->builder, index, LLVMConstInt(ctx->i32, 4, 0), "");
4179 src = LLVMBuildZExt(ctx->builder, src, ctx->i32, "");
4182 ac_build_intrinsic(ctx, "llvm.amdgcn.ds.bpermute", ctx->i32, (LLVMValueRef[]){index, src}, 2,
4184 return LLVMBuildTrunc(ctx->builder, result, type, "");
4187 LLVMValueRef ac_build_frexp_exp(struct ac_llvm_context *ctx, LLVMValueRef src0, unsigned bitsize)
4194 type = ctx->i16;
4197 type = ctx->i32;
4200 type = ctx->i32;
4206 return ac_build_intrinsic(ctx, intr, type, params, 1, AC_FUNC_ATTR_READNONE);
4208 LLVMValueRef ac_build_frexp_mant(struct ac_llvm_context *ctx, LLVMValueRef src0, unsigned bitsize)
4215 type = ctx->f16;
4218 type = ctx->f32;
4221 type = ctx->f64;
4227 return ac_build_intrinsic(ctx, intr, type, params, 1, AC_FUNC_ATTR_READNONE);
4230 LLVMValueRef ac_build_canonicalize(struct ac_llvm_context *ctx, LLVMValueRef src0, unsigned bitsize)
4237 type = ctx->f16;
4240 type = ctx->f32;
4243 type = ctx->f64;
4249 return ac_build_intrinsic(ctx, intr, type, params, 1, AC_FUNC_ATTR_READNONE);
4257 LLVMValueRef ac_build_ddxy_interp(struct ac_llvm_context *ctx, LLVMValueRef interp_ij)
4263 a = LLVMBuildExtractElement(ctx->builder, interp_ij, LLVMConstInt(ctx->i32, i, false), "");
4264 result[i] = ac_build_ddxy(ctx, AC_TID_MASK_TOP_LEFT, 1, a);
4265 result[2 + i] = ac_build_ddxy(ctx, AC_TID_MASK_TOP_LEFT, 2, a);
4267 return ac_build_gather_values(ctx, result, 4);
4270 LLVMValueRef ac_build_load_helper_invocation(struct ac_llvm_context *ctx)
4275 result = ac_build_intrinsic(ctx, "llvm.amdgcn.live.mask", ctx->i1, NULL, 0,
4278 result = ac_build_intrinsic(ctx, "llvm.amdgcn.ps.live", ctx->i1, NULL, 0,
4281 return LLVMBuildNot(ctx->builder, result, "");
4284 LLVMValueRef ac_build_is_helper_invocation(struct ac_llvm_context *ctx)
4286 if (!ctx->postponed_kill)
4287 return ac_build_load_helper_invocation(ctx);
4294 ac_build_intrinsic(ctx, "llvm.amdgcn.ps.live", ctx->i1, NULL, 0, AC_FUNC_ATTR_READNONE);
4296 LLVMValueRef postponed = LLVMBuildLoad2(ctx->builder, ctx->i1, ctx->postponed_kill, "");
4297 return LLVMBuildNot(ctx->builder, LLVMBuildAnd(ctx->builder, exact, postponed, ""), "");
4300 LLVMValueRef ac_build_call(struct ac_llvm_context *ctx, LLVMValueRef func, LLVMValueRef *args,
4303 LLVMValueRef ret = LLVMBuildCall(ctx->builder, func, args, num_args, "");
4308 void ac_export_mrt_z(struct ac_llvm_context *ctx, LLVMValueRef depth, LLVMValueRef stencil,
4329 args->out[0] = LLVMGetUndef(ctx->f32); /* R, depth */
4330 args->out[1] = LLVMGetUndef(ctx->f32); /* G, stencil test val[0:7], stencil op val[8:15] */
4331 args->out[2] = LLVMGetUndef(ctx->f32); /* B, sample mask */
4332 args->out[3] = LLVMGetUndef(ctx->f32); /* A, alpha to mask */
4336 args->compr = ctx->gfx_level < GFX11; /* COMPR flag */
4340 stencil = ac_to_integer(ctx, stencil);
4341 stencil = LLVMBuildShl(ctx->builder, stencil, LLVMConstInt(ctx->i32, 16, 0), "");
4342 args->out[0] = ac_to_float(ctx, stencil);
4343 mask |= ctx->gfx_level >= GFX11 ? 0x1 : 0x3;
4348 mask |= ctx->gfx_level >= GFX11 ? 0x2 : 0xc;
4371 if (ctx->gfx_level == GFX6 && ctx->family != CHIP_OLAND && ctx->family != CHIP_HAINAN)
4383 void ac_build_sendmsg_gs_alloc_req(struct ac_llvm_context *ctx, LLVMValueRef wave_id,
4386 LLVMBuilderRef builder = ctx->builder;
4394 if (prim_cnt == ctx->i32_0 && ctx->gfx_level == GFX10) {
4395 assert(vtx_cnt == ctx->i32_0);
4396 prim_cnt = ctx->i32_1;
4397 vtx_cnt = ctx->i32_1;
4402 ac_build_ifcc(ctx, LLVMBuildICmp(builder, LLVMIntEQ, wave_id, ctx->i32_0, ""), 5020);
4404 tmp = LLVMBuildShl(builder, prim_cnt, LLVMConstInt(ctx->i32, 12, false), "");
4406 ac_build_sendmsg(ctx, AC_SENDMSG_GS_ALLOC_REQ, tmp);
4411 prim.passthrough = ctx->i32_0;
4415 pos.out[0] = pos.out[1] = pos.out[2] = pos.out[3] = LLVMConstReal(ctx->f32, NAN);
4420 ac_build_ifcc(ctx, LLVMBuildICmp(builder, LLVMIntEQ, ac_get_thread_id(ctx), ctx->i32_0, ""),
4422 ac_build_export_prim(ctx, &prim);
4423 ac_build_export(ctx, &pos);
4424 ac_build_endif(ctx, 5021);
4428 ac_build_endif(ctx, 5020);
4432 LLVMValueRef ac_pack_edgeflags_for_export(struct ac_llvm_context *ctx,
4440 LLVMValueRef tmp = LLVMBuildAnd(ctx->builder,
4441 ac_get_arg(ctx, args->gs_invocation_id),
4442 LLVMConstInt(ctx->i32, 0x700, 0), "");
4443 tmp = LLVMBuildMul(ctx->builder, tmp, LLVMConstInt(ctx->i32, 0x80402u, 0), "");
4444 return LLVMBuildAnd(ctx->builder, tmp, LLVMConstInt(ctx->i32, 0x20080200, 0), "");
4447 LLVMValueRef ac_pack_prim_export(struct ac_llvm_context *ctx, const struct ac_ngg_prim *prim)
4458 LLVMBuilderRef builder = ctx->builder;
4459 LLVMValueRef tmp = LLVMBuildZExt(builder, prim->isnull, ctx->i32, "");
4460 LLVMValueRef result = LLVMBuildShl(builder, tmp, LLVMConstInt(ctx->i32, 31, false), "");
4461 result = LLVMBuildOr(ctx->builder, result, prim->edgeflags, "");
4464 tmp = LLVMBuildShl(builder, prim->index[i], LLVMConstInt(ctx->i32, 10 * i, false), "");
4470 void ac_build_export_prim(struct ac_llvm_context *ctx, const struct ac_ngg_prim *prim)
4477 args.out[0] = ac_pack_prim_export(ctx, prim);
4480 args.out[0] = LLVMBuildBitCast(ctx->builder, args.out[0], ctx->f32, "");
4481 args.out[1] = LLVMGetUndef(ctx->f32);
4482 args.out[2] = LLVMGetUndef(ctx->f32);
4483 args.out[3] = LLVMGetUndef(ctx->f32);
4491 ac_build_export(ctx, &args);
4494 static LLVMTypeRef arg_llvm_type(enum ac_arg_type type, unsigned size, struct ac_llvm_context *ctx)
4497 return size == 1 ? ctx->f32 : LLVMVectorType(ctx->f32, size);
4499 return size == 1 ? ctx->i32 : LLVMVectorType(ctx->i32, size);
4504 ptr_type = ctx->i8;
4507 ptr_type = ctx->f32;
4510 ptr_type = ac_array_in_const32_addr_space(ctx->i8);
4513 ptr_type = ctx->v4i32;
4516 ptr_type = ctx->v8i32;
4530 LLVMValueRef ac_build_main(const struct ac_shader_args *args, struct ac_llvm_context *ctx,
4537 arg_types[i] = arg_llvm_type(args->args[i].type, args->args[i].size, ctx);
4544 LLVMAppendBasicBlockInContext(ctx->context, main_function, "main_body");
4545 LLVMPositionBuilderAtEnd(ctx->builder, main_function_body);
4554 ac_add_function_attr(ctx->context, main_function, i + 1, AC_FUNC_ATTR_INREG);
4557 ac_add_function_attr(ctx->context, main_function, i + 1, AC_FUNC_ATTR_NOALIAS);
4563 ctx->main_function = main_function;
4573 void ac_build_s_endpgm(struct ac_llvm_context *ctx)
4575 LLVMTypeRef calltype = LLVMFunctionType(ctx->voidt, NULL, 0, false);
4577 LLVMBuildCall2(ctx->builder, calltype, code, NULL, 0, "");
4584 void ac_build_triangle_strip_indices_to_triangle(struct ac_llvm_context *ctx, LLVMValueRef is_odd,
4588 LLVMBuilderRef builder = ctx->builder;
4608 LLVMValueRef ac_build_is_inf_or_nan(struct ac_llvm_context *ctx, LLVMValueRef a)
4612 LLVMConstInt(ctx->i32, S_NAN | Q_NAN | N_INFINITY | P_INFINITY, 0),
4614 return ac_build_intrinsic(ctx, "llvm.amdgcn.class.f32", ctx->i1, args, 2,