18c2ecf20Sopenharmony_ci/*
28c2ecf20Sopenharmony_ci * Copyright © 2014 Broadcom
38c2ecf20Sopenharmony_ci *
48c2ecf20Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
58c2ecf20Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
68c2ecf20Sopenharmony_ci * to deal in the Software without restriction, including without limitation
78c2ecf20Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
88c2ecf20Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
98c2ecf20Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
108c2ecf20Sopenharmony_ci *
118c2ecf20Sopenharmony_ci * The above copyright notice and this permission notice (including the next
128c2ecf20Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
138c2ecf20Sopenharmony_ci * Software.
148c2ecf20Sopenharmony_ci *
158c2ecf20Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
168c2ecf20Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
178c2ecf20Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
188c2ecf20Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
198c2ecf20Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
208c2ecf20Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
218c2ecf20Sopenharmony_ci * IN THE SOFTWARE.
228c2ecf20Sopenharmony_ci */
238c2ecf20Sopenharmony_ci
248c2ecf20Sopenharmony_ci/**
258c2ecf20Sopenharmony_ci * DOC: Shader validator for VC4.
268c2ecf20Sopenharmony_ci *
278c2ecf20Sopenharmony_ci * Since the VC4 has no IOMMU between it and system memory, a user
288c2ecf20Sopenharmony_ci * with access to execute shaders could escalate privilege by
298c2ecf20Sopenharmony_ci * overwriting system memory (using the VPM write address register in
308c2ecf20Sopenharmony_ci * the general-purpose DMA mode) or reading system memory it shouldn't
318c2ecf20Sopenharmony_ci * (reading it as a texture, uniform data, or direct-addressed TMU
328c2ecf20Sopenharmony_ci * lookup).
338c2ecf20Sopenharmony_ci *
348c2ecf20Sopenharmony_ci * The shader validator walks over a shader's BO, ensuring that its
358c2ecf20Sopenharmony_ci * accesses are appropriately bounded, and recording where texture
368c2ecf20Sopenharmony_ci * accesses are made so that we can do relocations for them in the
378c2ecf20Sopenharmony_ci * uniform stream.
388c2ecf20Sopenharmony_ci *
 * Shader BOs are immutable for their lifetimes (enforced by not
 * allowing mmaps, GEM prime export, or rendering to them from a CL), so
 * this validation is only performed at BO creation time.
428c2ecf20Sopenharmony_ci */
438c2ecf20Sopenharmony_ci
448c2ecf20Sopenharmony_ci#include "vc4_drv.h"
458c2ecf20Sopenharmony_ci#include "vc4_qpu_defines.h"
468c2ecf20Sopenharmony_ci
/* Number of slots in the live-register tracking arrays: indices 0-31 and
 * 32-63 cover the two register files, and 64-67 cover the accumulators
 * QPU_W_ACC0..QPU_W_ACC3 (see waddr_to_live_reg_index()).
 */
#define LIVE_REG_COUNT (32 + 32 + 4)
488c2ecf20Sopenharmony_ci
/* Scratch state carried from instruction to instruction while a shader is
 * being validated.
 */
struct vc4_shader_validation_state {
	/* Current IP being validated. */
	uint32_t ip;

	/* IP at the end of the BO, do not read shader[max_ip] */
	uint32_t max_ip;

	/* The shader's 64-bit instruction words, indexed by ip. */
	uint64_t *shader;

	/* Per-TMU texture sample being assembled from parameter writes; it is
	 * copied into the validated shader's sample list when the lookup is
	 * submitted (see record_texture_sample()).
	 */
	struct vc4_texture_sample_info tmu_setup[2];
	/* Per-TMU count of parameter writes seen since the last submit. */
	int tmu_write_count[2];

	/* For registers that were last written to by a MIN instruction with
	 * one argument being a uniform, the address of the uniform.
	 * Otherwise, ~0.
	 *
	 * This is used for the validation of direct address memory reads.
	 */
	uint32_t live_min_clamp_offsets[LIVE_REG_COUNT];
	/* True for registers whose last write was recognised by
	 * track_live_clamps() as a clamp of a value to a minimum of 0.
	 */
	bool live_max_clamp_regs[LIVE_REG_COUNT];
	/* For registers last written by an unconditional, unpacked load
	 * immediate, the immediate value.  Otherwise, ~0 (see
	 * check_reg_write()).
	 */
	uint32_t live_immediates[LIVE_REG_COUNT];

	/* Bitfield of which IPs are used as branch targets.
	 *
	 * Used for validation that the uniform stream is updated at the right
	 * points and clearing the texturing/clamping state.
	 */
	unsigned long *branch_targets;

	/* Set when entering a basic block, and cleared when the uniform
	 * address update is found.  This is used to make sure that we don't
	 * read uniforms when the address is undefined.
	 */
	bool needs_uniform_address_update;

	/* Set when we find a backwards branch.  If the branch is backwards,
	 * the target is probably doing an address reset to read uniforms,
	 * and so we need to be sure that a uniforms address is present in the
	 * stream, even if the shader didn't need to read uniforms in later
	 * basic blocks.
	 */
	bool needs_uniform_address_for_loop;

	/* Set when we find an instruction writing the top half of the
	 * register files.  If we allowed writing the unusable regs in
	 * a threaded shader, then the other shader running on our
	 * QPU's clamp validation would be invalid.
	 */
	bool all_registers_used;
};
998c2ecf20Sopenharmony_ci
1008c2ecf20Sopenharmony_cistatic uint32_t
1018c2ecf20Sopenharmony_ciwaddr_to_live_reg_index(uint32_t waddr, bool is_b)
1028c2ecf20Sopenharmony_ci{
1038c2ecf20Sopenharmony_ci	if (waddr < 32) {
1048c2ecf20Sopenharmony_ci		if (is_b)
1058c2ecf20Sopenharmony_ci			return 32 + waddr;
1068c2ecf20Sopenharmony_ci		else
1078c2ecf20Sopenharmony_ci			return waddr;
1088c2ecf20Sopenharmony_ci	} else if (waddr <= QPU_W_ACC3) {
1098c2ecf20Sopenharmony_ci		return 64 + waddr - QPU_W_ACC0;
1108c2ecf20Sopenharmony_ci	} else {
1118c2ecf20Sopenharmony_ci		return ~0;
1128c2ecf20Sopenharmony_ci	}
1138c2ecf20Sopenharmony_ci}
1148c2ecf20Sopenharmony_ci
1158c2ecf20Sopenharmony_cistatic uint32_t
1168c2ecf20Sopenharmony_ciraddr_add_a_to_live_reg_index(uint64_t inst)
1178c2ecf20Sopenharmony_ci{
1188c2ecf20Sopenharmony_ci	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
1198c2ecf20Sopenharmony_ci	uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A);
1208c2ecf20Sopenharmony_ci	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
1218c2ecf20Sopenharmony_ci	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
1228c2ecf20Sopenharmony_ci
1238c2ecf20Sopenharmony_ci	if (add_a == QPU_MUX_A)
1248c2ecf20Sopenharmony_ci		return raddr_a;
1258c2ecf20Sopenharmony_ci	else if (add_a == QPU_MUX_B && sig != QPU_SIG_SMALL_IMM)
1268c2ecf20Sopenharmony_ci		return 32 + raddr_b;
1278c2ecf20Sopenharmony_ci	else if (add_a <= QPU_MUX_R3)
1288c2ecf20Sopenharmony_ci		return 64 + add_a;
1298c2ecf20Sopenharmony_ci	else
1308c2ecf20Sopenharmony_ci		return ~0;
1318c2ecf20Sopenharmony_ci}
1328c2ecf20Sopenharmony_ci
/* Report whether a live-register index falls in the upper 16 entries of
 * either 32-entry register file (indices 16-31 or 48-63).  Indices of 64 and
 * above never qualify.
 */
static bool
live_reg_is_upper_half(uint32_t lri)
{
	if (lri >= 32 + 32)
		return false;

	/* Both files are 32 entries wide, so fold them onto one range. */
	return (lri % 32) >= 16;
}
1398c2ecf20Sopenharmony_ci
1408c2ecf20Sopenharmony_cistatic bool
1418c2ecf20Sopenharmony_ciis_tmu_submit(uint32_t waddr)
1428c2ecf20Sopenharmony_ci{
1438c2ecf20Sopenharmony_ci	return (waddr == QPU_W_TMU0_S ||
1448c2ecf20Sopenharmony_ci		waddr == QPU_W_TMU1_S);
1458c2ecf20Sopenharmony_ci}
1468c2ecf20Sopenharmony_ci
1478c2ecf20Sopenharmony_cistatic bool
1488c2ecf20Sopenharmony_ciis_tmu_write(uint32_t waddr)
1498c2ecf20Sopenharmony_ci{
1508c2ecf20Sopenharmony_ci	return (waddr >= QPU_W_TMU0_S &&
1518c2ecf20Sopenharmony_ci		waddr <= QPU_W_TMU1_B);
1528c2ecf20Sopenharmony_ci}
1538c2ecf20Sopenharmony_ci
1548c2ecf20Sopenharmony_cistatic bool
1558c2ecf20Sopenharmony_cirecord_texture_sample(struct vc4_validated_shader_info *validated_shader,
1568c2ecf20Sopenharmony_ci		      struct vc4_shader_validation_state *validation_state,
1578c2ecf20Sopenharmony_ci		      int tmu)
1588c2ecf20Sopenharmony_ci{
1598c2ecf20Sopenharmony_ci	uint32_t s = validated_shader->num_texture_samples;
1608c2ecf20Sopenharmony_ci	int i;
1618c2ecf20Sopenharmony_ci	struct vc4_texture_sample_info *temp_samples;
1628c2ecf20Sopenharmony_ci
1638c2ecf20Sopenharmony_ci	temp_samples = krealloc(validated_shader->texture_samples,
1648c2ecf20Sopenharmony_ci				(s + 1) * sizeof(*temp_samples),
1658c2ecf20Sopenharmony_ci				GFP_KERNEL);
1668c2ecf20Sopenharmony_ci	if (!temp_samples)
1678c2ecf20Sopenharmony_ci		return false;
1688c2ecf20Sopenharmony_ci
1698c2ecf20Sopenharmony_ci	memcpy(&temp_samples[s],
1708c2ecf20Sopenharmony_ci	       &validation_state->tmu_setup[tmu],
1718c2ecf20Sopenharmony_ci	       sizeof(*temp_samples));
1728c2ecf20Sopenharmony_ci
1738c2ecf20Sopenharmony_ci	validated_shader->num_texture_samples = s + 1;
1748c2ecf20Sopenharmony_ci	validated_shader->texture_samples = temp_samples;
1758c2ecf20Sopenharmony_ci
1768c2ecf20Sopenharmony_ci	for (i = 0; i < 4; i++)
1778c2ecf20Sopenharmony_ci		validation_state->tmu_setup[tmu].p_offset[i] = ~0;
1788c2ecf20Sopenharmony_ci
1798c2ecf20Sopenharmony_ci	return true;
1808c2ecf20Sopenharmony_ci}
1818c2ecf20Sopenharmony_ci
/* Validate a write to a TMU parameter register made by the ADD op (is_mul
 * false) or the MUL op (is_mul true) of the current instruction.
 *
 * Each accepted write records the current uniform stream offset in the
 * TMU's pending sample.  A write to the S register submits the lookup: the
 * pending sample is appended to the validated shader and the write count is
 * reset.  A submit with no preceding parameter writes is treated as a
 * direct-addressed lookup and must be an ADD of a clamped register (first
 * argument) and a uniform (second argument).
 *
 * Returns false if the write is rejected or the sample list cannot be grown.
 */
static bool
check_tmu_write(struct vc4_validated_shader_info *validated_shader,
		struct vc4_shader_validation_state *validation_state,
		bool is_mul)
{
	uint64_t inst = validation_state->shader[validation_state->ip];
	uint32_t waddr = (is_mul ?
			  QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
			  QPU_GET_FIELD(inst, QPU_WADDR_ADD));
	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
	/* Index 1 for write addresses above QPU_W_TMU0_B, index 0 otherwise. */
	int tmu = waddr > QPU_W_TMU0_B;
	bool submit = is_tmu_submit(waddr);
	/* A submit with no parameters written yet is a direct lookup. */
	bool is_direct = submit && validation_state->tmu_write_count[tmu] == 0;
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);

	if (is_direct) {
		uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
		uint32_t clamp_reg, clamp_offset;

		if (sig == QPU_SIG_SMALL_IMM) {
			DRM_DEBUG("direct TMU read used small immediate\n");
			return false;
		}

		/* Make sure that this texture load is an add of the base
		 * address of the UBO to a clamped offset within the UBO.
		 */
		if (is_mul ||
		    QPU_GET_FIELD(inst, QPU_OP_ADD) != QPU_A_ADD) {
			DRM_DEBUG("direct TMU load wasn't an add\n");
			return false;
		}

		/* We assert that the clamped address is the first
		 * argument, and the UBO base address is the second argument.
		 * This is arbitrary, but simpler than supporting flipping the
		 * two either way.
		 */
		clamp_reg = raddr_add_a_to_live_reg_index(inst);
		if (clamp_reg == ~0) {
			DRM_DEBUG("direct TMU load wasn't clamped\n");
			return false;
		}

		/* ~0 here means the register holds no live MIN clamp. */
		clamp_offset = validation_state->live_min_clamp_offsets[clamp_reg];
		if (clamp_offset == ~0) {
			DRM_DEBUG("direct TMU load wasn't clamped\n");
			return false;
		}

		/* Store the clamp value's offset in p1 (see reloc_tex() in
		 * vc4_validate.c).
		 */
		validation_state->tmu_setup[tmu].p_offset[1] =
			clamp_offset;

		if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
		    !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF)) {
			DRM_DEBUG("direct TMU load didn't add to a uniform\n");
			return false;
		}

		validation_state->tmu_setup[tmu].is_direct = true;
	} else {
		/* raddr_b only names a register read when the instruction
		 * does not carry a small immediate.
		 */
		if (raddr_a == QPU_R_UNIF || (sig != QPU_SIG_SMALL_IMM &&
					      raddr_b == QPU_R_UNIF)) {
			DRM_DEBUG("uniform read in the same instruction as "
				  "texture setup.\n");
			return false;
		}
	}

	/* p_offset[] has four entries, so a fifth write cannot be recorded. */
	if (validation_state->tmu_write_count[tmu] >= 4) {
		DRM_DEBUG("TMU%d got too many parameters before dispatch\n",
			  tmu);
		return false;
	}
	validation_state->tmu_setup[tmu].p_offset[validation_state->tmu_write_count[tmu]] =
		validated_shader->uniforms_size;
	validation_state->tmu_write_count[tmu]++;
	/* Since direct uses a RADDR uniform reference, it will get counted in
	 * check_instruction_reads()
	 */
	if (!is_direct) {
		if (validation_state->needs_uniform_address_update) {
			DRM_DEBUG("Texturing with undefined uniform address\n");
			return false;
		}

		validated_shader->uniforms_size += 4;
	}

	if (submit) {
		if (!record_texture_sample(validated_shader,
					   validation_state, tmu)) {
			return false;
		}

		validation_state->tmu_write_count[tmu] = 0;
	}

	return true;
}
2868c2ecf20Sopenharmony_ci
2878c2ecf20Sopenharmony_cistatic bool require_uniform_address_uniform(struct vc4_validated_shader_info *validated_shader)
2888c2ecf20Sopenharmony_ci{
2898c2ecf20Sopenharmony_ci	uint32_t o = validated_shader->num_uniform_addr_offsets;
2908c2ecf20Sopenharmony_ci	uint32_t num_uniforms = validated_shader->uniforms_size / 4;
2918c2ecf20Sopenharmony_ci
2928c2ecf20Sopenharmony_ci	validated_shader->uniform_addr_offsets =
2938c2ecf20Sopenharmony_ci		krealloc(validated_shader->uniform_addr_offsets,
2948c2ecf20Sopenharmony_ci			 (o + 1) *
2958c2ecf20Sopenharmony_ci			 sizeof(*validated_shader->uniform_addr_offsets),
2968c2ecf20Sopenharmony_ci			 GFP_KERNEL);
2978c2ecf20Sopenharmony_ci	if (!validated_shader->uniform_addr_offsets)
2988c2ecf20Sopenharmony_ci		return false;
2998c2ecf20Sopenharmony_ci
3008c2ecf20Sopenharmony_ci	validated_shader->uniform_addr_offsets[o] = num_uniforms;
3018c2ecf20Sopenharmony_ci	validated_shader->num_uniform_addr_offsets++;
3028c2ecf20Sopenharmony_ci
3038c2ecf20Sopenharmony_ci	return true;
3048c2ecf20Sopenharmony_ci}
3058c2ecf20Sopenharmony_ci
3068c2ecf20Sopenharmony_cistatic bool
3078c2ecf20Sopenharmony_civalidate_uniform_address_write(struct vc4_validated_shader_info *validated_shader,
3088c2ecf20Sopenharmony_ci			       struct vc4_shader_validation_state *validation_state,
3098c2ecf20Sopenharmony_ci			       bool is_mul)
3108c2ecf20Sopenharmony_ci{
3118c2ecf20Sopenharmony_ci	uint64_t inst = validation_state->shader[validation_state->ip];
3128c2ecf20Sopenharmony_ci	u32 add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
3138c2ecf20Sopenharmony_ci	u32 raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
3148c2ecf20Sopenharmony_ci	u32 raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
3158c2ecf20Sopenharmony_ci	u32 add_lri = raddr_add_a_to_live_reg_index(inst);
3168c2ecf20Sopenharmony_ci	/* We want our reset to be pointing at whatever uniform follows the
3178c2ecf20Sopenharmony_ci	 * uniforms base address.
3188c2ecf20Sopenharmony_ci	 */
3198c2ecf20Sopenharmony_ci	u32 expected_offset = validated_shader->uniforms_size + 4;
3208c2ecf20Sopenharmony_ci
3218c2ecf20Sopenharmony_ci	/* We only support absolute uniform address changes, and we
3228c2ecf20Sopenharmony_ci	 * require that they be in the current basic block before any
3238c2ecf20Sopenharmony_ci	 * of its uniform reads.
3248c2ecf20Sopenharmony_ci	 *
3258c2ecf20Sopenharmony_ci	 * One could potentially emit more efficient QPU code, by
3268c2ecf20Sopenharmony_ci	 * noticing that (say) an if statement does uniform control
3278c2ecf20Sopenharmony_ci	 * flow for all threads and that the if reads the same number
3288c2ecf20Sopenharmony_ci	 * of uniforms on each side.  However, this scheme is easy to
3298c2ecf20Sopenharmony_ci	 * validate so it's all we allow for now.
3308c2ecf20Sopenharmony_ci	 */
3318c2ecf20Sopenharmony_ci	switch (QPU_GET_FIELD(inst, QPU_SIG)) {
3328c2ecf20Sopenharmony_ci	case QPU_SIG_NONE:
3338c2ecf20Sopenharmony_ci	case QPU_SIG_SCOREBOARD_UNLOCK:
3348c2ecf20Sopenharmony_ci	case QPU_SIG_COLOR_LOAD:
3358c2ecf20Sopenharmony_ci	case QPU_SIG_LOAD_TMU0:
3368c2ecf20Sopenharmony_ci	case QPU_SIG_LOAD_TMU1:
3378c2ecf20Sopenharmony_ci		break;
3388c2ecf20Sopenharmony_ci	default:
3398c2ecf20Sopenharmony_ci		DRM_DEBUG("uniforms address change must be "
3408c2ecf20Sopenharmony_ci			  "normal math\n");
3418c2ecf20Sopenharmony_ci		return false;
3428c2ecf20Sopenharmony_ci	}
3438c2ecf20Sopenharmony_ci
3448c2ecf20Sopenharmony_ci	if (is_mul || QPU_GET_FIELD(inst, QPU_OP_ADD) != QPU_A_ADD) {
3458c2ecf20Sopenharmony_ci		DRM_DEBUG("Uniform address reset must be an ADD.\n");
3468c2ecf20Sopenharmony_ci		return false;
3478c2ecf20Sopenharmony_ci	}
3488c2ecf20Sopenharmony_ci
3498c2ecf20Sopenharmony_ci	if (QPU_GET_FIELD(inst, QPU_COND_ADD) != QPU_COND_ALWAYS) {
3508c2ecf20Sopenharmony_ci		DRM_DEBUG("Uniform address reset must be unconditional.\n");
3518c2ecf20Sopenharmony_ci		return false;
3528c2ecf20Sopenharmony_ci	}
3538c2ecf20Sopenharmony_ci
3548c2ecf20Sopenharmony_ci	if (QPU_GET_FIELD(inst, QPU_PACK) != QPU_PACK_A_NOP &&
3558c2ecf20Sopenharmony_ci	    !(inst & QPU_PM)) {
3568c2ecf20Sopenharmony_ci		DRM_DEBUG("No packing allowed on uniforms reset\n");
3578c2ecf20Sopenharmony_ci		return false;
3588c2ecf20Sopenharmony_ci	}
3598c2ecf20Sopenharmony_ci
3608c2ecf20Sopenharmony_ci	if (add_lri == -1) {
3618c2ecf20Sopenharmony_ci		DRM_DEBUG("First argument of uniform address write must be "
3628c2ecf20Sopenharmony_ci			  "an immediate value.\n");
3638c2ecf20Sopenharmony_ci		return false;
3648c2ecf20Sopenharmony_ci	}
3658c2ecf20Sopenharmony_ci
3668c2ecf20Sopenharmony_ci	if (validation_state->live_immediates[add_lri] != expected_offset) {
3678c2ecf20Sopenharmony_ci		DRM_DEBUG("Resetting uniforms with offset %db instead of %db\n",
3688c2ecf20Sopenharmony_ci			  validation_state->live_immediates[add_lri],
3698c2ecf20Sopenharmony_ci			  expected_offset);
3708c2ecf20Sopenharmony_ci		return false;
3718c2ecf20Sopenharmony_ci	}
3728c2ecf20Sopenharmony_ci
3738c2ecf20Sopenharmony_ci	if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
3748c2ecf20Sopenharmony_ci	    !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF)) {
3758c2ecf20Sopenharmony_ci		DRM_DEBUG("Second argument of uniform address write must be "
3768c2ecf20Sopenharmony_ci			  "a uniform.\n");
3778c2ecf20Sopenharmony_ci		return false;
3788c2ecf20Sopenharmony_ci	}
3798c2ecf20Sopenharmony_ci
3808c2ecf20Sopenharmony_ci	validation_state->needs_uniform_address_update = false;
3818c2ecf20Sopenharmony_ci	validation_state->needs_uniform_address_for_loop = false;
3828c2ecf20Sopenharmony_ci	return require_uniform_address_uniform(validated_shader);
3838c2ecf20Sopenharmony_ci}
3848c2ecf20Sopenharmony_ci
3858c2ecf20Sopenharmony_cistatic bool
3868c2ecf20Sopenharmony_cicheck_reg_write(struct vc4_validated_shader_info *validated_shader,
3878c2ecf20Sopenharmony_ci		struct vc4_shader_validation_state *validation_state,
3888c2ecf20Sopenharmony_ci		bool is_mul)
3898c2ecf20Sopenharmony_ci{
3908c2ecf20Sopenharmony_ci	uint64_t inst = validation_state->shader[validation_state->ip];
3918c2ecf20Sopenharmony_ci	uint32_t waddr = (is_mul ?
3928c2ecf20Sopenharmony_ci			  QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
3938c2ecf20Sopenharmony_ci			  QPU_GET_FIELD(inst, QPU_WADDR_ADD));
3948c2ecf20Sopenharmony_ci	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
3958c2ecf20Sopenharmony_ci	bool ws = inst & QPU_WS;
3968c2ecf20Sopenharmony_ci	bool is_b = is_mul ^ ws;
3978c2ecf20Sopenharmony_ci	u32 lri = waddr_to_live_reg_index(waddr, is_b);
3988c2ecf20Sopenharmony_ci
3998c2ecf20Sopenharmony_ci	if (lri != -1) {
4008c2ecf20Sopenharmony_ci		uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD);
4018c2ecf20Sopenharmony_ci		uint32_t cond_mul = QPU_GET_FIELD(inst, QPU_COND_MUL);
4028c2ecf20Sopenharmony_ci
4038c2ecf20Sopenharmony_ci		if (sig == QPU_SIG_LOAD_IMM &&
4048c2ecf20Sopenharmony_ci		    QPU_GET_FIELD(inst, QPU_PACK) == QPU_PACK_A_NOP &&
4058c2ecf20Sopenharmony_ci		    ((is_mul && cond_mul == QPU_COND_ALWAYS) ||
4068c2ecf20Sopenharmony_ci		     (!is_mul && cond_add == QPU_COND_ALWAYS))) {
4078c2ecf20Sopenharmony_ci			validation_state->live_immediates[lri] =
4088c2ecf20Sopenharmony_ci				QPU_GET_FIELD(inst, QPU_LOAD_IMM);
4098c2ecf20Sopenharmony_ci		} else {
4108c2ecf20Sopenharmony_ci			validation_state->live_immediates[lri] = ~0;
4118c2ecf20Sopenharmony_ci		}
4128c2ecf20Sopenharmony_ci
4138c2ecf20Sopenharmony_ci		if (live_reg_is_upper_half(lri))
4148c2ecf20Sopenharmony_ci			validation_state->all_registers_used = true;
4158c2ecf20Sopenharmony_ci	}
4168c2ecf20Sopenharmony_ci
4178c2ecf20Sopenharmony_ci	switch (waddr) {
4188c2ecf20Sopenharmony_ci	case QPU_W_UNIFORMS_ADDRESS:
4198c2ecf20Sopenharmony_ci		if (is_b) {
4208c2ecf20Sopenharmony_ci			DRM_DEBUG("relative uniforms address change "
4218c2ecf20Sopenharmony_ci				  "unsupported\n");
4228c2ecf20Sopenharmony_ci			return false;
4238c2ecf20Sopenharmony_ci		}
4248c2ecf20Sopenharmony_ci
4258c2ecf20Sopenharmony_ci		return validate_uniform_address_write(validated_shader,
4268c2ecf20Sopenharmony_ci						      validation_state,
4278c2ecf20Sopenharmony_ci						      is_mul);
4288c2ecf20Sopenharmony_ci
4298c2ecf20Sopenharmony_ci	case QPU_W_TLB_COLOR_MS:
4308c2ecf20Sopenharmony_ci	case QPU_W_TLB_COLOR_ALL:
4318c2ecf20Sopenharmony_ci	case QPU_W_TLB_Z:
4328c2ecf20Sopenharmony_ci		/* These only interact with the tile buffer, not main memory,
4338c2ecf20Sopenharmony_ci		 * so they're safe.
4348c2ecf20Sopenharmony_ci		 */
4358c2ecf20Sopenharmony_ci		return true;
4368c2ecf20Sopenharmony_ci
4378c2ecf20Sopenharmony_ci	case QPU_W_TMU0_S:
4388c2ecf20Sopenharmony_ci	case QPU_W_TMU0_T:
4398c2ecf20Sopenharmony_ci	case QPU_W_TMU0_R:
4408c2ecf20Sopenharmony_ci	case QPU_W_TMU0_B:
4418c2ecf20Sopenharmony_ci	case QPU_W_TMU1_S:
4428c2ecf20Sopenharmony_ci	case QPU_W_TMU1_T:
4438c2ecf20Sopenharmony_ci	case QPU_W_TMU1_R:
4448c2ecf20Sopenharmony_ci	case QPU_W_TMU1_B:
4458c2ecf20Sopenharmony_ci		return check_tmu_write(validated_shader, validation_state,
4468c2ecf20Sopenharmony_ci				       is_mul);
4478c2ecf20Sopenharmony_ci
4488c2ecf20Sopenharmony_ci	case QPU_W_HOST_INT:
4498c2ecf20Sopenharmony_ci	case QPU_W_TMU_NOSWAP:
4508c2ecf20Sopenharmony_ci	case QPU_W_TLB_ALPHA_MASK:
4518c2ecf20Sopenharmony_ci	case QPU_W_MUTEX_RELEASE:
4528c2ecf20Sopenharmony_ci		/* XXX: I haven't thought about these, so don't support them
4538c2ecf20Sopenharmony_ci		 * for now.
4548c2ecf20Sopenharmony_ci		 */
4558c2ecf20Sopenharmony_ci		DRM_DEBUG("Unsupported waddr %d\n", waddr);
4568c2ecf20Sopenharmony_ci		return false;
4578c2ecf20Sopenharmony_ci
4588c2ecf20Sopenharmony_ci	case QPU_W_VPM_ADDR:
4598c2ecf20Sopenharmony_ci		DRM_DEBUG("General VPM DMA unsupported\n");
4608c2ecf20Sopenharmony_ci		return false;
4618c2ecf20Sopenharmony_ci
4628c2ecf20Sopenharmony_ci	case QPU_W_VPM:
4638c2ecf20Sopenharmony_ci	case QPU_W_VPMVCD_SETUP:
4648c2ecf20Sopenharmony_ci		/* We allow VPM setup in general, even including VPM DMA
4658c2ecf20Sopenharmony_ci		 * configuration setup, because the (unsafe) DMA can only be
4668c2ecf20Sopenharmony_ci		 * triggered by QPU_W_VPM_ADDR writes.
4678c2ecf20Sopenharmony_ci		 */
4688c2ecf20Sopenharmony_ci		return true;
4698c2ecf20Sopenharmony_ci
4708c2ecf20Sopenharmony_ci	case QPU_W_TLB_STENCIL_SETUP:
4718c2ecf20Sopenharmony_ci		return true;
4728c2ecf20Sopenharmony_ci	}
4738c2ecf20Sopenharmony_ci
4748c2ecf20Sopenharmony_ci	return true;
4758c2ecf20Sopenharmony_ci}
4768c2ecf20Sopenharmony_ci
4778c2ecf20Sopenharmony_cistatic void
4788c2ecf20Sopenharmony_citrack_live_clamps(struct vc4_validated_shader_info *validated_shader,
4798c2ecf20Sopenharmony_ci		  struct vc4_shader_validation_state *validation_state)
4808c2ecf20Sopenharmony_ci{
4818c2ecf20Sopenharmony_ci	uint64_t inst = validation_state->shader[validation_state->ip];
4828c2ecf20Sopenharmony_ci	uint32_t op_add = QPU_GET_FIELD(inst, QPU_OP_ADD);
4838c2ecf20Sopenharmony_ci	uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
4848c2ecf20Sopenharmony_ci	uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
4858c2ecf20Sopenharmony_ci	uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD);
4868c2ecf20Sopenharmony_ci	uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A);
4878c2ecf20Sopenharmony_ci	uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
4888c2ecf20Sopenharmony_ci	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
4898c2ecf20Sopenharmony_ci	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
4908c2ecf20Sopenharmony_ci	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
4918c2ecf20Sopenharmony_ci	bool ws = inst & QPU_WS;
4928c2ecf20Sopenharmony_ci	uint32_t lri_add_a, lri_add, lri_mul;
4938c2ecf20Sopenharmony_ci	bool add_a_is_min_0;
4948c2ecf20Sopenharmony_ci
4958c2ecf20Sopenharmony_ci	/* Check whether OP_ADD's A argumennt comes from a live MAX(x, 0),
4968c2ecf20Sopenharmony_ci	 * before we clear previous live state.
4978c2ecf20Sopenharmony_ci	 */
4988c2ecf20Sopenharmony_ci	lri_add_a = raddr_add_a_to_live_reg_index(inst);
4998c2ecf20Sopenharmony_ci	add_a_is_min_0 = (lri_add_a != ~0 &&
5008c2ecf20Sopenharmony_ci			  validation_state->live_max_clamp_regs[lri_add_a]);
5018c2ecf20Sopenharmony_ci
5028c2ecf20Sopenharmony_ci	/* Clear live state for registers written by our instruction. */
5038c2ecf20Sopenharmony_ci	lri_add = waddr_to_live_reg_index(waddr_add, ws);
5048c2ecf20Sopenharmony_ci	lri_mul = waddr_to_live_reg_index(waddr_mul, !ws);
5058c2ecf20Sopenharmony_ci	if (lri_mul != ~0) {
5068c2ecf20Sopenharmony_ci		validation_state->live_max_clamp_regs[lri_mul] = false;
5078c2ecf20Sopenharmony_ci		validation_state->live_min_clamp_offsets[lri_mul] = ~0;
5088c2ecf20Sopenharmony_ci	}
5098c2ecf20Sopenharmony_ci	if (lri_add != ~0) {
5108c2ecf20Sopenharmony_ci		validation_state->live_max_clamp_regs[lri_add] = false;
5118c2ecf20Sopenharmony_ci		validation_state->live_min_clamp_offsets[lri_add] = ~0;
5128c2ecf20Sopenharmony_ci	} else {
5138c2ecf20Sopenharmony_ci		/* Nothing further to do for live tracking, since only ADDs
5148c2ecf20Sopenharmony_ci		 * generate new live clamp registers.
5158c2ecf20Sopenharmony_ci		 */
5168c2ecf20Sopenharmony_ci		return;
5178c2ecf20Sopenharmony_ci	}
5188c2ecf20Sopenharmony_ci
5198c2ecf20Sopenharmony_ci	/* Now, handle remaining live clamp tracking for the ADD operation. */
5208c2ecf20Sopenharmony_ci
5218c2ecf20Sopenharmony_ci	if (cond_add != QPU_COND_ALWAYS)
5228c2ecf20Sopenharmony_ci		return;
5238c2ecf20Sopenharmony_ci
5248c2ecf20Sopenharmony_ci	if (op_add == QPU_A_MAX) {
5258c2ecf20Sopenharmony_ci		/* Track live clamps of a value to a minimum of 0 (in either
5268c2ecf20Sopenharmony_ci		 * arg).
5278c2ecf20Sopenharmony_ci		 */
5288c2ecf20Sopenharmony_ci		if (sig != QPU_SIG_SMALL_IMM || raddr_b != 0 ||
5298c2ecf20Sopenharmony_ci		    (add_a != QPU_MUX_B && add_b != QPU_MUX_B)) {
5308c2ecf20Sopenharmony_ci			return;
5318c2ecf20Sopenharmony_ci		}
5328c2ecf20Sopenharmony_ci
5338c2ecf20Sopenharmony_ci		validation_state->live_max_clamp_regs[lri_add] = true;
5348c2ecf20Sopenharmony_ci	} else if (op_add == QPU_A_MIN) {
5358c2ecf20Sopenharmony_ci		/* Track live clamps of a value clamped to a minimum of 0 and
5368c2ecf20Sopenharmony_ci		 * a maximum of some uniform's offset.
5378c2ecf20Sopenharmony_ci		 */
5388c2ecf20Sopenharmony_ci		if (!add_a_is_min_0)
5398c2ecf20Sopenharmony_ci			return;
5408c2ecf20Sopenharmony_ci
5418c2ecf20Sopenharmony_ci		if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
5428c2ecf20Sopenharmony_ci		    !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF &&
5438c2ecf20Sopenharmony_ci		      sig != QPU_SIG_SMALL_IMM)) {
5448c2ecf20Sopenharmony_ci			return;
5458c2ecf20Sopenharmony_ci		}
5468c2ecf20Sopenharmony_ci
5478c2ecf20Sopenharmony_ci		validation_state->live_min_clamp_offsets[lri_add] =
5488c2ecf20Sopenharmony_ci			validated_shader->uniforms_size;
5498c2ecf20Sopenharmony_ci	}
5508c2ecf20Sopenharmony_ci}
5518c2ecf20Sopenharmony_ci
5528c2ecf20Sopenharmony_cistatic bool
5538c2ecf20Sopenharmony_cicheck_instruction_writes(struct vc4_validated_shader_info *validated_shader,
5548c2ecf20Sopenharmony_ci			 struct vc4_shader_validation_state *validation_state)
5558c2ecf20Sopenharmony_ci{
5568c2ecf20Sopenharmony_ci	uint64_t inst = validation_state->shader[validation_state->ip];
5578c2ecf20Sopenharmony_ci	uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
5588c2ecf20Sopenharmony_ci	uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
5598c2ecf20Sopenharmony_ci	bool ok;
5608c2ecf20Sopenharmony_ci
5618c2ecf20Sopenharmony_ci	if (is_tmu_write(waddr_add) && is_tmu_write(waddr_mul)) {
5628c2ecf20Sopenharmony_ci		DRM_DEBUG("ADD and MUL both set up textures\n");
5638c2ecf20Sopenharmony_ci		return false;
5648c2ecf20Sopenharmony_ci	}
5658c2ecf20Sopenharmony_ci
5668c2ecf20Sopenharmony_ci	ok = (check_reg_write(validated_shader, validation_state, false) &&
5678c2ecf20Sopenharmony_ci	      check_reg_write(validated_shader, validation_state, true));
5688c2ecf20Sopenharmony_ci
5698c2ecf20Sopenharmony_ci	track_live_clamps(validated_shader, validation_state);
5708c2ecf20Sopenharmony_ci
5718c2ecf20Sopenharmony_ci	return ok;
5728c2ecf20Sopenharmony_ci}
5738c2ecf20Sopenharmony_ci
5748c2ecf20Sopenharmony_cistatic bool
5758c2ecf20Sopenharmony_cicheck_branch(uint64_t inst,
5768c2ecf20Sopenharmony_ci	     struct vc4_validated_shader_info *validated_shader,
5778c2ecf20Sopenharmony_ci	     struct vc4_shader_validation_state *validation_state,
5788c2ecf20Sopenharmony_ci	     int ip)
5798c2ecf20Sopenharmony_ci{
5808c2ecf20Sopenharmony_ci	int32_t branch_imm = QPU_GET_FIELD(inst, QPU_BRANCH_TARGET);
5818c2ecf20Sopenharmony_ci	uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
5828c2ecf20Sopenharmony_ci	uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
5838c2ecf20Sopenharmony_ci
5848c2ecf20Sopenharmony_ci	if ((int)branch_imm < 0)
5858c2ecf20Sopenharmony_ci		validation_state->needs_uniform_address_for_loop = true;
5868c2ecf20Sopenharmony_ci
5878c2ecf20Sopenharmony_ci	/* We don't want to have to worry about validation of this, and
5888c2ecf20Sopenharmony_ci	 * there's no need for it.
5898c2ecf20Sopenharmony_ci	 */
5908c2ecf20Sopenharmony_ci	if (waddr_add != QPU_W_NOP || waddr_mul != QPU_W_NOP) {
5918c2ecf20Sopenharmony_ci		DRM_DEBUG("branch instruction at %d wrote a register.\n",
5928c2ecf20Sopenharmony_ci			  validation_state->ip);
5938c2ecf20Sopenharmony_ci		return false;
5948c2ecf20Sopenharmony_ci	}
5958c2ecf20Sopenharmony_ci
5968c2ecf20Sopenharmony_ci	return true;
5978c2ecf20Sopenharmony_ci}
5988c2ecf20Sopenharmony_ci
5998c2ecf20Sopenharmony_cistatic bool
6008c2ecf20Sopenharmony_cicheck_instruction_reads(struct vc4_validated_shader_info *validated_shader,
6018c2ecf20Sopenharmony_ci			struct vc4_shader_validation_state *validation_state)
6028c2ecf20Sopenharmony_ci{
6038c2ecf20Sopenharmony_ci	uint64_t inst = validation_state->shader[validation_state->ip];
6048c2ecf20Sopenharmony_ci	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
6058c2ecf20Sopenharmony_ci	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
6068c2ecf20Sopenharmony_ci	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
6078c2ecf20Sopenharmony_ci
6088c2ecf20Sopenharmony_ci	if (raddr_a == QPU_R_UNIF ||
6098c2ecf20Sopenharmony_ci	    (raddr_b == QPU_R_UNIF && sig != QPU_SIG_SMALL_IMM)) {
6108c2ecf20Sopenharmony_ci		/* This can't overflow the uint32_t, because we're reading 8
6118c2ecf20Sopenharmony_ci		 * bytes of instruction to increment by 4 here, so we'd
6128c2ecf20Sopenharmony_ci		 * already be OOM.
6138c2ecf20Sopenharmony_ci		 */
6148c2ecf20Sopenharmony_ci		validated_shader->uniforms_size += 4;
6158c2ecf20Sopenharmony_ci
6168c2ecf20Sopenharmony_ci		if (validation_state->needs_uniform_address_update) {
6178c2ecf20Sopenharmony_ci			DRM_DEBUG("Uniform read with undefined uniform "
6188c2ecf20Sopenharmony_ci				  "address\n");
6198c2ecf20Sopenharmony_ci			return false;
6208c2ecf20Sopenharmony_ci		}
6218c2ecf20Sopenharmony_ci	}
6228c2ecf20Sopenharmony_ci
6238c2ecf20Sopenharmony_ci	if ((raddr_a >= 16 && raddr_a < 32) ||
6248c2ecf20Sopenharmony_ci	    (raddr_b >= 16 && raddr_b < 32 && sig != QPU_SIG_SMALL_IMM)) {
6258c2ecf20Sopenharmony_ci		validation_state->all_registers_used = true;
6268c2ecf20Sopenharmony_ci	}
6278c2ecf20Sopenharmony_ci
6288c2ecf20Sopenharmony_ci	return true;
6298c2ecf20Sopenharmony_ci}
6308c2ecf20Sopenharmony_ci
6318c2ecf20Sopenharmony_ci/* Make sure that all branches are absolute and point within the shader, and
6328c2ecf20Sopenharmony_ci * note their targets for later.
6338c2ecf20Sopenharmony_ci */
6348c2ecf20Sopenharmony_cistatic bool
6358c2ecf20Sopenharmony_civc4_validate_branches(struct vc4_shader_validation_state *validation_state)
6368c2ecf20Sopenharmony_ci{
6378c2ecf20Sopenharmony_ci	uint32_t max_branch_target = 0;
6388c2ecf20Sopenharmony_ci	int ip;
6398c2ecf20Sopenharmony_ci	int last_branch = -2;
6408c2ecf20Sopenharmony_ci
6418c2ecf20Sopenharmony_ci	for (ip = 0; ip < validation_state->max_ip; ip++) {
6428c2ecf20Sopenharmony_ci		uint64_t inst = validation_state->shader[ip];
6438c2ecf20Sopenharmony_ci		int32_t branch_imm = QPU_GET_FIELD(inst, QPU_BRANCH_TARGET);
6448c2ecf20Sopenharmony_ci		uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
6458c2ecf20Sopenharmony_ci		uint32_t after_delay_ip = ip + 4;
6468c2ecf20Sopenharmony_ci		uint32_t branch_target_ip;
6478c2ecf20Sopenharmony_ci
6488c2ecf20Sopenharmony_ci		if (sig == QPU_SIG_PROG_END) {
6498c2ecf20Sopenharmony_ci			/* There are two delay slots after program end is
6508c2ecf20Sopenharmony_ci			 * signaled that are still executed, then we're
6518c2ecf20Sopenharmony_ci			 * finished.  validation_state->max_ip is the
6528c2ecf20Sopenharmony_ci			 * instruction after the last valid instruction in the
6538c2ecf20Sopenharmony_ci			 * program.
6548c2ecf20Sopenharmony_ci			 */
6558c2ecf20Sopenharmony_ci			validation_state->max_ip = ip + 3;
6568c2ecf20Sopenharmony_ci			continue;
6578c2ecf20Sopenharmony_ci		}
6588c2ecf20Sopenharmony_ci
6598c2ecf20Sopenharmony_ci		if (sig != QPU_SIG_BRANCH)
6608c2ecf20Sopenharmony_ci			continue;
6618c2ecf20Sopenharmony_ci
6628c2ecf20Sopenharmony_ci		if (ip - last_branch < 4) {
6638c2ecf20Sopenharmony_ci			DRM_DEBUG("Branch at %d during delay slots\n", ip);
6648c2ecf20Sopenharmony_ci			return false;
6658c2ecf20Sopenharmony_ci		}
6668c2ecf20Sopenharmony_ci		last_branch = ip;
6678c2ecf20Sopenharmony_ci
6688c2ecf20Sopenharmony_ci		if (inst & QPU_BRANCH_REG) {
6698c2ecf20Sopenharmony_ci			DRM_DEBUG("branching from register relative "
6708c2ecf20Sopenharmony_ci				  "not supported\n");
6718c2ecf20Sopenharmony_ci			return false;
6728c2ecf20Sopenharmony_ci		}
6738c2ecf20Sopenharmony_ci
6748c2ecf20Sopenharmony_ci		if (!(inst & QPU_BRANCH_REL)) {
6758c2ecf20Sopenharmony_ci			DRM_DEBUG("relative branching required\n");
6768c2ecf20Sopenharmony_ci			return false;
6778c2ecf20Sopenharmony_ci		}
6788c2ecf20Sopenharmony_ci
6798c2ecf20Sopenharmony_ci		/* The actual branch target is the instruction after the delay
6808c2ecf20Sopenharmony_ci		 * slots, plus whatever byte offset is in the low 32 bits of
6818c2ecf20Sopenharmony_ci		 * the instruction.  Make sure we're not branching beyond the
6828c2ecf20Sopenharmony_ci		 * end of the shader object.
6838c2ecf20Sopenharmony_ci		 */
6848c2ecf20Sopenharmony_ci		if (branch_imm % sizeof(inst) != 0) {
6858c2ecf20Sopenharmony_ci			DRM_DEBUG("branch target not aligned\n");
6868c2ecf20Sopenharmony_ci			return false;
6878c2ecf20Sopenharmony_ci		}
6888c2ecf20Sopenharmony_ci
6898c2ecf20Sopenharmony_ci		branch_target_ip = after_delay_ip + (branch_imm >> 3);
6908c2ecf20Sopenharmony_ci		if (branch_target_ip >= validation_state->max_ip) {
6918c2ecf20Sopenharmony_ci			DRM_DEBUG("Branch at %d outside of shader (ip %d/%d)\n",
6928c2ecf20Sopenharmony_ci				  ip, branch_target_ip,
6938c2ecf20Sopenharmony_ci				  validation_state->max_ip);
6948c2ecf20Sopenharmony_ci			return false;
6958c2ecf20Sopenharmony_ci		}
6968c2ecf20Sopenharmony_ci		set_bit(branch_target_ip, validation_state->branch_targets);
6978c2ecf20Sopenharmony_ci
6988c2ecf20Sopenharmony_ci		/* Make sure that the non-branching path is also not outside
6998c2ecf20Sopenharmony_ci		 * the shader.
7008c2ecf20Sopenharmony_ci		 */
7018c2ecf20Sopenharmony_ci		if (after_delay_ip >= validation_state->max_ip) {
7028c2ecf20Sopenharmony_ci			DRM_DEBUG("Branch at %d continues past shader end "
7038c2ecf20Sopenharmony_ci				  "(%d/%d)\n",
7048c2ecf20Sopenharmony_ci				  ip, after_delay_ip, validation_state->max_ip);
7058c2ecf20Sopenharmony_ci			return false;
7068c2ecf20Sopenharmony_ci		}
7078c2ecf20Sopenharmony_ci		set_bit(after_delay_ip, validation_state->branch_targets);
7088c2ecf20Sopenharmony_ci		max_branch_target = max(max_branch_target, after_delay_ip);
7098c2ecf20Sopenharmony_ci	}
7108c2ecf20Sopenharmony_ci
7118c2ecf20Sopenharmony_ci	if (max_branch_target > validation_state->max_ip - 3) {
7128c2ecf20Sopenharmony_ci		DRM_DEBUG("Branch landed after QPU_SIG_PROG_END");
7138c2ecf20Sopenharmony_ci		return false;
7148c2ecf20Sopenharmony_ci	}
7158c2ecf20Sopenharmony_ci
7168c2ecf20Sopenharmony_ci	return true;
7178c2ecf20Sopenharmony_ci}
7188c2ecf20Sopenharmony_ci
7198c2ecf20Sopenharmony_ci/* Resets any known state for the shader, used when we may be branched to from
7208c2ecf20Sopenharmony_ci * multiple locations in the program (or at shader start).
7218c2ecf20Sopenharmony_ci */
7228c2ecf20Sopenharmony_cistatic void
7238c2ecf20Sopenharmony_cireset_validation_state(struct vc4_shader_validation_state *validation_state)
7248c2ecf20Sopenharmony_ci{
7258c2ecf20Sopenharmony_ci	int i;
7268c2ecf20Sopenharmony_ci
7278c2ecf20Sopenharmony_ci	for (i = 0; i < 8; i++)
7288c2ecf20Sopenharmony_ci		validation_state->tmu_setup[i / 4].p_offset[i % 4] = ~0;
7298c2ecf20Sopenharmony_ci
7308c2ecf20Sopenharmony_ci	for (i = 0; i < LIVE_REG_COUNT; i++) {
7318c2ecf20Sopenharmony_ci		validation_state->live_min_clamp_offsets[i] = ~0;
7328c2ecf20Sopenharmony_ci		validation_state->live_max_clamp_regs[i] = false;
7338c2ecf20Sopenharmony_ci		validation_state->live_immediates[i] = ~0;
7348c2ecf20Sopenharmony_ci	}
7358c2ecf20Sopenharmony_ci}
7368c2ecf20Sopenharmony_ci
7378c2ecf20Sopenharmony_cistatic bool
7388c2ecf20Sopenharmony_citexturing_in_progress(struct vc4_shader_validation_state *validation_state)
7398c2ecf20Sopenharmony_ci{
7408c2ecf20Sopenharmony_ci	return (validation_state->tmu_write_count[0] != 0 ||
7418c2ecf20Sopenharmony_ci		validation_state->tmu_write_count[1] != 0);
7428c2ecf20Sopenharmony_ci}
7438c2ecf20Sopenharmony_ci
7448c2ecf20Sopenharmony_cistatic bool
7458c2ecf20Sopenharmony_civc4_handle_branch_target(struct vc4_shader_validation_state *validation_state)
7468c2ecf20Sopenharmony_ci{
7478c2ecf20Sopenharmony_ci	uint32_t ip = validation_state->ip;
7488c2ecf20Sopenharmony_ci
7498c2ecf20Sopenharmony_ci	if (!test_bit(ip, validation_state->branch_targets))
7508c2ecf20Sopenharmony_ci		return true;
7518c2ecf20Sopenharmony_ci
7528c2ecf20Sopenharmony_ci	if (texturing_in_progress(validation_state)) {
7538c2ecf20Sopenharmony_ci		DRM_DEBUG("Branch target landed during TMU setup\n");
7548c2ecf20Sopenharmony_ci		return false;
7558c2ecf20Sopenharmony_ci	}
7568c2ecf20Sopenharmony_ci
7578c2ecf20Sopenharmony_ci	/* Reset our live values tracking, since this instruction may have
7588c2ecf20Sopenharmony_ci	 * multiple predecessors.
7598c2ecf20Sopenharmony_ci	 *
7608c2ecf20Sopenharmony_ci	 * One could potentially do analysis to determine that, for
7618c2ecf20Sopenharmony_ci	 * example, all predecessors have a live max clamp in the same
7628c2ecf20Sopenharmony_ci	 * register, but we don't bother with that.
7638c2ecf20Sopenharmony_ci	 */
7648c2ecf20Sopenharmony_ci	reset_validation_state(validation_state);
7658c2ecf20Sopenharmony_ci
7668c2ecf20Sopenharmony_ci	/* Since we've entered a basic block from potentially multiple
7678c2ecf20Sopenharmony_ci	 * predecessors, we need the uniforms address to be updated before any
7688c2ecf20Sopenharmony_ci	 * unforms are read.  We require that after any branch point, the next
7698c2ecf20Sopenharmony_ci	 * uniform to be loaded is a uniform address offset.  That uniform's
7708c2ecf20Sopenharmony_ci	 * offset will be marked by the uniform address register write
7718c2ecf20Sopenharmony_ci	 * validation, or a one-off the end-of-program check.
7728c2ecf20Sopenharmony_ci	 */
7738c2ecf20Sopenharmony_ci	validation_state->needs_uniform_address_update = true;
7748c2ecf20Sopenharmony_ci
7758c2ecf20Sopenharmony_ci	return true;
7768c2ecf20Sopenharmony_ci}
7778c2ecf20Sopenharmony_ci
7788c2ecf20Sopenharmony_cistruct vc4_validated_shader_info *
7798c2ecf20Sopenharmony_civc4_validate_shader(struct drm_gem_cma_object *shader_obj)
7808c2ecf20Sopenharmony_ci{
7818c2ecf20Sopenharmony_ci	bool found_shader_end = false;
7828c2ecf20Sopenharmony_ci	int shader_end_ip = 0;
7838c2ecf20Sopenharmony_ci	uint32_t last_thread_switch_ip = -3;
7848c2ecf20Sopenharmony_ci	uint32_t ip;
7858c2ecf20Sopenharmony_ci	struct vc4_validated_shader_info *validated_shader = NULL;
7868c2ecf20Sopenharmony_ci	struct vc4_shader_validation_state validation_state;
7878c2ecf20Sopenharmony_ci
7888c2ecf20Sopenharmony_ci	memset(&validation_state, 0, sizeof(validation_state));
7898c2ecf20Sopenharmony_ci	validation_state.shader = shader_obj->vaddr;
7908c2ecf20Sopenharmony_ci	validation_state.max_ip = shader_obj->base.size / sizeof(uint64_t);
7918c2ecf20Sopenharmony_ci
7928c2ecf20Sopenharmony_ci	reset_validation_state(&validation_state);
7938c2ecf20Sopenharmony_ci
7948c2ecf20Sopenharmony_ci	validation_state.branch_targets =
7958c2ecf20Sopenharmony_ci		kcalloc(BITS_TO_LONGS(validation_state.max_ip),
7968c2ecf20Sopenharmony_ci			sizeof(unsigned long), GFP_KERNEL);
7978c2ecf20Sopenharmony_ci	if (!validation_state.branch_targets)
7988c2ecf20Sopenharmony_ci		goto fail;
7998c2ecf20Sopenharmony_ci
8008c2ecf20Sopenharmony_ci	validated_shader = kcalloc(1, sizeof(*validated_shader), GFP_KERNEL);
8018c2ecf20Sopenharmony_ci	if (!validated_shader)
8028c2ecf20Sopenharmony_ci		goto fail;
8038c2ecf20Sopenharmony_ci
8048c2ecf20Sopenharmony_ci	if (!vc4_validate_branches(&validation_state))
8058c2ecf20Sopenharmony_ci		goto fail;
8068c2ecf20Sopenharmony_ci
8078c2ecf20Sopenharmony_ci	for (ip = 0; ip < validation_state.max_ip; ip++) {
8088c2ecf20Sopenharmony_ci		uint64_t inst = validation_state.shader[ip];
8098c2ecf20Sopenharmony_ci		uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
8108c2ecf20Sopenharmony_ci
8118c2ecf20Sopenharmony_ci		validation_state.ip = ip;
8128c2ecf20Sopenharmony_ci
8138c2ecf20Sopenharmony_ci		if (!vc4_handle_branch_target(&validation_state))
8148c2ecf20Sopenharmony_ci			goto fail;
8158c2ecf20Sopenharmony_ci
8168c2ecf20Sopenharmony_ci		if (ip == last_thread_switch_ip + 3) {
8178c2ecf20Sopenharmony_ci			/* Reset r0-r3 live clamp data */
8188c2ecf20Sopenharmony_ci			int i;
8198c2ecf20Sopenharmony_ci
8208c2ecf20Sopenharmony_ci			for (i = 64; i < LIVE_REG_COUNT; i++) {
8218c2ecf20Sopenharmony_ci				validation_state.live_min_clamp_offsets[i] = ~0;
8228c2ecf20Sopenharmony_ci				validation_state.live_max_clamp_regs[i] = false;
8238c2ecf20Sopenharmony_ci				validation_state.live_immediates[i] = ~0;
8248c2ecf20Sopenharmony_ci			}
8258c2ecf20Sopenharmony_ci		}
8268c2ecf20Sopenharmony_ci
8278c2ecf20Sopenharmony_ci		switch (sig) {
8288c2ecf20Sopenharmony_ci		case QPU_SIG_NONE:
8298c2ecf20Sopenharmony_ci		case QPU_SIG_WAIT_FOR_SCOREBOARD:
8308c2ecf20Sopenharmony_ci		case QPU_SIG_SCOREBOARD_UNLOCK:
8318c2ecf20Sopenharmony_ci		case QPU_SIG_COLOR_LOAD:
8328c2ecf20Sopenharmony_ci		case QPU_SIG_LOAD_TMU0:
8338c2ecf20Sopenharmony_ci		case QPU_SIG_LOAD_TMU1:
8348c2ecf20Sopenharmony_ci		case QPU_SIG_PROG_END:
8358c2ecf20Sopenharmony_ci		case QPU_SIG_SMALL_IMM:
8368c2ecf20Sopenharmony_ci		case QPU_SIG_THREAD_SWITCH:
8378c2ecf20Sopenharmony_ci		case QPU_SIG_LAST_THREAD_SWITCH:
8388c2ecf20Sopenharmony_ci			if (!check_instruction_writes(validated_shader,
8398c2ecf20Sopenharmony_ci						      &validation_state)) {
8408c2ecf20Sopenharmony_ci				DRM_DEBUG("Bad write at ip %d\n", ip);
8418c2ecf20Sopenharmony_ci				goto fail;
8428c2ecf20Sopenharmony_ci			}
8438c2ecf20Sopenharmony_ci
8448c2ecf20Sopenharmony_ci			if (!check_instruction_reads(validated_shader,
8458c2ecf20Sopenharmony_ci						     &validation_state))
8468c2ecf20Sopenharmony_ci				goto fail;
8478c2ecf20Sopenharmony_ci
8488c2ecf20Sopenharmony_ci			if (sig == QPU_SIG_PROG_END) {
8498c2ecf20Sopenharmony_ci				found_shader_end = true;
8508c2ecf20Sopenharmony_ci				shader_end_ip = ip;
8518c2ecf20Sopenharmony_ci			}
8528c2ecf20Sopenharmony_ci
8538c2ecf20Sopenharmony_ci			if (sig == QPU_SIG_THREAD_SWITCH ||
8548c2ecf20Sopenharmony_ci			    sig == QPU_SIG_LAST_THREAD_SWITCH) {
8558c2ecf20Sopenharmony_ci				validated_shader->is_threaded = true;
8568c2ecf20Sopenharmony_ci
8578c2ecf20Sopenharmony_ci				if (ip < last_thread_switch_ip + 3) {
8588c2ecf20Sopenharmony_ci					DRM_DEBUG("Thread switch too soon after "
8598c2ecf20Sopenharmony_ci						  "last switch at ip %d\n", ip);
8608c2ecf20Sopenharmony_ci					goto fail;
8618c2ecf20Sopenharmony_ci				}
8628c2ecf20Sopenharmony_ci				last_thread_switch_ip = ip;
8638c2ecf20Sopenharmony_ci			}
8648c2ecf20Sopenharmony_ci
8658c2ecf20Sopenharmony_ci			break;
8668c2ecf20Sopenharmony_ci
8678c2ecf20Sopenharmony_ci		case QPU_SIG_LOAD_IMM:
8688c2ecf20Sopenharmony_ci			if (!check_instruction_writes(validated_shader,
8698c2ecf20Sopenharmony_ci						      &validation_state)) {
8708c2ecf20Sopenharmony_ci				DRM_DEBUG("Bad LOAD_IMM write at ip %d\n", ip);
8718c2ecf20Sopenharmony_ci				goto fail;
8728c2ecf20Sopenharmony_ci			}
8738c2ecf20Sopenharmony_ci			break;
8748c2ecf20Sopenharmony_ci
8758c2ecf20Sopenharmony_ci		case QPU_SIG_BRANCH:
8768c2ecf20Sopenharmony_ci			if (!check_branch(inst, validated_shader,
8778c2ecf20Sopenharmony_ci					  &validation_state, ip))
8788c2ecf20Sopenharmony_ci				goto fail;
8798c2ecf20Sopenharmony_ci
8808c2ecf20Sopenharmony_ci			if (ip < last_thread_switch_ip + 3) {
8818c2ecf20Sopenharmony_ci				DRM_DEBUG("Branch in thread switch at ip %d",
8828c2ecf20Sopenharmony_ci					  ip);
8838c2ecf20Sopenharmony_ci				goto fail;
8848c2ecf20Sopenharmony_ci			}
8858c2ecf20Sopenharmony_ci
8868c2ecf20Sopenharmony_ci			break;
8878c2ecf20Sopenharmony_ci		default:
8888c2ecf20Sopenharmony_ci			DRM_DEBUG("Unsupported QPU signal %d at "
8898c2ecf20Sopenharmony_ci				  "instruction %d\n", sig, ip);
8908c2ecf20Sopenharmony_ci			goto fail;
8918c2ecf20Sopenharmony_ci		}
8928c2ecf20Sopenharmony_ci
8938c2ecf20Sopenharmony_ci		/* There are two delay slots after program end is signaled
8948c2ecf20Sopenharmony_ci		 * that are still executed, then we're finished.
8958c2ecf20Sopenharmony_ci		 */
8968c2ecf20Sopenharmony_ci		if (found_shader_end && ip == shader_end_ip + 2)
8978c2ecf20Sopenharmony_ci			break;
8988c2ecf20Sopenharmony_ci	}
8998c2ecf20Sopenharmony_ci
9008c2ecf20Sopenharmony_ci	if (ip == validation_state.max_ip) {
9018c2ecf20Sopenharmony_ci		DRM_DEBUG("shader failed to terminate before "
9028c2ecf20Sopenharmony_ci			  "shader BO end at %zd\n",
9038c2ecf20Sopenharmony_ci			  shader_obj->base.size);
9048c2ecf20Sopenharmony_ci		goto fail;
9058c2ecf20Sopenharmony_ci	}
9068c2ecf20Sopenharmony_ci
9078c2ecf20Sopenharmony_ci	/* Might corrupt other thread */
9088c2ecf20Sopenharmony_ci	if (validated_shader->is_threaded &&
9098c2ecf20Sopenharmony_ci	    validation_state.all_registers_used) {
9108c2ecf20Sopenharmony_ci		DRM_DEBUG("Shader uses threading, but uses the upper "
9118c2ecf20Sopenharmony_ci			  "half of the registers, too\n");
9128c2ecf20Sopenharmony_ci		goto fail;
9138c2ecf20Sopenharmony_ci	}
9148c2ecf20Sopenharmony_ci
9158c2ecf20Sopenharmony_ci	/* If we did a backwards branch and we haven't emitted a uniforms
9168c2ecf20Sopenharmony_ci	 * reset since then, we still need the uniforms stream to have the
9178c2ecf20Sopenharmony_ci	 * uniforms address available so that the backwards branch can do its
9188c2ecf20Sopenharmony_ci	 * uniforms reset.
9198c2ecf20Sopenharmony_ci	 *
9208c2ecf20Sopenharmony_ci	 * We could potentially prove that the backwards branch doesn't
9218c2ecf20Sopenharmony_ci	 * contain any uses of uniforms until program exit, but that doesn't
9228c2ecf20Sopenharmony_ci	 * seem to be worth the trouble.
9238c2ecf20Sopenharmony_ci	 */
9248c2ecf20Sopenharmony_ci	if (validation_state.needs_uniform_address_for_loop) {
9258c2ecf20Sopenharmony_ci		if (!require_uniform_address_uniform(validated_shader))
9268c2ecf20Sopenharmony_ci			goto fail;
9278c2ecf20Sopenharmony_ci		validated_shader->uniforms_size += 4;
9288c2ecf20Sopenharmony_ci	}
9298c2ecf20Sopenharmony_ci
9308c2ecf20Sopenharmony_ci	/* Again, no chance of integer overflow here because the worst case
9318c2ecf20Sopenharmony_ci	 * scenario is 8 bytes of uniforms plus handles per 8-byte
9328c2ecf20Sopenharmony_ci	 * instruction.
9338c2ecf20Sopenharmony_ci	 */
9348c2ecf20Sopenharmony_ci	validated_shader->uniforms_src_size =
9358c2ecf20Sopenharmony_ci		(validated_shader->uniforms_size +
9368c2ecf20Sopenharmony_ci		 4 * validated_shader->num_texture_samples);
9378c2ecf20Sopenharmony_ci
9388c2ecf20Sopenharmony_ci	kfree(validation_state.branch_targets);
9398c2ecf20Sopenharmony_ci
9408c2ecf20Sopenharmony_ci	return validated_shader;
9418c2ecf20Sopenharmony_ci
9428c2ecf20Sopenharmony_cifail:
9438c2ecf20Sopenharmony_ci	kfree(validation_state.branch_targets);
9448c2ecf20Sopenharmony_ci	if (validated_shader) {
9458c2ecf20Sopenharmony_ci		kfree(validated_shader->uniform_addr_offsets);
9468c2ecf20Sopenharmony_ci		kfree(validated_shader->texture_samples);
9478c2ecf20Sopenharmony_ci		kfree(validated_shader);
9488c2ecf20Sopenharmony_ci	}
9498c2ecf20Sopenharmony_ci	return NULL;
9508c2ecf20Sopenharmony_ci}
951