162306a36Sopenharmony_ci/* 262306a36Sopenharmony_ci * Copyright © 2014 Broadcom 362306a36Sopenharmony_ci * 462306a36Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 562306a36Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 662306a36Sopenharmony_ci * to deal in the Software without restriction, including without limitation 762306a36Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 862306a36Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 962306a36Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 1062306a36Sopenharmony_ci * 1162306a36Sopenharmony_ci * The above copyright notice and this permission notice (including the next 1262306a36Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 1362306a36Sopenharmony_ci * Software. 1462306a36Sopenharmony_ci * 1562306a36Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1662306a36Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1762306a36Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1862306a36Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 1962306a36Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 2062306a36Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 2162306a36Sopenharmony_ci * IN THE SOFTWARE. 2262306a36Sopenharmony_ci */ 2362306a36Sopenharmony_ci 2462306a36Sopenharmony_ci/** 2562306a36Sopenharmony_ci * DOC: Shader validator for VC4. 2662306a36Sopenharmony_ci * 2762306a36Sopenharmony_ci * Since the VC4 has no IOMMU between it and system memory, a user 2862306a36Sopenharmony_ci * with access to execute shaders could escalate privilege by 2962306a36Sopenharmony_ci * overwriting system memory (using the VPM write address register in 3062306a36Sopenharmony_ci * the general-purpose DMA mode) or reading system memory it shouldn't 3162306a36Sopenharmony_ci * (reading it as a texture, uniform data, or direct-addressed TMU 3262306a36Sopenharmony_ci * lookup). 3362306a36Sopenharmony_ci * 3462306a36Sopenharmony_ci * The shader validator walks over a shader's BO, ensuring that its 3562306a36Sopenharmony_ci * accesses are appropriately bounded, and recording where texture 3662306a36Sopenharmony_ci * accesses are made so that we can do relocations for them in the 3762306a36Sopenharmony_ci * uniform stream. 3862306a36Sopenharmony_ci * 3962306a36Sopenharmony_ci * Shader BO are immutable for their lifetimes (enforced by not 4062306a36Sopenharmony_ci * allowing mmaps, GEM prime export, or rendering to from a CL), so 4162306a36Sopenharmony_ci * this validation is only performed at BO creation time. 4262306a36Sopenharmony_ci */ 4362306a36Sopenharmony_ci 4462306a36Sopenharmony_ci#include "vc4_drv.h" 4562306a36Sopenharmony_ci#include "vc4_qpu_defines.h" 4662306a36Sopenharmony_ci 4762306a36Sopenharmony_ci#define LIVE_REG_COUNT (32 + 32 + 4) 4862306a36Sopenharmony_ci 4962306a36Sopenharmony_cistruct vc4_shader_validation_state { 5062306a36Sopenharmony_ci /* Current IP being validated. */ 5162306a36Sopenharmony_ci uint32_t ip; 5262306a36Sopenharmony_ci 5362306a36Sopenharmony_ci /* IP at the end of the BO, do not read shader[max_ip] */ 5462306a36Sopenharmony_ci uint32_t max_ip; 5562306a36Sopenharmony_ci 5662306a36Sopenharmony_ci uint64_t *shader; 5762306a36Sopenharmony_ci 5862306a36Sopenharmony_ci struct vc4_texture_sample_info tmu_setup[2]; 5962306a36Sopenharmony_ci int tmu_write_count[2]; 6062306a36Sopenharmony_ci 6162306a36Sopenharmony_ci /* For registers that were last written to by a MIN instruction with 6262306a36Sopenharmony_ci * one argument being a uniform, the address of the uniform. 6362306a36Sopenharmony_ci * Otherwise, ~0. 6462306a36Sopenharmony_ci * 6562306a36Sopenharmony_ci * This is used for the validation of direct address memory reads. 6662306a36Sopenharmony_ci */ 6762306a36Sopenharmony_ci uint32_t live_min_clamp_offsets[LIVE_REG_COUNT]; 6862306a36Sopenharmony_ci bool live_max_clamp_regs[LIVE_REG_COUNT]; 6962306a36Sopenharmony_ci uint32_t live_immediates[LIVE_REG_COUNT]; 7062306a36Sopenharmony_ci 7162306a36Sopenharmony_ci /* Bitfield of which IPs are used as branch targets. 7262306a36Sopenharmony_ci * 7362306a36Sopenharmony_ci * Used for validation that the uniform stream is updated at the right 7462306a36Sopenharmony_ci * points and clearing the texturing/clamping state. 7562306a36Sopenharmony_ci */ 7662306a36Sopenharmony_ci unsigned long *branch_targets; 7762306a36Sopenharmony_ci 7862306a36Sopenharmony_ci /* Set when entering a basic block, and cleared when the uniform 7962306a36Sopenharmony_ci * address update is found. This is used to make sure that we don't 8062306a36Sopenharmony_ci * read uniforms when the address is undefined. 8162306a36Sopenharmony_ci */ 8262306a36Sopenharmony_ci bool needs_uniform_address_update; 8362306a36Sopenharmony_ci 8462306a36Sopenharmony_ci /* Set when we find a backwards branch. If the branch is backwards, 8562306a36Sopenharmony_ci * the taraget is probably doing an address reset to read uniforms, 8662306a36Sopenharmony_ci * and so we need to be sure that a uniforms address is present in the 8762306a36Sopenharmony_ci * stream, even if the shader didn't need to read uniforms in later 8862306a36Sopenharmony_ci * basic blocks. 8962306a36Sopenharmony_ci */ 9062306a36Sopenharmony_ci bool needs_uniform_address_for_loop; 9162306a36Sopenharmony_ci 9262306a36Sopenharmony_ci /* Set when we find an instruction writing the top half of the 9362306a36Sopenharmony_ci * register files. If we allowed writing the unusable regs in 9462306a36Sopenharmony_ci * a threaded shader, then the other shader running on our 9562306a36Sopenharmony_ci * QPU's clamp validation would be invalid. 9662306a36Sopenharmony_ci */ 9762306a36Sopenharmony_ci bool all_registers_used; 9862306a36Sopenharmony_ci}; 9962306a36Sopenharmony_ci 10062306a36Sopenharmony_cistatic uint32_t 10162306a36Sopenharmony_ciwaddr_to_live_reg_index(uint32_t waddr, bool is_b) 10262306a36Sopenharmony_ci{ 10362306a36Sopenharmony_ci if (waddr < 32) { 10462306a36Sopenharmony_ci if (is_b) 10562306a36Sopenharmony_ci return 32 + waddr; 10662306a36Sopenharmony_ci else 10762306a36Sopenharmony_ci return waddr; 10862306a36Sopenharmony_ci } else if (waddr <= QPU_W_ACC3) { 10962306a36Sopenharmony_ci return 64 + waddr - QPU_W_ACC0; 11062306a36Sopenharmony_ci } else { 11162306a36Sopenharmony_ci return ~0; 11262306a36Sopenharmony_ci } 11362306a36Sopenharmony_ci} 11462306a36Sopenharmony_ci 11562306a36Sopenharmony_cistatic uint32_t 11662306a36Sopenharmony_ciraddr_add_a_to_live_reg_index(uint64_t inst) 11762306a36Sopenharmony_ci{ 11862306a36Sopenharmony_ci uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); 11962306a36Sopenharmony_ci uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A); 12062306a36Sopenharmony_ci uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A); 12162306a36Sopenharmony_ci uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B); 12262306a36Sopenharmony_ci 12362306a36Sopenharmony_ci if (add_a == QPU_MUX_A) 12462306a36Sopenharmony_ci return raddr_a; 12562306a36Sopenharmony_ci else if (add_a == QPU_MUX_B && sig != QPU_SIG_SMALL_IMM) 12662306a36Sopenharmony_ci return 32 + raddr_b; 12762306a36Sopenharmony_ci else if (add_a <= QPU_MUX_R3) 12862306a36Sopenharmony_ci return 64 + add_a; 12962306a36Sopenharmony_ci else 13062306a36Sopenharmony_ci return ~0; 13162306a36Sopenharmony_ci} 13262306a36Sopenharmony_ci 13362306a36Sopenharmony_cistatic bool 13462306a36Sopenharmony_cilive_reg_is_upper_half(uint32_t lri) 13562306a36Sopenharmony_ci{ 13662306a36Sopenharmony_ci return (lri >= 16 && lri < 32) || 13762306a36Sopenharmony_ci (lri >= 32 + 16 && lri < 32 + 32); 13862306a36Sopenharmony_ci} 13962306a36Sopenharmony_ci 14062306a36Sopenharmony_cistatic bool 14162306a36Sopenharmony_ciis_tmu_submit(uint32_t waddr) 14262306a36Sopenharmony_ci{ 14362306a36Sopenharmony_ci return (waddr == QPU_W_TMU0_S || 14462306a36Sopenharmony_ci waddr == QPU_W_TMU1_S); 14562306a36Sopenharmony_ci} 14662306a36Sopenharmony_ci 14762306a36Sopenharmony_cistatic bool 14862306a36Sopenharmony_ciis_tmu_write(uint32_t waddr) 14962306a36Sopenharmony_ci{ 15062306a36Sopenharmony_ci return (waddr >= QPU_W_TMU0_S && 15162306a36Sopenharmony_ci waddr <= QPU_W_TMU1_B); 15262306a36Sopenharmony_ci} 15362306a36Sopenharmony_ci 15462306a36Sopenharmony_cistatic bool 15562306a36Sopenharmony_cirecord_texture_sample(struct vc4_validated_shader_info *validated_shader, 15662306a36Sopenharmony_ci struct vc4_shader_validation_state *validation_state, 15762306a36Sopenharmony_ci int tmu) 15862306a36Sopenharmony_ci{ 15962306a36Sopenharmony_ci uint32_t s = validated_shader->num_texture_samples; 16062306a36Sopenharmony_ci int i; 16162306a36Sopenharmony_ci struct vc4_texture_sample_info *temp_samples; 16262306a36Sopenharmony_ci 16362306a36Sopenharmony_ci temp_samples = krealloc(validated_shader->texture_samples, 16462306a36Sopenharmony_ci (s + 1) * sizeof(*temp_samples), 16562306a36Sopenharmony_ci GFP_KERNEL); 16662306a36Sopenharmony_ci if (!temp_samples) 16762306a36Sopenharmony_ci return false; 16862306a36Sopenharmony_ci 16962306a36Sopenharmony_ci memcpy(&temp_samples[s], 17062306a36Sopenharmony_ci &validation_state->tmu_setup[tmu], 17162306a36Sopenharmony_ci sizeof(*temp_samples)); 17262306a36Sopenharmony_ci 17362306a36Sopenharmony_ci validated_shader->num_texture_samples = s + 1; 17462306a36Sopenharmony_ci validated_shader->texture_samples = temp_samples; 17562306a36Sopenharmony_ci 17662306a36Sopenharmony_ci for (i = 0; i < 4; i++) 17762306a36Sopenharmony_ci validation_state->tmu_setup[tmu].p_offset[i] = ~0; 17862306a36Sopenharmony_ci 17962306a36Sopenharmony_ci return true; 18062306a36Sopenharmony_ci} 18162306a36Sopenharmony_ci 18262306a36Sopenharmony_cistatic bool 18362306a36Sopenharmony_cicheck_tmu_write(struct vc4_validated_shader_info *validated_shader, 18462306a36Sopenharmony_ci struct vc4_shader_validation_state *validation_state, 18562306a36Sopenharmony_ci bool is_mul) 18662306a36Sopenharmony_ci{ 18762306a36Sopenharmony_ci uint64_t inst = validation_state->shader[validation_state->ip]; 18862306a36Sopenharmony_ci uint32_t waddr = (is_mul ? 18962306a36Sopenharmony_ci QPU_GET_FIELD(inst, QPU_WADDR_MUL) : 19062306a36Sopenharmony_ci QPU_GET_FIELD(inst, QPU_WADDR_ADD)); 19162306a36Sopenharmony_ci uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A); 19262306a36Sopenharmony_ci uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B); 19362306a36Sopenharmony_ci int tmu = waddr > QPU_W_TMU0_B; 19462306a36Sopenharmony_ci bool submit = is_tmu_submit(waddr); 19562306a36Sopenharmony_ci bool is_direct = submit && validation_state->tmu_write_count[tmu] == 0; 19662306a36Sopenharmony_ci uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); 19762306a36Sopenharmony_ci 19862306a36Sopenharmony_ci if (is_direct) { 19962306a36Sopenharmony_ci uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B); 20062306a36Sopenharmony_ci uint32_t clamp_reg, clamp_offset; 20162306a36Sopenharmony_ci 20262306a36Sopenharmony_ci if (sig == QPU_SIG_SMALL_IMM) { 20362306a36Sopenharmony_ci DRM_DEBUG("direct TMU read used small immediate\n"); 20462306a36Sopenharmony_ci return false; 20562306a36Sopenharmony_ci } 20662306a36Sopenharmony_ci 20762306a36Sopenharmony_ci /* Make sure that this texture load is an add of the base 20862306a36Sopenharmony_ci * address of the UBO to a clamped offset within the UBO. 20962306a36Sopenharmony_ci */ 21062306a36Sopenharmony_ci if (is_mul || 21162306a36Sopenharmony_ci QPU_GET_FIELD(inst, QPU_OP_ADD) != QPU_A_ADD) { 21262306a36Sopenharmony_ci DRM_DEBUG("direct TMU load wasn't an add\n"); 21362306a36Sopenharmony_ci return false; 21462306a36Sopenharmony_ci } 21562306a36Sopenharmony_ci 21662306a36Sopenharmony_ci /* We assert that the clamped address is the first 21762306a36Sopenharmony_ci * argument, and the UBO base address is the second argument. 21862306a36Sopenharmony_ci * This is arbitrary, but simpler than supporting flipping the 21962306a36Sopenharmony_ci * two either way. 22062306a36Sopenharmony_ci */ 22162306a36Sopenharmony_ci clamp_reg = raddr_add_a_to_live_reg_index(inst); 22262306a36Sopenharmony_ci if (clamp_reg == ~0) { 22362306a36Sopenharmony_ci DRM_DEBUG("direct TMU load wasn't clamped\n"); 22462306a36Sopenharmony_ci return false; 22562306a36Sopenharmony_ci } 22662306a36Sopenharmony_ci 22762306a36Sopenharmony_ci clamp_offset = validation_state->live_min_clamp_offsets[clamp_reg]; 22862306a36Sopenharmony_ci if (clamp_offset == ~0) { 22962306a36Sopenharmony_ci DRM_DEBUG("direct TMU load wasn't clamped\n"); 23062306a36Sopenharmony_ci return false; 23162306a36Sopenharmony_ci } 23262306a36Sopenharmony_ci 23362306a36Sopenharmony_ci /* Store the clamp value's offset in p1 (see reloc_tex() in 23462306a36Sopenharmony_ci * vc4_validate.c). 23562306a36Sopenharmony_ci */ 23662306a36Sopenharmony_ci validation_state->tmu_setup[tmu].p_offset[1] = 23762306a36Sopenharmony_ci clamp_offset; 23862306a36Sopenharmony_ci 23962306a36Sopenharmony_ci if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) && 24062306a36Sopenharmony_ci !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF)) { 24162306a36Sopenharmony_ci DRM_DEBUG("direct TMU load didn't add to a uniform\n"); 24262306a36Sopenharmony_ci return false; 24362306a36Sopenharmony_ci } 24462306a36Sopenharmony_ci 24562306a36Sopenharmony_ci validation_state->tmu_setup[tmu].is_direct = true; 24662306a36Sopenharmony_ci } else { 24762306a36Sopenharmony_ci if (raddr_a == QPU_R_UNIF || (sig != QPU_SIG_SMALL_IMM && 24862306a36Sopenharmony_ci raddr_b == QPU_R_UNIF)) { 24962306a36Sopenharmony_ci DRM_DEBUG("uniform read in the same instruction as " 25062306a36Sopenharmony_ci "texture setup.\n"); 25162306a36Sopenharmony_ci return false; 25262306a36Sopenharmony_ci } 25362306a36Sopenharmony_ci } 25462306a36Sopenharmony_ci 25562306a36Sopenharmony_ci if (validation_state->tmu_write_count[tmu] >= 4) { 25662306a36Sopenharmony_ci DRM_DEBUG("TMU%d got too many parameters before dispatch\n", 25762306a36Sopenharmony_ci tmu); 25862306a36Sopenharmony_ci return false; 25962306a36Sopenharmony_ci } 26062306a36Sopenharmony_ci validation_state->tmu_setup[tmu].p_offset[validation_state->tmu_write_count[tmu]] = 26162306a36Sopenharmony_ci validated_shader->uniforms_size; 26262306a36Sopenharmony_ci validation_state->tmu_write_count[tmu]++; 26362306a36Sopenharmony_ci /* Since direct uses a RADDR uniform reference, it will get counted in 26462306a36Sopenharmony_ci * check_instruction_reads() 26562306a36Sopenharmony_ci */ 26662306a36Sopenharmony_ci if (!is_direct) { 26762306a36Sopenharmony_ci if (validation_state->needs_uniform_address_update) { 26862306a36Sopenharmony_ci DRM_DEBUG("Texturing with undefined uniform address\n"); 26962306a36Sopenharmony_ci return false; 27062306a36Sopenharmony_ci } 27162306a36Sopenharmony_ci 27262306a36Sopenharmony_ci validated_shader->uniforms_size += 4; 27362306a36Sopenharmony_ci } 27462306a36Sopenharmony_ci 27562306a36Sopenharmony_ci if (submit) { 27662306a36Sopenharmony_ci if (!record_texture_sample(validated_shader, 27762306a36Sopenharmony_ci validation_state, tmu)) { 27862306a36Sopenharmony_ci return false; 27962306a36Sopenharmony_ci } 28062306a36Sopenharmony_ci 28162306a36Sopenharmony_ci validation_state->tmu_write_count[tmu] = 0; 28262306a36Sopenharmony_ci } 28362306a36Sopenharmony_ci 28462306a36Sopenharmony_ci return true; 28562306a36Sopenharmony_ci} 28662306a36Sopenharmony_ci 28762306a36Sopenharmony_cistatic bool require_uniform_address_uniform(struct vc4_validated_shader_info *validated_shader) 28862306a36Sopenharmony_ci{ 28962306a36Sopenharmony_ci uint32_t o = validated_shader->num_uniform_addr_offsets; 29062306a36Sopenharmony_ci uint32_t num_uniforms = validated_shader->uniforms_size / 4; 29162306a36Sopenharmony_ci 29262306a36Sopenharmony_ci validated_shader->uniform_addr_offsets = 29362306a36Sopenharmony_ci krealloc(validated_shader->uniform_addr_offsets, 29462306a36Sopenharmony_ci (o + 1) * 29562306a36Sopenharmony_ci sizeof(*validated_shader->uniform_addr_offsets), 29662306a36Sopenharmony_ci GFP_KERNEL); 29762306a36Sopenharmony_ci if (!validated_shader->uniform_addr_offsets) 29862306a36Sopenharmony_ci return false; 29962306a36Sopenharmony_ci 30062306a36Sopenharmony_ci validated_shader->uniform_addr_offsets[o] = num_uniforms; 30162306a36Sopenharmony_ci validated_shader->num_uniform_addr_offsets++; 30262306a36Sopenharmony_ci 30362306a36Sopenharmony_ci return true; 30462306a36Sopenharmony_ci} 30562306a36Sopenharmony_ci 30662306a36Sopenharmony_cistatic bool 30762306a36Sopenharmony_civalidate_uniform_address_write(struct vc4_validated_shader_info *validated_shader, 30862306a36Sopenharmony_ci struct vc4_shader_validation_state *validation_state, 30962306a36Sopenharmony_ci bool is_mul) 31062306a36Sopenharmony_ci{ 31162306a36Sopenharmony_ci uint64_t inst = validation_state->shader[validation_state->ip]; 31262306a36Sopenharmony_ci u32 add_b = QPU_GET_FIELD(inst, QPU_ADD_B); 31362306a36Sopenharmony_ci u32 raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A); 31462306a36Sopenharmony_ci u32 raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B); 31562306a36Sopenharmony_ci u32 add_lri = raddr_add_a_to_live_reg_index(inst); 31662306a36Sopenharmony_ci /* We want our reset to be pointing at whatever uniform follows the 31762306a36Sopenharmony_ci * uniforms base address. 31862306a36Sopenharmony_ci */ 31962306a36Sopenharmony_ci u32 expected_offset = validated_shader->uniforms_size + 4; 32062306a36Sopenharmony_ci 32162306a36Sopenharmony_ci /* We only support absolute uniform address changes, and we 32262306a36Sopenharmony_ci * require that they be in the current basic block before any 32362306a36Sopenharmony_ci * of its uniform reads. 32462306a36Sopenharmony_ci * 32562306a36Sopenharmony_ci * One could potentially emit more efficient QPU code, by 32662306a36Sopenharmony_ci * noticing that (say) an if statement does uniform control 32762306a36Sopenharmony_ci * flow for all threads and that the if reads the same number 32862306a36Sopenharmony_ci * of uniforms on each side. However, this scheme is easy to 32962306a36Sopenharmony_ci * validate so it's all we allow for now. 33062306a36Sopenharmony_ci */ 33162306a36Sopenharmony_ci switch (QPU_GET_FIELD(inst, QPU_SIG)) { 33262306a36Sopenharmony_ci case QPU_SIG_NONE: 33362306a36Sopenharmony_ci case QPU_SIG_SCOREBOARD_UNLOCK: 33462306a36Sopenharmony_ci case QPU_SIG_COLOR_LOAD: 33562306a36Sopenharmony_ci case QPU_SIG_LOAD_TMU0: 33662306a36Sopenharmony_ci case QPU_SIG_LOAD_TMU1: 33762306a36Sopenharmony_ci break; 33862306a36Sopenharmony_ci default: 33962306a36Sopenharmony_ci DRM_DEBUG("uniforms address change must be " 34062306a36Sopenharmony_ci "normal math\n"); 34162306a36Sopenharmony_ci return false; 34262306a36Sopenharmony_ci } 34362306a36Sopenharmony_ci 34462306a36Sopenharmony_ci if (is_mul || QPU_GET_FIELD(inst, QPU_OP_ADD) != QPU_A_ADD) { 34562306a36Sopenharmony_ci DRM_DEBUG("Uniform address reset must be an ADD.\n"); 34662306a36Sopenharmony_ci return false; 34762306a36Sopenharmony_ci } 34862306a36Sopenharmony_ci 34962306a36Sopenharmony_ci if (QPU_GET_FIELD(inst, QPU_COND_ADD) != QPU_COND_ALWAYS) { 35062306a36Sopenharmony_ci DRM_DEBUG("Uniform address reset must be unconditional.\n"); 35162306a36Sopenharmony_ci return false; 35262306a36Sopenharmony_ci } 35362306a36Sopenharmony_ci 35462306a36Sopenharmony_ci if (QPU_GET_FIELD(inst, QPU_PACK) != QPU_PACK_A_NOP && 35562306a36Sopenharmony_ci !(inst & QPU_PM)) { 35662306a36Sopenharmony_ci DRM_DEBUG("No packing allowed on uniforms reset\n"); 35762306a36Sopenharmony_ci return false; 35862306a36Sopenharmony_ci } 35962306a36Sopenharmony_ci 36062306a36Sopenharmony_ci if (add_lri == -1) { 36162306a36Sopenharmony_ci DRM_DEBUG("First argument of uniform address write must be " 36262306a36Sopenharmony_ci "an immediate value.\n"); 36362306a36Sopenharmony_ci return false; 36462306a36Sopenharmony_ci } 36562306a36Sopenharmony_ci 36662306a36Sopenharmony_ci if (validation_state->live_immediates[add_lri] != expected_offset) { 36762306a36Sopenharmony_ci DRM_DEBUG("Resetting uniforms with offset %db instead of %db\n", 36862306a36Sopenharmony_ci validation_state->live_immediates[add_lri], 36962306a36Sopenharmony_ci expected_offset); 37062306a36Sopenharmony_ci return false; 37162306a36Sopenharmony_ci } 37262306a36Sopenharmony_ci 37362306a36Sopenharmony_ci if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) && 37462306a36Sopenharmony_ci !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF)) { 37562306a36Sopenharmony_ci DRM_DEBUG("Second argument of uniform address write must be " 37662306a36Sopenharmony_ci "a uniform.\n"); 37762306a36Sopenharmony_ci return false; 37862306a36Sopenharmony_ci } 37962306a36Sopenharmony_ci 38062306a36Sopenharmony_ci validation_state->needs_uniform_address_update = false; 38162306a36Sopenharmony_ci validation_state->needs_uniform_address_for_loop = false; 38262306a36Sopenharmony_ci return require_uniform_address_uniform(validated_shader); 38362306a36Sopenharmony_ci} 38462306a36Sopenharmony_ci 38562306a36Sopenharmony_cistatic bool 38662306a36Sopenharmony_cicheck_reg_write(struct vc4_validated_shader_info *validated_shader, 38762306a36Sopenharmony_ci struct vc4_shader_validation_state *validation_state, 38862306a36Sopenharmony_ci bool is_mul) 38962306a36Sopenharmony_ci{ 39062306a36Sopenharmony_ci uint64_t inst = validation_state->shader[validation_state->ip]; 39162306a36Sopenharmony_ci uint32_t waddr = (is_mul ? 39262306a36Sopenharmony_ci QPU_GET_FIELD(inst, QPU_WADDR_MUL) : 39362306a36Sopenharmony_ci QPU_GET_FIELD(inst, QPU_WADDR_ADD)); 39462306a36Sopenharmony_ci uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); 39562306a36Sopenharmony_ci bool ws = inst & QPU_WS; 39662306a36Sopenharmony_ci bool is_b = is_mul ^ ws; 39762306a36Sopenharmony_ci u32 lri = waddr_to_live_reg_index(waddr, is_b); 39862306a36Sopenharmony_ci 39962306a36Sopenharmony_ci if (lri != -1) { 40062306a36Sopenharmony_ci uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD); 40162306a36Sopenharmony_ci uint32_t cond_mul = QPU_GET_FIELD(inst, QPU_COND_MUL); 40262306a36Sopenharmony_ci 40362306a36Sopenharmony_ci if (sig == QPU_SIG_LOAD_IMM && 40462306a36Sopenharmony_ci QPU_GET_FIELD(inst, QPU_PACK) == QPU_PACK_A_NOP && 40562306a36Sopenharmony_ci ((is_mul && cond_mul == QPU_COND_ALWAYS) || 40662306a36Sopenharmony_ci (!is_mul && cond_add == QPU_COND_ALWAYS))) { 40762306a36Sopenharmony_ci validation_state->live_immediates[lri] = 40862306a36Sopenharmony_ci QPU_GET_FIELD(inst, QPU_LOAD_IMM); 40962306a36Sopenharmony_ci } else { 41062306a36Sopenharmony_ci validation_state->live_immediates[lri] = ~0; 41162306a36Sopenharmony_ci } 41262306a36Sopenharmony_ci 41362306a36Sopenharmony_ci if (live_reg_is_upper_half(lri)) 41462306a36Sopenharmony_ci validation_state->all_registers_used = true; 41562306a36Sopenharmony_ci } 41662306a36Sopenharmony_ci 41762306a36Sopenharmony_ci switch (waddr) { 41862306a36Sopenharmony_ci case QPU_W_UNIFORMS_ADDRESS: 41962306a36Sopenharmony_ci if (is_b) { 42062306a36Sopenharmony_ci DRM_DEBUG("relative uniforms address change " 42162306a36Sopenharmony_ci "unsupported\n"); 42262306a36Sopenharmony_ci return false; 42362306a36Sopenharmony_ci } 42462306a36Sopenharmony_ci 42562306a36Sopenharmony_ci return validate_uniform_address_write(validated_shader, 42662306a36Sopenharmony_ci validation_state, 42762306a36Sopenharmony_ci is_mul); 42862306a36Sopenharmony_ci 42962306a36Sopenharmony_ci case QPU_W_TLB_COLOR_MS: 43062306a36Sopenharmony_ci case QPU_W_TLB_COLOR_ALL: 43162306a36Sopenharmony_ci case QPU_W_TLB_Z: 43262306a36Sopenharmony_ci /* These only interact with the tile buffer, not main memory, 43362306a36Sopenharmony_ci * so they're safe. 43462306a36Sopenharmony_ci */ 43562306a36Sopenharmony_ci return true; 43662306a36Sopenharmony_ci 43762306a36Sopenharmony_ci case QPU_W_TMU0_S: 43862306a36Sopenharmony_ci case QPU_W_TMU0_T: 43962306a36Sopenharmony_ci case QPU_W_TMU0_R: 44062306a36Sopenharmony_ci case QPU_W_TMU0_B: 44162306a36Sopenharmony_ci case QPU_W_TMU1_S: 44262306a36Sopenharmony_ci case QPU_W_TMU1_T: 44362306a36Sopenharmony_ci case QPU_W_TMU1_R: 44462306a36Sopenharmony_ci case QPU_W_TMU1_B: 44562306a36Sopenharmony_ci return check_tmu_write(validated_shader, validation_state, 44662306a36Sopenharmony_ci is_mul); 44762306a36Sopenharmony_ci 44862306a36Sopenharmony_ci case QPU_W_HOST_INT: 44962306a36Sopenharmony_ci case QPU_W_TMU_NOSWAP: 45062306a36Sopenharmony_ci case QPU_W_TLB_ALPHA_MASK: 45162306a36Sopenharmony_ci case QPU_W_MUTEX_RELEASE: 45262306a36Sopenharmony_ci /* XXX: I haven't thought about these, so don't support them 45362306a36Sopenharmony_ci * for now. 45462306a36Sopenharmony_ci */ 45562306a36Sopenharmony_ci DRM_DEBUG("Unsupported waddr %d\n", waddr); 45662306a36Sopenharmony_ci return false; 45762306a36Sopenharmony_ci 45862306a36Sopenharmony_ci case QPU_W_VPM_ADDR: 45962306a36Sopenharmony_ci DRM_DEBUG("General VPM DMA unsupported\n"); 46062306a36Sopenharmony_ci return false; 46162306a36Sopenharmony_ci 46262306a36Sopenharmony_ci case QPU_W_VPM: 46362306a36Sopenharmony_ci case QPU_W_VPMVCD_SETUP: 46462306a36Sopenharmony_ci /* We allow VPM setup in general, even including VPM DMA 46562306a36Sopenharmony_ci * configuration setup, because the (unsafe) DMA can only be 46662306a36Sopenharmony_ci * triggered by QPU_W_VPM_ADDR writes. 46762306a36Sopenharmony_ci */ 46862306a36Sopenharmony_ci return true; 46962306a36Sopenharmony_ci 47062306a36Sopenharmony_ci case QPU_W_TLB_STENCIL_SETUP: 47162306a36Sopenharmony_ci return true; 47262306a36Sopenharmony_ci } 47362306a36Sopenharmony_ci 47462306a36Sopenharmony_ci return true; 47562306a36Sopenharmony_ci} 47662306a36Sopenharmony_ci 47762306a36Sopenharmony_cistatic void 47862306a36Sopenharmony_citrack_live_clamps(struct vc4_validated_shader_info *validated_shader, 47962306a36Sopenharmony_ci struct vc4_shader_validation_state *validation_state) 48062306a36Sopenharmony_ci{ 48162306a36Sopenharmony_ci uint64_t inst = validation_state->shader[validation_state->ip]; 48262306a36Sopenharmony_ci uint32_t op_add = QPU_GET_FIELD(inst, QPU_OP_ADD); 48362306a36Sopenharmony_ci uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD); 48462306a36Sopenharmony_ci uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL); 48562306a36Sopenharmony_ci uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD); 48662306a36Sopenharmony_ci uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A); 48762306a36Sopenharmony_ci uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B); 48862306a36Sopenharmony_ci uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A); 48962306a36Sopenharmony_ci uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B); 49062306a36Sopenharmony_ci uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); 49162306a36Sopenharmony_ci bool ws = inst & QPU_WS; 49262306a36Sopenharmony_ci uint32_t lri_add_a, lri_add, lri_mul; 49362306a36Sopenharmony_ci bool add_a_is_min_0; 49462306a36Sopenharmony_ci 49562306a36Sopenharmony_ci /* Check whether OP_ADD's A argumennt comes from a live MAX(x, 0), 49662306a36Sopenharmony_ci * before we clear previous live state. 49762306a36Sopenharmony_ci */ 49862306a36Sopenharmony_ci lri_add_a = raddr_add_a_to_live_reg_index(inst); 49962306a36Sopenharmony_ci add_a_is_min_0 = (lri_add_a != ~0 && 50062306a36Sopenharmony_ci validation_state->live_max_clamp_regs[lri_add_a]); 50162306a36Sopenharmony_ci 50262306a36Sopenharmony_ci /* Clear live state for registers written by our instruction. */ 50362306a36Sopenharmony_ci lri_add = waddr_to_live_reg_index(waddr_add, ws); 50462306a36Sopenharmony_ci lri_mul = waddr_to_live_reg_index(waddr_mul, !ws); 50562306a36Sopenharmony_ci if (lri_mul != ~0) { 50662306a36Sopenharmony_ci validation_state->live_max_clamp_regs[lri_mul] = false; 50762306a36Sopenharmony_ci validation_state->live_min_clamp_offsets[lri_mul] = ~0; 50862306a36Sopenharmony_ci } 50962306a36Sopenharmony_ci if (lri_add != ~0) { 51062306a36Sopenharmony_ci validation_state->live_max_clamp_regs[lri_add] = false; 51162306a36Sopenharmony_ci validation_state->live_min_clamp_offsets[lri_add] = ~0; 51262306a36Sopenharmony_ci } else { 51362306a36Sopenharmony_ci /* Nothing further to do for live tracking, since only ADDs 51462306a36Sopenharmony_ci * generate new live clamp registers. 51562306a36Sopenharmony_ci */ 51662306a36Sopenharmony_ci return; 51762306a36Sopenharmony_ci } 51862306a36Sopenharmony_ci 51962306a36Sopenharmony_ci /* Now, handle remaining live clamp tracking for the ADD operation. */ 52062306a36Sopenharmony_ci 52162306a36Sopenharmony_ci if (cond_add != QPU_COND_ALWAYS) 52262306a36Sopenharmony_ci return; 52362306a36Sopenharmony_ci 52462306a36Sopenharmony_ci if (op_add == QPU_A_MAX) { 52562306a36Sopenharmony_ci /* Track live clamps of a value to a minimum of 0 (in either 52662306a36Sopenharmony_ci * arg). 52762306a36Sopenharmony_ci */ 52862306a36Sopenharmony_ci if (sig != QPU_SIG_SMALL_IMM || raddr_b != 0 || 52962306a36Sopenharmony_ci (add_a != QPU_MUX_B && add_b != QPU_MUX_B)) { 53062306a36Sopenharmony_ci return; 53162306a36Sopenharmony_ci } 53262306a36Sopenharmony_ci 53362306a36Sopenharmony_ci validation_state->live_max_clamp_regs[lri_add] = true; 53462306a36Sopenharmony_ci } else if (op_add == QPU_A_MIN) { 53562306a36Sopenharmony_ci /* Track live clamps of a value clamped to a minimum of 0 and 53662306a36Sopenharmony_ci * a maximum of some uniform's offset. 53762306a36Sopenharmony_ci */ 53862306a36Sopenharmony_ci if (!add_a_is_min_0) 53962306a36Sopenharmony_ci return; 54062306a36Sopenharmony_ci 54162306a36Sopenharmony_ci if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) && 54262306a36Sopenharmony_ci !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF && 54362306a36Sopenharmony_ci sig != QPU_SIG_SMALL_IMM)) { 54462306a36Sopenharmony_ci return; 54562306a36Sopenharmony_ci } 54662306a36Sopenharmony_ci 54762306a36Sopenharmony_ci validation_state->live_min_clamp_offsets[lri_add] = 54862306a36Sopenharmony_ci validated_shader->uniforms_size; 54962306a36Sopenharmony_ci } 55062306a36Sopenharmony_ci} 55162306a36Sopenharmony_ci 55262306a36Sopenharmony_cistatic bool 55362306a36Sopenharmony_cicheck_instruction_writes(struct vc4_validated_shader_info *validated_shader, 55462306a36Sopenharmony_ci struct vc4_shader_validation_state *validation_state) 55562306a36Sopenharmony_ci{ 55662306a36Sopenharmony_ci uint64_t inst = validation_state->shader[validation_state->ip]; 55762306a36Sopenharmony_ci uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD); 55862306a36Sopenharmony_ci uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL); 55962306a36Sopenharmony_ci bool ok; 56062306a36Sopenharmony_ci 56162306a36Sopenharmony_ci if (is_tmu_write(waddr_add) && is_tmu_write(waddr_mul)) { 56262306a36Sopenharmony_ci DRM_DEBUG("ADD and MUL both set up textures\n"); 56362306a36Sopenharmony_ci return false; 56462306a36Sopenharmony_ci } 56562306a36Sopenharmony_ci 56662306a36Sopenharmony_ci ok = (check_reg_write(validated_shader, validation_state, false) && 56762306a36Sopenharmony_ci check_reg_write(validated_shader, validation_state, true)); 56862306a36Sopenharmony_ci 56962306a36Sopenharmony_ci track_live_clamps(validated_shader, validation_state); 57062306a36Sopenharmony_ci 57162306a36Sopenharmony_ci return ok; 57262306a36Sopenharmony_ci} 57362306a36Sopenharmony_ci 57462306a36Sopenharmony_cistatic bool 57562306a36Sopenharmony_cicheck_branch(uint64_t inst, 57662306a36Sopenharmony_ci struct vc4_validated_shader_info *validated_shader, 57762306a36Sopenharmony_ci struct vc4_shader_validation_state *validation_state, 57862306a36Sopenharmony_ci int ip) 57962306a36Sopenharmony_ci{ 58062306a36Sopenharmony_ci int32_t branch_imm = QPU_GET_FIELD(inst, QPU_BRANCH_TARGET); 58162306a36Sopenharmony_ci uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD); 58262306a36Sopenharmony_ci uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL); 58362306a36Sopenharmony_ci 58462306a36Sopenharmony_ci if ((int)branch_imm < 0) 58562306a36Sopenharmony_ci validation_state->needs_uniform_address_for_loop = true; 58662306a36Sopenharmony_ci 58762306a36Sopenharmony_ci /* We don't want to have to worry about validation of this, and 58862306a36Sopenharmony_ci * there's no need for it. 58962306a36Sopenharmony_ci */ 59062306a36Sopenharmony_ci if (waddr_add != QPU_W_NOP || waddr_mul != QPU_W_NOP) { 59162306a36Sopenharmony_ci DRM_DEBUG("branch instruction at %d wrote a register.\n", 59262306a36Sopenharmony_ci validation_state->ip); 59362306a36Sopenharmony_ci return false; 59462306a36Sopenharmony_ci } 59562306a36Sopenharmony_ci 59662306a36Sopenharmony_ci return true; 59762306a36Sopenharmony_ci} 59862306a36Sopenharmony_ci 59962306a36Sopenharmony_cistatic bool 60062306a36Sopenharmony_cicheck_instruction_reads(struct vc4_validated_shader_info *validated_shader, 60162306a36Sopenharmony_ci struct vc4_shader_validation_state *validation_state) 60262306a36Sopenharmony_ci{ 60362306a36Sopenharmony_ci uint64_t inst = validation_state->shader[validation_state->ip]; 60462306a36Sopenharmony_ci uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A); 60562306a36Sopenharmony_ci uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B); 60662306a36Sopenharmony_ci uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); 60762306a36Sopenharmony_ci 60862306a36Sopenharmony_ci if (raddr_a == QPU_R_UNIF || 60962306a36Sopenharmony_ci (raddr_b == QPU_R_UNIF && sig != QPU_SIG_SMALL_IMM)) { 61062306a36Sopenharmony_ci /* This can't overflow the uint32_t, because we're reading 8 61162306a36Sopenharmony_ci * bytes of instruction to increment by 4 here, so we'd 61262306a36Sopenharmony_ci * already be OOM. 61362306a36Sopenharmony_ci */ 61462306a36Sopenharmony_ci validated_shader->uniforms_size += 4; 61562306a36Sopenharmony_ci 61662306a36Sopenharmony_ci if (validation_state->needs_uniform_address_update) { 61762306a36Sopenharmony_ci DRM_DEBUG("Uniform read with undefined uniform " 61862306a36Sopenharmony_ci "address\n"); 61962306a36Sopenharmony_ci return false; 62062306a36Sopenharmony_ci } 62162306a36Sopenharmony_ci } 62262306a36Sopenharmony_ci 62362306a36Sopenharmony_ci if ((raddr_a >= 16 && raddr_a < 32) || 62462306a36Sopenharmony_ci (raddr_b >= 16 && raddr_b < 32 && sig != QPU_SIG_SMALL_IMM)) { 62562306a36Sopenharmony_ci validation_state->all_registers_used = true; 62662306a36Sopenharmony_ci } 62762306a36Sopenharmony_ci 62862306a36Sopenharmony_ci return true; 62962306a36Sopenharmony_ci} 63062306a36Sopenharmony_ci 63162306a36Sopenharmony_ci/* Make sure that all branches are absolute and point within the shader, and 63262306a36Sopenharmony_ci * note their targets for later. 63362306a36Sopenharmony_ci */ 63462306a36Sopenharmony_cistatic bool 63562306a36Sopenharmony_civc4_validate_branches(struct vc4_shader_validation_state *validation_state) 63662306a36Sopenharmony_ci{ 63762306a36Sopenharmony_ci uint32_t max_branch_target = 0; 63862306a36Sopenharmony_ci int ip; 63962306a36Sopenharmony_ci int last_branch = -2; 64062306a36Sopenharmony_ci 64162306a36Sopenharmony_ci for (ip = 0; ip < validation_state->max_ip; ip++) { 64262306a36Sopenharmony_ci uint64_t inst = validation_state->shader[ip]; 64362306a36Sopenharmony_ci int32_t branch_imm = QPU_GET_FIELD(inst, QPU_BRANCH_TARGET); 64462306a36Sopenharmony_ci uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); 64562306a36Sopenharmony_ci uint32_t after_delay_ip = ip + 4; 64662306a36Sopenharmony_ci uint32_t branch_target_ip; 64762306a36Sopenharmony_ci 64862306a36Sopenharmony_ci if (sig == QPU_SIG_PROG_END) { 64962306a36Sopenharmony_ci /* There are two delay slots after program end is 65062306a36Sopenharmony_ci * signaled that are still executed, then we're 65162306a36Sopenharmony_ci * finished. validation_state->max_ip is the 65262306a36Sopenharmony_ci * instruction after the last valid instruction in the 65362306a36Sopenharmony_ci * program. 65462306a36Sopenharmony_ci */ 65562306a36Sopenharmony_ci validation_state->max_ip = ip + 3; 65662306a36Sopenharmony_ci continue; 65762306a36Sopenharmony_ci } 65862306a36Sopenharmony_ci 65962306a36Sopenharmony_ci if (sig != QPU_SIG_BRANCH) 66062306a36Sopenharmony_ci continue; 66162306a36Sopenharmony_ci 66262306a36Sopenharmony_ci if (ip - last_branch < 4) { 66362306a36Sopenharmony_ci DRM_DEBUG("Branch at %d during delay slots\n", ip); 66462306a36Sopenharmony_ci return false; 66562306a36Sopenharmony_ci } 66662306a36Sopenharmony_ci last_branch = ip; 66762306a36Sopenharmony_ci 66862306a36Sopenharmony_ci if (inst & QPU_BRANCH_REG) { 66962306a36Sopenharmony_ci DRM_DEBUG("branching from register relative " 67062306a36Sopenharmony_ci "not supported\n"); 67162306a36Sopenharmony_ci return false; 67262306a36Sopenharmony_ci } 67362306a36Sopenharmony_ci 67462306a36Sopenharmony_ci if (!(inst & QPU_BRANCH_REL)) { 67562306a36Sopenharmony_ci DRM_DEBUG("relative branching required\n"); 67662306a36Sopenharmony_ci return false; 67762306a36Sopenharmony_ci } 67862306a36Sopenharmony_ci 67962306a36Sopenharmony_ci /* The actual branch target is the instruction after the delay 68062306a36Sopenharmony_ci * slots, plus whatever byte offset is in the low 32 bits of 68162306a36Sopenharmony_ci * the instruction. Make sure we're not branching beyond the 68262306a36Sopenharmony_ci * end of the shader object. 68362306a36Sopenharmony_ci */ 68462306a36Sopenharmony_ci if (branch_imm % sizeof(inst) != 0) { 68562306a36Sopenharmony_ci DRM_DEBUG("branch target not aligned\n"); 68662306a36Sopenharmony_ci return false; 68762306a36Sopenharmony_ci } 68862306a36Sopenharmony_ci 68962306a36Sopenharmony_ci branch_target_ip = after_delay_ip + (branch_imm >> 3); 69062306a36Sopenharmony_ci if (branch_target_ip >= validation_state->max_ip) { 69162306a36Sopenharmony_ci DRM_DEBUG("Branch at %d outside of shader (ip %d/%d)\n", 69262306a36Sopenharmony_ci ip, branch_target_ip, 69362306a36Sopenharmony_ci validation_state->max_ip); 69462306a36Sopenharmony_ci return false; 69562306a36Sopenharmony_ci } 69662306a36Sopenharmony_ci set_bit(branch_target_ip, validation_state->branch_targets); 69762306a36Sopenharmony_ci 69862306a36Sopenharmony_ci /* Make sure that the non-branching path is also not outside 69962306a36Sopenharmony_ci * the shader. 70062306a36Sopenharmony_ci */ 70162306a36Sopenharmony_ci if (after_delay_ip >= validation_state->max_ip) { 70262306a36Sopenharmony_ci DRM_DEBUG("Branch at %d continues past shader end " 70362306a36Sopenharmony_ci "(%d/%d)\n", 70462306a36Sopenharmony_ci ip, after_delay_ip, validation_state->max_ip); 70562306a36Sopenharmony_ci return false; 70662306a36Sopenharmony_ci } 70762306a36Sopenharmony_ci set_bit(after_delay_ip, validation_state->branch_targets); 70862306a36Sopenharmony_ci max_branch_target = max(max_branch_target, after_delay_ip); 70962306a36Sopenharmony_ci } 71062306a36Sopenharmony_ci 71162306a36Sopenharmony_ci if (max_branch_target > validation_state->max_ip - 3) { 71262306a36Sopenharmony_ci DRM_DEBUG("Branch landed after QPU_SIG_PROG_END"); 71362306a36Sopenharmony_ci return false; 71462306a36Sopenharmony_ci } 71562306a36Sopenharmony_ci 71662306a36Sopenharmony_ci return true; 71762306a36Sopenharmony_ci} 71862306a36Sopenharmony_ci 71962306a36Sopenharmony_ci/* Resets any known state for the shader, used when we may be branched to from 72062306a36Sopenharmony_ci * multiple locations in the program (or at shader start). 72162306a36Sopenharmony_ci */ 72262306a36Sopenharmony_cistatic void 72362306a36Sopenharmony_cireset_validation_state(struct vc4_shader_validation_state *validation_state) 72462306a36Sopenharmony_ci{ 72562306a36Sopenharmony_ci int i; 72662306a36Sopenharmony_ci 72762306a36Sopenharmony_ci for (i = 0; i < 8; i++) 72862306a36Sopenharmony_ci validation_state->tmu_setup[i / 4].p_offset[i % 4] = ~0; 72962306a36Sopenharmony_ci 73062306a36Sopenharmony_ci for (i = 0; i < LIVE_REG_COUNT; i++) { 73162306a36Sopenharmony_ci validation_state->live_min_clamp_offsets[i] = ~0; 73262306a36Sopenharmony_ci validation_state->live_max_clamp_regs[i] = false; 73362306a36Sopenharmony_ci validation_state->live_immediates[i] = ~0; 73462306a36Sopenharmony_ci } 73562306a36Sopenharmony_ci} 73662306a36Sopenharmony_ci 73762306a36Sopenharmony_cistatic bool 73862306a36Sopenharmony_citexturing_in_progress(struct vc4_shader_validation_state *validation_state) 73962306a36Sopenharmony_ci{ 74062306a36Sopenharmony_ci return (validation_state->tmu_write_count[0] != 0 || 74162306a36Sopenharmony_ci validation_state->tmu_write_count[1] != 0); 74262306a36Sopenharmony_ci} 74362306a36Sopenharmony_ci 74462306a36Sopenharmony_cistatic bool 74562306a36Sopenharmony_civc4_handle_branch_target(struct vc4_shader_validation_state *validation_state) 74662306a36Sopenharmony_ci{ 74762306a36Sopenharmony_ci uint32_t ip = validation_state->ip; 74862306a36Sopenharmony_ci 74962306a36Sopenharmony_ci if (!test_bit(ip, validation_state->branch_targets)) 75062306a36Sopenharmony_ci return true; 75162306a36Sopenharmony_ci 75262306a36Sopenharmony_ci if (texturing_in_progress(validation_state)) { 75362306a36Sopenharmony_ci DRM_DEBUG("Branch target landed during TMU setup\n"); 75462306a36Sopenharmony_ci return false; 75562306a36Sopenharmony_ci } 75662306a36Sopenharmony_ci 75762306a36Sopenharmony_ci /* Reset our live values tracking, since this instruction may have 75862306a36Sopenharmony_ci * multiple predecessors. 75962306a36Sopenharmony_ci * 76062306a36Sopenharmony_ci * One could potentially do analysis to determine that, for 76162306a36Sopenharmony_ci * example, all predecessors have a live max clamp in the same 76262306a36Sopenharmony_ci * register, but we don't bother with that. 76362306a36Sopenharmony_ci */ 76462306a36Sopenharmony_ci reset_validation_state(validation_state); 76562306a36Sopenharmony_ci 76662306a36Sopenharmony_ci /* Since we've entered a basic block from potentially multiple 76762306a36Sopenharmony_ci * predecessors, we need the uniforms address to be updated before any 76862306a36Sopenharmony_ci * unforms are read. We require that after any branch point, the next 76962306a36Sopenharmony_ci * uniform to be loaded is a uniform address offset. That uniform's 77062306a36Sopenharmony_ci * offset will be marked by the uniform address register write 77162306a36Sopenharmony_ci * validation, or a one-off the end-of-program check. 77262306a36Sopenharmony_ci */ 77362306a36Sopenharmony_ci validation_state->needs_uniform_address_update = true; 77462306a36Sopenharmony_ci 77562306a36Sopenharmony_ci return true; 77662306a36Sopenharmony_ci} 77762306a36Sopenharmony_ci 77862306a36Sopenharmony_cistruct vc4_validated_shader_info * 77962306a36Sopenharmony_civc4_validate_shader(struct drm_gem_dma_object *shader_obj) 78062306a36Sopenharmony_ci{ 78162306a36Sopenharmony_ci struct vc4_dev *vc4 = to_vc4_dev(shader_obj->base.dev); 78262306a36Sopenharmony_ci bool found_shader_end = false; 78362306a36Sopenharmony_ci int shader_end_ip = 0; 78462306a36Sopenharmony_ci uint32_t last_thread_switch_ip = -3; 78562306a36Sopenharmony_ci uint32_t ip; 78662306a36Sopenharmony_ci struct vc4_validated_shader_info *validated_shader = NULL; 78762306a36Sopenharmony_ci struct vc4_shader_validation_state validation_state; 78862306a36Sopenharmony_ci 78962306a36Sopenharmony_ci if (WARN_ON_ONCE(vc4->is_vc5)) 79062306a36Sopenharmony_ci return NULL; 79162306a36Sopenharmony_ci 79262306a36Sopenharmony_ci memset(&validation_state, 0, sizeof(validation_state)); 79362306a36Sopenharmony_ci validation_state.shader = shader_obj->vaddr; 79462306a36Sopenharmony_ci validation_state.max_ip = shader_obj->base.size / sizeof(uint64_t); 79562306a36Sopenharmony_ci 79662306a36Sopenharmony_ci reset_validation_state(&validation_state); 79762306a36Sopenharmony_ci 79862306a36Sopenharmony_ci validation_state.branch_targets = 79962306a36Sopenharmony_ci kcalloc(BITS_TO_LONGS(validation_state.max_ip), 80062306a36Sopenharmony_ci sizeof(unsigned long), GFP_KERNEL); 80162306a36Sopenharmony_ci if (!validation_state.branch_targets) 80262306a36Sopenharmony_ci goto fail; 80362306a36Sopenharmony_ci 80462306a36Sopenharmony_ci validated_shader = kcalloc(1, sizeof(*validated_shader), GFP_KERNEL); 80562306a36Sopenharmony_ci if (!validated_shader) 80662306a36Sopenharmony_ci goto fail; 80762306a36Sopenharmony_ci 80862306a36Sopenharmony_ci if (!vc4_validate_branches(&validation_state)) 80962306a36Sopenharmony_ci goto fail; 81062306a36Sopenharmony_ci 81162306a36Sopenharmony_ci for (ip = 0; ip < validation_state.max_ip; ip++) { 81262306a36Sopenharmony_ci uint64_t inst = validation_state.shader[ip]; 81362306a36Sopenharmony_ci uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); 81462306a36Sopenharmony_ci 81562306a36Sopenharmony_ci validation_state.ip = ip; 81662306a36Sopenharmony_ci 81762306a36Sopenharmony_ci if (!vc4_handle_branch_target(&validation_state)) 81862306a36Sopenharmony_ci goto fail; 81962306a36Sopenharmony_ci 82062306a36Sopenharmony_ci if (ip == last_thread_switch_ip + 3) { 82162306a36Sopenharmony_ci /* Reset r0-r3 live clamp data */ 82262306a36Sopenharmony_ci int i; 82362306a36Sopenharmony_ci 82462306a36Sopenharmony_ci for (i = 64; i < LIVE_REG_COUNT; i++) { 82562306a36Sopenharmony_ci validation_state.live_min_clamp_offsets[i] = ~0; 82662306a36Sopenharmony_ci validation_state.live_max_clamp_regs[i] = false; 82762306a36Sopenharmony_ci validation_state.live_immediates[i] = ~0; 82862306a36Sopenharmony_ci } 82962306a36Sopenharmony_ci } 83062306a36Sopenharmony_ci 83162306a36Sopenharmony_ci switch (sig) { 83262306a36Sopenharmony_ci case QPU_SIG_NONE: 83362306a36Sopenharmony_ci case QPU_SIG_WAIT_FOR_SCOREBOARD: 83462306a36Sopenharmony_ci case QPU_SIG_SCOREBOARD_UNLOCK: 83562306a36Sopenharmony_ci case QPU_SIG_COLOR_LOAD: 83662306a36Sopenharmony_ci case QPU_SIG_LOAD_TMU0: 83762306a36Sopenharmony_ci case QPU_SIG_LOAD_TMU1: 83862306a36Sopenharmony_ci case QPU_SIG_PROG_END: 83962306a36Sopenharmony_ci case QPU_SIG_SMALL_IMM: 84062306a36Sopenharmony_ci case QPU_SIG_THREAD_SWITCH: 84162306a36Sopenharmony_ci case QPU_SIG_LAST_THREAD_SWITCH: 84262306a36Sopenharmony_ci if (!check_instruction_writes(validated_shader, 84362306a36Sopenharmony_ci &validation_state)) { 84462306a36Sopenharmony_ci DRM_DEBUG("Bad write at ip %d\n", ip); 84562306a36Sopenharmony_ci goto fail; 84662306a36Sopenharmony_ci } 84762306a36Sopenharmony_ci 84862306a36Sopenharmony_ci if (!check_instruction_reads(validated_shader, 84962306a36Sopenharmony_ci &validation_state)) 85062306a36Sopenharmony_ci goto fail; 85162306a36Sopenharmony_ci 85262306a36Sopenharmony_ci if (sig == QPU_SIG_PROG_END) { 85362306a36Sopenharmony_ci found_shader_end = true; 85462306a36Sopenharmony_ci shader_end_ip = ip; 85562306a36Sopenharmony_ci } 85662306a36Sopenharmony_ci 85762306a36Sopenharmony_ci if (sig == QPU_SIG_THREAD_SWITCH || 85862306a36Sopenharmony_ci sig == QPU_SIG_LAST_THREAD_SWITCH) { 85962306a36Sopenharmony_ci validated_shader->is_threaded = true; 86062306a36Sopenharmony_ci 86162306a36Sopenharmony_ci if (ip < last_thread_switch_ip + 3) { 86262306a36Sopenharmony_ci DRM_DEBUG("Thread switch too soon after " 86362306a36Sopenharmony_ci "last switch at ip %d\n", ip); 86462306a36Sopenharmony_ci goto fail; 86562306a36Sopenharmony_ci } 86662306a36Sopenharmony_ci last_thread_switch_ip = ip; 86762306a36Sopenharmony_ci } 86862306a36Sopenharmony_ci 86962306a36Sopenharmony_ci break; 87062306a36Sopenharmony_ci 87162306a36Sopenharmony_ci case QPU_SIG_LOAD_IMM: 87262306a36Sopenharmony_ci if (!check_instruction_writes(validated_shader, 87362306a36Sopenharmony_ci &validation_state)) { 87462306a36Sopenharmony_ci DRM_DEBUG("Bad LOAD_IMM write at ip %d\n", ip); 87562306a36Sopenharmony_ci goto fail; 87662306a36Sopenharmony_ci } 87762306a36Sopenharmony_ci break; 87862306a36Sopenharmony_ci 87962306a36Sopenharmony_ci case QPU_SIG_BRANCH: 88062306a36Sopenharmony_ci if (!check_branch(inst, validated_shader, 88162306a36Sopenharmony_ci &validation_state, ip)) 88262306a36Sopenharmony_ci goto fail; 88362306a36Sopenharmony_ci 88462306a36Sopenharmony_ci if (ip < last_thread_switch_ip + 3) { 88562306a36Sopenharmony_ci DRM_DEBUG("Branch in thread switch at ip %d", 88662306a36Sopenharmony_ci ip); 88762306a36Sopenharmony_ci goto fail; 88862306a36Sopenharmony_ci } 88962306a36Sopenharmony_ci 89062306a36Sopenharmony_ci break; 89162306a36Sopenharmony_ci default: 89262306a36Sopenharmony_ci DRM_DEBUG("Unsupported QPU signal %d at " 89362306a36Sopenharmony_ci "instruction %d\n", sig, ip); 89462306a36Sopenharmony_ci goto fail; 89562306a36Sopenharmony_ci } 89662306a36Sopenharmony_ci 89762306a36Sopenharmony_ci /* There are two delay slots after program end is signaled 89862306a36Sopenharmony_ci * that are still executed, then we're finished. 89962306a36Sopenharmony_ci */ 90062306a36Sopenharmony_ci if (found_shader_end && ip == shader_end_ip + 2) 90162306a36Sopenharmony_ci break; 90262306a36Sopenharmony_ci } 90362306a36Sopenharmony_ci 90462306a36Sopenharmony_ci if (ip == validation_state.max_ip) { 90562306a36Sopenharmony_ci DRM_DEBUG("shader failed to terminate before " 90662306a36Sopenharmony_ci "shader BO end at %zd\n", 90762306a36Sopenharmony_ci shader_obj->base.size); 90862306a36Sopenharmony_ci goto fail; 90962306a36Sopenharmony_ci } 91062306a36Sopenharmony_ci 91162306a36Sopenharmony_ci /* Might corrupt other thread */ 91262306a36Sopenharmony_ci if (validated_shader->is_threaded && 91362306a36Sopenharmony_ci validation_state.all_registers_used) { 91462306a36Sopenharmony_ci DRM_DEBUG("Shader uses threading, but uses the upper " 91562306a36Sopenharmony_ci "half of the registers, too\n"); 91662306a36Sopenharmony_ci goto fail; 91762306a36Sopenharmony_ci } 91862306a36Sopenharmony_ci 91962306a36Sopenharmony_ci /* If we did a backwards branch and we haven't emitted a uniforms 92062306a36Sopenharmony_ci * reset since then, we still need the uniforms stream to have the 92162306a36Sopenharmony_ci * uniforms address available so that the backwards branch can do its 92262306a36Sopenharmony_ci * uniforms reset. 92362306a36Sopenharmony_ci * 92462306a36Sopenharmony_ci * We could potentially prove that the backwards branch doesn't 92562306a36Sopenharmony_ci * contain any uses of uniforms until program exit, but that doesn't 92662306a36Sopenharmony_ci * seem to be worth the trouble. 92762306a36Sopenharmony_ci */ 92862306a36Sopenharmony_ci if (validation_state.needs_uniform_address_for_loop) { 92962306a36Sopenharmony_ci if (!require_uniform_address_uniform(validated_shader)) 93062306a36Sopenharmony_ci goto fail; 93162306a36Sopenharmony_ci validated_shader->uniforms_size += 4; 93262306a36Sopenharmony_ci } 93362306a36Sopenharmony_ci 93462306a36Sopenharmony_ci /* Again, no chance of integer overflow here because the worst case 93562306a36Sopenharmony_ci * scenario is 8 bytes of uniforms plus handles per 8-byte 93662306a36Sopenharmony_ci * instruction. 93762306a36Sopenharmony_ci */ 93862306a36Sopenharmony_ci validated_shader->uniforms_src_size = 93962306a36Sopenharmony_ci (validated_shader->uniforms_size + 94062306a36Sopenharmony_ci 4 * validated_shader->num_texture_samples); 94162306a36Sopenharmony_ci 94262306a36Sopenharmony_ci kfree(validation_state.branch_targets); 94362306a36Sopenharmony_ci 94462306a36Sopenharmony_ci return validated_shader; 94562306a36Sopenharmony_ci 94662306a36Sopenharmony_cifail: 94762306a36Sopenharmony_ci kfree(validation_state.branch_targets); 94862306a36Sopenharmony_ci if (validated_shader) { 94962306a36Sopenharmony_ci kfree(validated_shader->uniform_addr_offsets); 95062306a36Sopenharmony_ci kfree(validated_shader->texture_samples); 95162306a36Sopenharmony_ci kfree(validated_shader); 95262306a36Sopenharmony_ci } 95362306a36Sopenharmony_ci return NULL; 95462306a36Sopenharmony_ci} 955