18c2ecf20Sopenharmony_ci/* 28c2ecf20Sopenharmony_ci * Copyright © 2014 Broadcom 38c2ecf20Sopenharmony_ci * 48c2ecf20Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 58c2ecf20Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 68c2ecf20Sopenharmony_ci * to deal in the Software without restriction, including without limitation 78c2ecf20Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 88c2ecf20Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 98c2ecf20Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 108c2ecf20Sopenharmony_ci * 118c2ecf20Sopenharmony_ci * The above copyright notice and this permission notice (including the next 128c2ecf20Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 138c2ecf20Sopenharmony_ci * Software. 148c2ecf20Sopenharmony_ci * 158c2ecf20Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 168c2ecf20Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 178c2ecf20Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 188c2ecf20Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 198c2ecf20Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 208c2ecf20Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 218c2ecf20Sopenharmony_ci * IN THE SOFTWARE. 228c2ecf20Sopenharmony_ci */ 238c2ecf20Sopenharmony_ci 248c2ecf20Sopenharmony_ci/** 258c2ecf20Sopenharmony_ci * DOC: Shader validator for VC4. 268c2ecf20Sopenharmony_ci * 278c2ecf20Sopenharmony_ci * Since the VC4 has no IOMMU between it and system memory, a user 288c2ecf20Sopenharmony_ci * with access to execute shaders could escalate privilege by 298c2ecf20Sopenharmony_ci * overwriting system memory (using the VPM write address register in 308c2ecf20Sopenharmony_ci * the general-purpose DMA mode) or reading system memory it shouldn't 318c2ecf20Sopenharmony_ci * (reading it as a texture, uniform data, or direct-addressed TMU 328c2ecf20Sopenharmony_ci * lookup). 338c2ecf20Sopenharmony_ci * 348c2ecf20Sopenharmony_ci * The shader validator walks over a shader's BO, ensuring that its 358c2ecf20Sopenharmony_ci * accesses are appropriately bounded, and recording where texture 368c2ecf20Sopenharmony_ci * accesses are made so that we can do relocations for them in the 378c2ecf20Sopenharmony_ci * uniform stream. 388c2ecf20Sopenharmony_ci * 398c2ecf20Sopenharmony_ci * Shader BO are immutable for their lifetimes (enforced by not 408c2ecf20Sopenharmony_ci * allowing mmaps, GEM prime export, or rendering to from a CL), so 418c2ecf20Sopenharmony_ci * this validation is only performed at BO creation time. 428c2ecf20Sopenharmony_ci */ 438c2ecf20Sopenharmony_ci 448c2ecf20Sopenharmony_ci#include "vc4_drv.h" 458c2ecf20Sopenharmony_ci#include "vc4_qpu_defines.h" 468c2ecf20Sopenharmony_ci 478c2ecf20Sopenharmony_ci#define LIVE_REG_COUNT (32 + 32 + 4) 488c2ecf20Sopenharmony_ci 498c2ecf20Sopenharmony_cistruct vc4_shader_validation_state { 508c2ecf20Sopenharmony_ci /* Current IP being validated. */ 518c2ecf20Sopenharmony_ci uint32_t ip; 528c2ecf20Sopenharmony_ci 538c2ecf20Sopenharmony_ci /* IP at the end of the BO, do not read shader[max_ip] */ 548c2ecf20Sopenharmony_ci uint32_t max_ip; 558c2ecf20Sopenharmony_ci 568c2ecf20Sopenharmony_ci uint64_t *shader; 578c2ecf20Sopenharmony_ci 588c2ecf20Sopenharmony_ci struct vc4_texture_sample_info tmu_setup[2]; 598c2ecf20Sopenharmony_ci int tmu_write_count[2]; 608c2ecf20Sopenharmony_ci 618c2ecf20Sopenharmony_ci /* For registers that were last written to by a MIN instruction with 628c2ecf20Sopenharmony_ci * one argument being a uniform, the address of the uniform. 638c2ecf20Sopenharmony_ci * Otherwise, ~0. 648c2ecf20Sopenharmony_ci * 658c2ecf20Sopenharmony_ci * This is used for the validation of direct address memory reads. 668c2ecf20Sopenharmony_ci */ 678c2ecf20Sopenharmony_ci uint32_t live_min_clamp_offsets[LIVE_REG_COUNT]; 688c2ecf20Sopenharmony_ci bool live_max_clamp_regs[LIVE_REG_COUNT]; 698c2ecf20Sopenharmony_ci uint32_t live_immediates[LIVE_REG_COUNT]; 708c2ecf20Sopenharmony_ci 718c2ecf20Sopenharmony_ci /* Bitfield of which IPs are used as branch targets. 728c2ecf20Sopenharmony_ci * 738c2ecf20Sopenharmony_ci * Used for validation that the uniform stream is updated at the right 748c2ecf20Sopenharmony_ci * points and clearing the texturing/clamping state. 758c2ecf20Sopenharmony_ci */ 768c2ecf20Sopenharmony_ci unsigned long *branch_targets; 778c2ecf20Sopenharmony_ci 788c2ecf20Sopenharmony_ci /* Set when entering a basic block, and cleared when the uniform 798c2ecf20Sopenharmony_ci * address update is found. This is used to make sure that we don't 808c2ecf20Sopenharmony_ci * read uniforms when the address is undefined. 818c2ecf20Sopenharmony_ci */ 828c2ecf20Sopenharmony_ci bool needs_uniform_address_update; 838c2ecf20Sopenharmony_ci 848c2ecf20Sopenharmony_ci /* Set when we find a backwards branch. If the branch is backwards, 858c2ecf20Sopenharmony_ci * the taraget is probably doing an address reset to read uniforms, 868c2ecf20Sopenharmony_ci * and so we need to be sure that a uniforms address is present in the 878c2ecf20Sopenharmony_ci * stream, even if the shader didn't need to read uniforms in later 888c2ecf20Sopenharmony_ci * basic blocks. 898c2ecf20Sopenharmony_ci */ 908c2ecf20Sopenharmony_ci bool needs_uniform_address_for_loop; 918c2ecf20Sopenharmony_ci 928c2ecf20Sopenharmony_ci /* Set when we find an instruction writing the top half of the 938c2ecf20Sopenharmony_ci * register files. If we allowed writing the unusable regs in 948c2ecf20Sopenharmony_ci * a threaded shader, then the other shader running on our 958c2ecf20Sopenharmony_ci * QPU's clamp validation would be invalid. 968c2ecf20Sopenharmony_ci */ 978c2ecf20Sopenharmony_ci bool all_registers_used; 988c2ecf20Sopenharmony_ci}; 998c2ecf20Sopenharmony_ci 1008c2ecf20Sopenharmony_cistatic uint32_t 1018c2ecf20Sopenharmony_ciwaddr_to_live_reg_index(uint32_t waddr, bool is_b) 1028c2ecf20Sopenharmony_ci{ 1038c2ecf20Sopenharmony_ci if (waddr < 32) { 1048c2ecf20Sopenharmony_ci if (is_b) 1058c2ecf20Sopenharmony_ci return 32 + waddr; 1068c2ecf20Sopenharmony_ci else 1078c2ecf20Sopenharmony_ci return waddr; 1088c2ecf20Sopenharmony_ci } else if (waddr <= QPU_W_ACC3) { 1098c2ecf20Sopenharmony_ci return 64 + waddr - QPU_W_ACC0; 1108c2ecf20Sopenharmony_ci } else { 1118c2ecf20Sopenharmony_ci return ~0; 1128c2ecf20Sopenharmony_ci } 1138c2ecf20Sopenharmony_ci} 1148c2ecf20Sopenharmony_ci 1158c2ecf20Sopenharmony_cistatic uint32_t 1168c2ecf20Sopenharmony_ciraddr_add_a_to_live_reg_index(uint64_t inst) 1178c2ecf20Sopenharmony_ci{ 1188c2ecf20Sopenharmony_ci uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); 1198c2ecf20Sopenharmony_ci uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A); 1208c2ecf20Sopenharmony_ci uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A); 1218c2ecf20Sopenharmony_ci uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B); 1228c2ecf20Sopenharmony_ci 1238c2ecf20Sopenharmony_ci if (add_a == QPU_MUX_A) 1248c2ecf20Sopenharmony_ci return raddr_a; 1258c2ecf20Sopenharmony_ci else if (add_a == QPU_MUX_B && sig != QPU_SIG_SMALL_IMM) 1268c2ecf20Sopenharmony_ci return 32 + raddr_b; 1278c2ecf20Sopenharmony_ci else if (add_a <= QPU_MUX_R3) 1288c2ecf20Sopenharmony_ci return 64 + add_a; 1298c2ecf20Sopenharmony_ci else 1308c2ecf20Sopenharmony_ci return ~0; 1318c2ecf20Sopenharmony_ci} 1328c2ecf20Sopenharmony_ci 1338c2ecf20Sopenharmony_cistatic bool 1348c2ecf20Sopenharmony_cilive_reg_is_upper_half(uint32_t lri) 1358c2ecf20Sopenharmony_ci{ 1368c2ecf20Sopenharmony_ci return (lri >= 16 && lri < 32) || 1378c2ecf20Sopenharmony_ci (lri >= 32 + 16 && lri < 32 + 32); 1388c2ecf20Sopenharmony_ci} 1398c2ecf20Sopenharmony_ci 1408c2ecf20Sopenharmony_cistatic bool 1418c2ecf20Sopenharmony_ciis_tmu_submit(uint32_t waddr) 1428c2ecf20Sopenharmony_ci{ 1438c2ecf20Sopenharmony_ci return (waddr == QPU_W_TMU0_S || 1448c2ecf20Sopenharmony_ci waddr == QPU_W_TMU1_S); 1458c2ecf20Sopenharmony_ci} 1468c2ecf20Sopenharmony_ci 1478c2ecf20Sopenharmony_cistatic bool 1488c2ecf20Sopenharmony_ciis_tmu_write(uint32_t waddr) 1498c2ecf20Sopenharmony_ci{ 1508c2ecf20Sopenharmony_ci return (waddr >= QPU_W_TMU0_S && 1518c2ecf20Sopenharmony_ci waddr <= QPU_W_TMU1_B); 1528c2ecf20Sopenharmony_ci} 1538c2ecf20Sopenharmony_ci 1548c2ecf20Sopenharmony_cistatic bool 1558c2ecf20Sopenharmony_cirecord_texture_sample(struct vc4_validated_shader_info *validated_shader, 1568c2ecf20Sopenharmony_ci struct vc4_shader_validation_state *validation_state, 1578c2ecf20Sopenharmony_ci int tmu) 1588c2ecf20Sopenharmony_ci{ 1598c2ecf20Sopenharmony_ci uint32_t s = validated_shader->num_texture_samples; 1608c2ecf20Sopenharmony_ci int i; 1618c2ecf20Sopenharmony_ci struct vc4_texture_sample_info *temp_samples; 1628c2ecf20Sopenharmony_ci 1638c2ecf20Sopenharmony_ci temp_samples = krealloc(validated_shader->texture_samples, 1648c2ecf20Sopenharmony_ci (s + 1) * sizeof(*temp_samples), 1658c2ecf20Sopenharmony_ci GFP_KERNEL); 1668c2ecf20Sopenharmony_ci if (!temp_samples) 1678c2ecf20Sopenharmony_ci return false; 1688c2ecf20Sopenharmony_ci 1698c2ecf20Sopenharmony_ci memcpy(&temp_samples[s], 1708c2ecf20Sopenharmony_ci &validation_state->tmu_setup[tmu], 1718c2ecf20Sopenharmony_ci sizeof(*temp_samples)); 1728c2ecf20Sopenharmony_ci 1738c2ecf20Sopenharmony_ci validated_shader->num_texture_samples = s + 1; 1748c2ecf20Sopenharmony_ci validated_shader->texture_samples = temp_samples; 1758c2ecf20Sopenharmony_ci 1768c2ecf20Sopenharmony_ci for (i = 0; i < 4; i++) 1778c2ecf20Sopenharmony_ci validation_state->tmu_setup[tmu].p_offset[i] = ~0; 1788c2ecf20Sopenharmony_ci 1798c2ecf20Sopenharmony_ci return true; 1808c2ecf20Sopenharmony_ci} 1818c2ecf20Sopenharmony_ci 1828c2ecf20Sopenharmony_cistatic bool 1838c2ecf20Sopenharmony_cicheck_tmu_write(struct vc4_validated_shader_info *validated_shader, 1848c2ecf20Sopenharmony_ci struct vc4_shader_validation_state *validation_state, 1858c2ecf20Sopenharmony_ci bool is_mul) 1868c2ecf20Sopenharmony_ci{ 1878c2ecf20Sopenharmony_ci uint64_t inst = validation_state->shader[validation_state->ip]; 1888c2ecf20Sopenharmony_ci uint32_t waddr = (is_mul ? 1898c2ecf20Sopenharmony_ci QPU_GET_FIELD(inst, QPU_WADDR_MUL) : 1908c2ecf20Sopenharmony_ci QPU_GET_FIELD(inst, QPU_WADDR_ADD)); 1918c2ecf20Sopenharmony_ci uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A); 1928c2ecf20Sopenharmony_ci uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B); 1938c2ecf20Sopenharmony_ci int tmu = waddr > QPU_W_TMU0_B; 1948c2ecf20Sopenharmony_ci bool submit = is_tmu_submit(waddr); 1958c2ecf20Sopenharmony_ci bool is_direct = submit && validation_state->tmu_write_count[tmu] == 0; 1968c2ecf20Sopenharmony_ci uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); 1978c2ecf20Sopenharmony_ci 1988c2ecf20Sopenharmony_ci if (is_direct) { 1998c2ecf20Sopenharmony_ci uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B); 2008c2ecf20Sopenharmony_ci uint32_t clamp_reg, clamp_offset; 2018c2ecf20Sopenharmony_ci 2028c2ecf20Sopenharmony_ci if (sig == QPU_SIG_SMALL_IMM) { 2038c2ecf20Sopenharmony_ci DRM_DEBUG("direct TMU read used small immediate\n"); 2048c2ecf20Sopenharmony_ci return false; 2058c2ecf20Sopenharmony_ci } 2068c2ecf20Sopenharmony_ci 2078c2ecf20Sopenharmony_ci /* Make sure that this texture load is an add of the base 2088c2ecf20Sopenharmony_ci * address of the UBO to a clamped offset within the UBO. 2098c2ecf20Sopenharmony_ci */ 2108c2ecf20Sopenharmony_ci if (is_mul || 2118c2ecf20Sopenharmony_ci QPU_GET_FIELD(inst, QPU_OP_ADD) != QPU_A_ADD) { 2128c2ecf20Sopenharmony_ci DRM_DEBUG("direct TMU load wasn't an add\n"); 2138c2ecf20Sopenharmony_ci return false; 2148c2ecf20Sopenharmony_ci } 2158c2ecf20Sopenharmony_ci 2168c2ecf20Sopenharmony_ci /* We assert that the clamped address is the first 2178c2ecf20Sopenharmony_ci * argument, and the UBO base address is the second argument. 2188c2ecf20Sopenharmony_ci * This is arbitrary, but simpler than supporting flipping the 2198c2ecf20Sopenharmony_ci * two either way. 2208c2ecf20Sopenharmony_ci */ 2218c2ecf20Sopenharmony_ci clamp_reg = raddr_add_a_to_live_reg_index(inst); 2228c2ecf20Sopenharmony_ci if (clamp_reg == ~0) { 2238c2ecf20Sopenharmony_ci DRM_DEBUG("direct TMU load wasn't clamped\n"); 2248c2ecf20Sopenharmony_ci return false; 2258c2ecf20Sopenharmony_ci } 2268c2ecf20Sopenharmony_ci 2278c2ecf20Sopenharmony_ci clamp_offset = validation_state->live_min_clamp_offsets[clamp_reg]; 2288c2ecf20Sopenharmony_ci if (clamp_offset == ~0) { 2298c2ecf20Sopenharmony_ci DRM_DEBUG("direct TMU load wasn't clamped\n"); 2308c2ecf20Sopenharmony_ci return false; 2318c2ecf20Sopenharmony_ci } 2328c2ecf20Sopenharmony_ci 2338c2ecf20Sopenharmony_ci /* Store the clamp value's offset in p1 (see reloc_tex() in 2348c2ecf20Sopenharmony_ci * vc4_validate.c). 2358c2ecf20Sopenharmony_ci */ 2368c2ecf20Sopenharmony_ci validation_state->tmu_setup[tmu].p_offset[1] = 2378c2ecf20Sopenharmony_ci clamp_offset; 2388c2ecf20Sopenharmony_ci 2398c2ecf20Sopenharmony_ci if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) && 2408c2ecf20Sopenharmony_ci !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF)) { 2418c2ecf20Sopenharmony_ci DRM_DEBUG("direct TMU load didn't add to a uniform\n"); 2428c2ecf20Sopenharmony_ci return false; 2438c2ecf20Sopenharmony_ci } 2448c2ecf20Sopenharmony_ci 2458c2ecf20Sopenharmony_ci validation_state->tmu_setup[tmu].is_direct = true; 2468c2ecf20Sopenharmony_ci } else { 2478c2ecf20Sopenharmony_ci if (raddr_a == QPU_R_UNIF || (sig != QPU_SIG_SMALL_IMM && 2488c2ecf20Sopenharmony_ci raddr_b == QPU_R_UNIF)) { 2498c2ecf20Sopenharmony_ci DRM_DEBUG("uniform read in the same instruction as " 2508c2ecf20Sopenharmony_ci "texture setup.\n"); 2518c2ecf20Sopenharmony_ci return false; 2528c2ecf20Sopenharmony_ci } 2538c2ecf20Sopenharmony_ci } 2548c2ecf20Sopenharmony_ci 2558c2ecf20Sopenharmony_ci if (validation_state->tmu_write_count[tmu] >= 4) { 2568c2ecf20Sopenharmony_ci DRM_DEBUG("TMU%d got too many parameters before dispatch\n", 2578c2ecf20Sopenharmony_ci tmu); 2588c2ecf20Sopenharmony_ci return false; 2598c2ecf20Sopenharmony_ci } 2608c2ecf20Sopenharmony_ci validation_state->tmu_setup[tmu].p_offset[validation_state->tmu_write_count[tmu]] = 2618c2ecf20Sopenharmony_ci validated_shader->uniforms_size; 2628c2ecf20Sopenharmony_ci validation_state->tmu_write_count[tmu]++; 2638c2ecf20Sopenharmony_ci /* Since direct uses a RADDR uniform reference, it will get counted in 2648c2ecf20Sopenharmony_ci * check_instruction_reads() 2658c2ecf20Sopenharmony_ci */ 2668c2ecf20Sopenharmony_ci if (!is_direct) { 2678c2ecf20Sopenharmony_ci if (validation_state->needs_uniform_address_update) { 2688c2ecf20Sopenharmony_ci DRM_DEBUG("Texturing with undefined uniform address\n"); 2698c2ecf20Sopenharmony_ci return false; 2708c2ecf20Sopenharmony_ci } 2718c2ecf20Sopenharmony_ci 2728c2ecf20Sopenharmony_ci validated_shader->uniforms_size += 4; 2738c2ecf20Sopenharmony_ci } 2748c2ecf20Sopenharmony_ci 2758c2ecf20Sopenharmony_ci if (submit) { 2768c2ecf20Sopenharmony_ci if (!record_texture_sample(validated_shader, 2778c2ecf20Sopenharmony_ci validation_state, tmu)) { 2788c2ecf20Sopenharmony_ci return false; 2798c2ecf20Sopenharmony_ci } 2808c2ecf20Sopenharmony_ci 2818c2ecf20Sopenharmony_ci validation_state->tmu_write_count[tmu] = 0; 2828c2ecf20Sopenharmony_ci } 2838c2ecf20Sopenharmony_ci 2848c2ecf20Sopenharmony_ci return true; 2858c2ecf20Sopenharmony_ci} 2868c2ecf20Sopenharmony_ci 2878c2ecf20Sopenharmony_cistatic bool require_uniform_address_uniform(struct vc4_validated_shader_info *validated_shader) 2888c2ecf20Sopenharmony_ci{ 2898c2ecf20Sopenharmony_ci uint32_t o = validated_shader->num_uniform_addr_offsets; 2908c2ecf20Sopenharmony_ci uint32_t num_uniforms = validated_shader->uniforms_size / 4; 2918c2ecf20Sopenharmony_ci 2928c2ecf20Sopenharmony_ci validated_shader->uniform_addr_offsets = 2938c2ecf20Sopenharmony_ci krealloc(validated_shader->uniform_addr_offsets, 2948c2ecf20Sopenharmony_ci (o + 1) * 2958c2ecf20Sopenharmony_ci sizeof(*validated_shader->uniform_addr_offsets), 2968c2ecf20Sopenharmony_ci GFP_KERNEL); 2978c2ecf20Sopenharmony_ci if (!validated_shader->uniform_addr_offsets) 2988c2ecf20Sopenharmony_ci return false; 2998c2ecf20Sopenharmony_ci 3008c2ecf20Sopenharmony_ci validated_shader->uniform_addr_offsets[o] = num_uniforms; 3018c2ecf20Sopenharmony_ci validated_shader->num_uniform_addr_offsets++; 3028c2ecf20Sopenharmony_ci 3038c2ecf20Sopenharmony_ci return true; 3048c2ecf20Sopenharmony_ci} 3058c2ecf20Sopenharmony_ci 3068c2ecf20Sopenharmony_cistatic bool 3078c2ecf20Sopenharmony_civalidate_uniform_address_write(struct vc4_validated_shader_info *validated_shader, 3088c2ecf20Sopenharmony_ci struct vc4_shader_validation_state *validation_state, 3098c2ecf20Sopenharmony_ci bool is_mul) 3108c2ecf20Sopenharmony_ci{ 3118c2ecf20Sopenharmony_ci uint64_t inst = validation_state->shader[validation_state->ip]; 3128c2ecf20Sopenharmony_ci u32 add_b = QPU_GET_FIELD(inst, QPU_ADD_B); 3138c2ecf20Sopenharmony_ci u32 raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A); 3148c2ecf20Sopenharmony_ci u32 raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B); 3158c2ecf20Sopenharmony_ci u32 add_lri = raddr_add_a_to_live_reg_index(inst); 3168c2ecf20Sopenharmony_ci /* We want our reset to be pointing at whatever uniform follows the 3178c2ecf20Sopenharmony_ci * uniforms base address. 3188c2ecf20Sopenharmony_ci */ 3198c2ecf20Sopenharmony_ci u32 expected_offset = validated_shader->uniforms_size + 4; 3208c2ecf20Sopenharmony_ci 3218c2ecf20Sopenharmony_ci /* We only support absolute uniform address changes, and we 3228c2ecf20Sopenharmony_ci * require that they be in the current basic block before any 3238c2ecf20Sopenharmony_ci * of its uniform reads. 3248c2ecf20Sopenharmony_ci * 3258c2ecf20Sopenharmony_ci * One could potentially emit more efficient QPU code, by 3268c2ecf20Sopenharmony_ci * noticing that (say) an if statement does uniform control 3278c2ecf20Sopenharmony_ci * flow for all threads and that the if reads the same number 3288c2ecf20Sopenharmony_ci * of uniforms on each side. However, this scheme is easy to 3298c2ecf20Sopenharmony_ci * validate so it's all we allow for now. 3308c2ecf20Sopenharmony_ci */ 3318c2ecf20Sopenharmony_ci switch (QPU_GET_FIELD(inst, QPU_SIG)) { 3328c2ecf20Sopenharmony_ci case QPU_SIG_NONE: 3338c2ecf20Sopenharmony_ci case QPU_SIG_SCOREBOARD_UNLOCK: 3348c2ecf20Sopenharmony_ci case QPU_SIG_COLOR_LOAD: 3358c2ecf20Sopenharmony_ci case QPU_SIG_LOAD_TMU0: 3368c2ecf20Sopenharmony_ci case QPU_SIG_LOAD_TMU1: 3378c2ecf20Sopenharmony_ci break; 3388c2ecf20Sopenharmony_ci default: 3398c2ecf20Sopenharmony_ci DRM_DEBUG("uniforms address change must be " 3408c2ecf20Sopenharmony_ci "normal math\n"); 3418c2ecf20Sopenharmony_ci return false; 3428c2ecf20Sopenharmony_ci } 3438c2ecf20Sopenharmony_ci 3448c2ecf20Sopenharmony_ci if (is_mul || QPU_GET_FIELD(inst, QPU_OP_ADD) != QPU_A_ADD) { 3458c2ecf20Sopenharmony_ci DRM_DEBUG("Uniform address reset must be an ADD.\n"); 3468c2ecf20Sopenharmony_ci return false; 3478c2ecf20Sopenharmony_ci } 3488c2ecf20Sopenharmony_ci 3498c2ecf20Sopenharmony_ci if (QPU_GET_FIELD(inst, QPU_COND_ADD) != QPU_COND_ALWAYS) { 3508c2ecf20Sopenharmony_ci DRM_DEBUG("Uniform address reset must be unconditional.\n"); 3518c2ecf20Sopenharmony_ci return false; 3528c2ecf20Sopenharmony_ci } 3538c2ecf20Sopenharmony_ci 3548c2ecf20Sopenharmony_ci if (QPU_GET_FIELD(inst, QPU_PACK) != QPU_PACK_A_NOP && 3558c2ecf20Sopenharmony_ci !(inst & QPU_PM)) { 3568c2ecf20Sopenharmony_ci DRM_DEBUG("No packing allowed on uniforms reset\n"); 3578c2ecf20Sopenharmony_ci return false; 3588c2ecf20Sopenharmony_ci } 3598c2ecf20Sopenharmony_ci 3608c2ecf20Sopenharmony_ci if (add_lri == -1) { 3618c2ecf20Sopenharmony_ci DRM_DEBUG("First argument of uniform address write must be " 3628c2ecf20Sopenharmony_ci "an immediate value.\n"); 3638c2ecf20Sopenharmony_ci return false; 3648c2ecf20Sopenharmony_ci } 3658c2ecf20Sopenharmony_ci 3668c2ecf20Sopenharmony_ci if (validation_state->live_immediates[add_lri] != expected_offset) { 3678c2ecf20Sopenharmony_ci DRM_DEBUG("Resetting uniforms with offset %db instead of %db\n", 3688c2ecf20Sopenharmony_ci validation_state->live_immediates[add_lri], 3698c2ecf20Sopenharmony_ci expected_offset); 3708c2ecf20Sopenharmony_ci return false; 3718c2ecf20Sopenharmony_ci } 3728c2ecf20Sopenharmony_ci 3738c2ecf20Sopenharmony_ci if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) && 3748c2ecf20Sopenharmony_ci !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF)) { 3758c2ecf20Sopenharmony_ci DRM_DEBUG("Second argument of uniform address write must be " 3768c2ecf20Sopenharmony_ci "a uniform.\n"); 3778c2ecf20Sopenharmony_ci return false; 3788c2ecf20Sopenharmony_ci } 3798c2ecf20Sopenharmony_ci 3808c2ecf20Sopenharmony_ci validation_state->needs_uniform_address_update = false; 3818c2ecf20Sopenharmony_ci validation_state->needs_uniform_address_for_loop = false; 3828c2ecf20Sopenharmony_ci return require_uniform_address_uniform(validated_shader); 3838c2ecf20Sopenharmony_ci} 3848c2ecf20Sopenharmony_ci 3858c2ecf20Sopenharmony_cistatic bool 3868c2ecf20Sopenharmony_cicheck_reg_write(struct vc4_validated_shader_info *validated_shader, 3878c2ecf20Sopenharmony_ci struct vc4_shader_validation_state *validation_state, 3888c2ecf20Sopenharmony_ci bool is_mul) 3898c2ecf20Sopenharmony_ci{ 3908c2ecf20Sopenharmony_ci uint64_t inst = validation_state->shader[validation_state->ip]; 3918c2ecf20Sopenharmony_ci uint32_t waddr = (is_mul ? 3928c2ecf20Sopenharmony_ci QPU_GET_FIELD(inst, QPU_WADDR_MUL) : 3938c2ecf20Sopenharmony_ci QPU_GET_FIELD(inst, QPU_WADDR_ADD)); 3948c2ecf20Sopenharmony_ci uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); 3958c2ecf20Sopenharmony_ci bool ws = inst & QPU_WS; 3968c2ecf20Sopenharmony_ci bool is_b = is_mul ^ ws; 3978c2ecf20Sopenharmony_ci u32 lri = waddr_to_live_reg_index(waddr, is_b); 3988c2ecf20Sopenharmony_ci 3998c2ecf20Sopenharmony_ci if (lri != -1) { 4008c2ecf20Sopenharmony_ci uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD); 4018c2ecf20Sopenharmony_ci uint32_t cond_mul = QPU_GET_FIELD(inst, QPU_COND_MUL); 4028c2ecf20Sopenharmony_ci 4038c2ecf20Sopenharmony_ci if (sig == QPU_SIG_LOAD_IMM && 4048c2ecf20Sopenharmony_ci QPU_GET_FIELD(inst, QPU_PACK) == QPU_PACK_A_NOP && 4058c2ecf20Sopenharmony_ci ((is_mul && cond_mul == QPU_COND_ALWAYS) || 4068c2ecf20Sopenharmony_ci (!is_mul && cond_add == QPU_COND_ALWAYS))) { 4078c2ecf20Sopenharmony_ci validation_state->live_immediates[lri] = 4088c2ecf20Sopenharmony_ci QPU_GET_FIELD(inst, QPU_LOAD_IMM); 4098c2ecf20Sopenharmony_ci } else { 4108c2ecf20Sopenharmony_ci validation_state->live_immediates[lri] = ~0; 4118c2ecf20Sopenharmony_ci } 4128c2ecf20Sopenharmony_ci 4138c2ecf20Sopenharmony_ci if (live_reg_is_upper_half(lri)) 4148c2ecf20Sopenharmony_ci validation_state->all_registers_used = true; 4158c2ecf20Sopenharmony_ci } 4168c2ecf20Sopenharmony_ci 4178c2ecf20Sopenharmony_ci switch (waddr) { 4188c2ecf20Sopenharmony_ci case QPU_W_UNIFORMS_ADDRESS: 4198c2ecf20Sopenharmony_ci if (is_b) { 4208c2ecf20Sopenharmony_ci DRM_DEBUG("relative uniforms address change " 4218c2ecf20Sopenharmony_ci "unsupported\n"); 4228c2ecf20Sopenharmony_ci return false; 4238c2ecf20Sopenharmony_ci } 4248c2ecf20Sopenharmony_ci 4258c2ecf20Sopenharmony_ci return validate_uniform_address_write(validated_shader, 4268c2ecf20Sopenharmony_ci validation_state, 4278c2ecf20Sopenharmony_ci is_mul); 4288c2ecf20Sopenharmony_ci 4298c2ecf20Sopenharmony_ci case QPU_W_TLB_COLOR_MS: 4308c2ecf20Sopenharmony_ci case QPU_W_TLB_COLOR_ALL: 4318c2ecf20Sopenharmony_ci case QPU_W_TLB_Z: 4328c2ecf20Sopenharmony_ci /* These only interact with the tile buffer, not main memory, 4338c2ecf20Sopenharmony_ci * so they're safe. 4348c2ecf20Sopenharmony_ci */ 4358c2ecf20Sopenharmony_ci return true; 4368c2ecf20Sopenharmony_ci 4378c2ecf20Sopenharmony_ci case QPU_W_TMU0_S: 4388c2ecf20Sopenharmony_ci case QPU_W_TMU0_T: 4398c2ecf20Sopenharmony_ci case QPU_W_TMU0_R: 4408c2ecf20Sopenharmony_ci case QPU_W_TMU0_B: 4418c2ecf20Sopenharmony_ci case QPU_W_TMU1_S: 4428c2ecf20Sopenharmony_ci case QPU_W_TMU1_T: 4438c2ecf20Sopenharmony_ci case QPU_W_TMU1_R: 4448c2ecf20Sopenharmony_ci case QPU_W_TMU1_B: 4458c2ecf20Sopenharmony_ci return check_tmu_write(validated_shader, validation_state, 4468c2ecf20Sopenharmony_ci is_mul); 4478c2ecf20Sopenharmony_ci 4488c2ecf20Sopenharmony_ci case QPU_W_HOST_INT: 4498c2ecf20Sopenharmony_ci case QPU_W_TMU_NOSWAP: 4508c2ecf20Sopenharmony_ci case QPU_W_TLB_ALPHA_MASK: 4518c2ecf20Sopenharmony_ci case QPU_W_MUTEX_RELEASE: 4528c2ecf20Sopenharmony_ci /* XXX: I haven't thought about these, so don't support them 4538c2ecf20Sopenharmony_ci * for now. 4548c2ecf20Sopenharmony_ci */ 4558c2ecf20Sopenharmony_ci DRM_DEBUG("Unsupported waddr %d\n", waddr); 4568c2ecf20Sopenharmony_ci return false; 4578c2ecf20Sopenharmony_ci 4588c2ecf20Sopenharmony_ci case QPU_W_VPM_ADDR: 4598c2ecf20Sopenharmony_ci DRM_DEBUG("General VPM DMA unsupported\n"); 4608c2ecf20Sopenharmony_ci return false; 4618c2ecf20Sopenharmony_ci 4628c2ecf20Sopenharmony_ci case QPU_W_VPM: 4638c2ecf20Sopenharmony_ci case QPU_W_VPMVCD_SETUP: 4648c2ecf20Sopenharmony_ci /* We allow VPM setup in general, even including VPM DMA 4658c2ecf20Sopenharmony_ci * configuration setup, because the (unsafe) DMA can only be 4668c2ecf20Sopenharmony_ci * triggered by QPU_W_VPM_ADDR writes. 4678c2ecf20Sopenharmony_ci */ 4688c2ecf20Sopenharmony_ci return true; 4698c2ecf20Sopenharmony_ci 4708c2ecf20Sopenharmony_ci case QPU_W_TLB_STENCIL_SETUP: 4718c2ecf20Sopenharmony_ci return true; 4728c2ecf20Sopenharmony_ci } 4738c2ecf20Sopenharmony_ci 4748c2ecf20Sopenharmony_ci return true; 4758c2ecf20Sopenharmony_ci} 4768c2ecf20Sopenharmony_ci 4778c2ecf20Sopenharmony_cistatic void 4788c2ecf20Sopenharmony_citrack_live_clamps(struct vc4_validated_shader_info *validated_shader, 4798c2ecf20Sopenharmony_ci struct vc4_shader_validation_state *validation_state) 4808c2ecf20Sopenharmony_ci{ 4818c2ecf20Sopenharmony_ci uint64_t inst = validation_state->shader[validation_state->ip]; 4828c2ecf20Sopenharmony_ci uint32_t op_add = QPU_GET_FIELD(inst, QPU_OP_ADD); 4838c2ecf20Sopenharmony_ci uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD); 4848c2ecf20Sopenharmony_ci uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL); 4858c2ecf20Sopenharmony_ci uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD); 4868c2ecf20Sopenharmony_ci uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A); 4878c2ecf20Sopenharmony_ci uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B); 4888c2ecf20Sopenharmony_ci uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A); 4898c2ecf20Sopenharmony_ci uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B); 4908c2ecf20Sopenharmony_ci uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); 4918c2ecf20Sopenharmony_ci bool ws = inst & QPU_WS; 4928c2ecf20Sopenharmony_ci uint32_t lri_add_a, lri_add, lri_mul; 4938c2ecf20Sopenharmony_ci bool add_a_is_min_0; 4948c2ecf20Sopenharmony_ci 4958c2ecf20Sopenharmony_ci /* Check whether OP_ADD's A argumennt comes from a live MAX(x, 0), 4968c2ecf20Sopenharmony_ci * before we clear previous live state. 4978c2ecf20Sopenharmony_ci */ 4988c2ecf20Sopenharmony_ci lri_add_a = raddr_add_a_to_live_reg_index(inst); 4998c2ecf20Sopenharmony_ci add_a_is_min_0 = (lri_add_a != ~0 && 5008c2ecf20Sopenharmony_ci validation_state->live_max_clamp_regs[lri_add_a]); 5018c2ecf20Sopenharmony_ci 5028c2ecf20Sopenharmony_ci /* Clear live state for registers written by our instruction. */ 5038c2ecf20Sopenharmony_ci lri_add = waddr_to_live_reg_index(waddr_add, ws); 5048c2ecf20Sopenharmony_ci lri_mul = waddr_to_live_reg_index(waddr_mul, !ws); 5058c2ecf20Sopenharmony_ci if (lri_mul != ~0) { 5068c2ecf20Sopenharmony_ci validation_state->live_max_clamp_regs[lri_mul] = false; 5078c2ecf20Sopenharmony_ci validation_state->live_min_clamp_offsets[lri_mul] = ~0; 5088c2ecf20Sopenharmony_ci } 5098c2ecf20Sopenharmony_ci if (lri_add != ~0) { 5108c2ecf20Sopenharmony_ci validation_state->live_max_clamp_regs[lri_add] = false; 5118c2ecf20Sopenharmony_ci validation_state->live_min_clamp_offsets[lri_add] = ~0; 5128c2ecf20Sopenharmony_ci } else { 5138c2ecf20Sopenharmony_ci /* Nothing further to do for live tracking, since only ADDs 5148c2ecf20Sopenharmony_ci * generate new live clamp registers. 5158c2ecf20Sopenharmony_ci */ 5168c2ecf20Sopenharmony_ci return; 5178c2ecf20Sopenharmony_ci } 5188c2ecf20Sopenharmony_ci 5198c2ecf20Sopenharmony_ci /* Now, handle remaining live clamp tracking for the ADD operation. */ 5208c2ecf20Sopenharmony_ci 5218c2ecf20Sopenharmony_ci if (cond_add != QPU_COND_ALWAYS) 5228c2ecf20Sopenharmony_ci return; 5238c2ecf20Sopenharmony_ci 5248c2ecf20Sopenharmony_ci if (op_add == QPU_A_MAX) { 5258c2ecf20Sopenharmony_ci /* Track live clamps of a value to a minimum of 0 (in either 5268c2ecf20Sopenharmony_ci * arg). 5278c2ecf20Sopenharmony_ci */ 5288c2ecf20Sopenharmony_ci if (sig != QPU_SIG_SMALL_IMM || raddr_b != 0 || 5298c2ecf20Sopenharmony_ci (add_a != QPU_MUX_B && add_b != QPU_MUX_B)) { 5308c2ecf20Sopenharmony_ci return; 5318c2ecf20Sopenharmony_ci } 5328c2ecf20Sopenharmony_ci 5338c2ecf20Sopenharmony_ci validation_state->live_max_clamp_regs[lri_add] = true; 5348c2ecf20Sopenharmony_ci } else if (op_add == QPU_A_MIN) { 5358c2ecf20Sopenharmony_ci /* Track live clamps of a value clamped to a minimum of 0 and 5368c2ecf20Sopenharmony_ci * a maximum of some uniform's offset. 5378c2ecf20Sopenharmony_ci */ 5388c2ecf20Sopenharmony_ci if (!add_a_is_min_0) 5398c2ecf20Sopenharmony_ci return; 5408c2ecf20Sopenharmony_ci 5418c2ecf20Sopenharmony_ci if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) && 5428c2ecf20Sopenharmony_ci !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF && 5438c2ecf20Sopenharmony_ci sig != QPU_SIG_SMALL_IMM)) { 5448c2ecf20Sopenharmony_ci return; 5458c2ecf20Sopenharmony_ci } 5468c2ecf20Sopenharmony_ci 5478c2ecf20Sopenharmony_ci validation_state->live_min_clamp_offsets[lri_add] = 5488c2ecf20Sopenharmony_ci validated_shader->uniforms_size; 5498c2ecf20Sopenharmony_ci } 5508c2ecf20Sopenharmony_ci} 5518c2ecf20Sopenharmony_ci 5528c2ecf20Sopenharmony_cistatic bool 5538c2ecf20Sopenharmony_cicheck_instruction_writes(struct vc4_validated_shader_info *validated_shader, 5548c2ecf20Sopenharmony_ci struct vc4_shader_validation_state *validation_state) 5558c2ecf20Sopenharmony_ci{ 5568c2ecf20Sopenharmony_ci uint64_t inst = validation_state->shader[validation_state->ip]; 5578c2ecf20Sopenharmony_ci uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD); 5588c2ecf20Sopenharmony_ci uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL); 5598c2ecf20Sopenharmony_ci bool ok; 5608c2ecf20Sopenharmony_ci 5618c2ecf20Sopenharmony_ci if (is_tmu_write(waddr_add) && is_tmu_write(waddr_mul)) { 5628c2ecf20Sopenharmony_ci DRM_DEBUG("ADD and MUL both set up textures\n"); 5638c2ecf20Sopenharmony_ci return false; 5648c2ecf20Sopenharmony_ci } 5658c2ecf20Sopenharmony_ci 5668c2ecf20Sopenharmony_ci ok = (check_reg_write(validated_shader, validation_state, false) && 5678c2ecf20Sopenharmony_ci check_reg_write(validated_shader, validation_state, true)); 5688c2ecf20Sopenharmony_ci 5698c2ecf20Sopenharmony_ci track_live_clamps(validated_shader, validation_state); 5708c2ecf20Sopenharmony_ci 5718c2ecf20Sopenharmony_ci return ok; 5728c2ecf20Sopenharmony_ci} 5738c2ecf20Sopenharmony_ci 5748c2ecf20Sopenharmony_cistatic bool 5758c2ecf20Sopenharmony_cicheck_branch(uint64_t inst, 5768c2ecf20Sopenharmony_ci struct vc4_validated_shader_info *validated_shader, 5778c2ecf20Sopenharmony_ci struct vc4_shader_validation_state *validation_state, 5788c2ecf20Sopenharmony_ci int ip) 5798c2ecf20Sopenharmony_ci{ 5808c2ecf20Sopenharmony_ci int32_t branch_imm = QPU_GET_FIELD(inst, QPU_BRANCH_TARGET); 5818c2ecf20Sopenharmony_ci uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD); 5828c2ecf20Sopenharmony_ci uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL); 5838c2ecf20Sopenharmony_ci 5848c2ecf20Sopenharmony_ci if ((int)branch_imm < 0) 5858c2ecf20Sopenharmony_ci validation_state->needs_uniform_address_for_loop = true; 5868c2ecf20Sopenharmony_ci 5878c2ecf20Sopenharmony_ci /* We don't want to have to worry about validation of this, and 5888c2ecf20Sopenharmony_ci * there's no need for it. 5898c2ecf20Sopenharmony_ci */ 5908c2ecf20Sopenharmony_ci if (waddr_add != QPU_W_NOP || waddr_mul != QPU_W_NOP) { 5918c2ecf20Sopenharmony_ci DRM_DEBUG("branch instruction at %d wrote a register.\n", 5928c2ecf20Sopenharmony_ci validation_state->ip); 5938c2ecf20Sopenharmony_ci return false; 5948c2ecf20Sopenharmony_ci } 5958c2ecf20Sopenharmony_ci 5968c2ecf20Sopenharmony_ci return true; 5978c2ecf20Sopenharmony_ci} 5988c2ecf20Sopenharmony_ci 5998c2ecf20Sopenharmony_cistatic bool 6008c2ecf20Sopenharmony_cicheck_instruction_reads(struct vc4_validated_shader_info *validated_shader, 6018c2ecf20Sopenharmony_ci struct vc4_shader_validation_state *validation_state) 6028c2ecf20Sopenharmony_ci{ 6038c2ecf20Sopenharmony_ci uint64_t inst = validation_state->shader[validation_state->ip]; 6048c2ecf20Sopenharmony_ci uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A); 6058c2ecf20Sopenharmony_ci uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B); 6068c2ecf20Sopenharmony_ci uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); 6078c2ecf20Sopenharmony_ci 6088c2ecf20Sopenharmony_ci if (raddr_a == QPU_R_UNIF || 6098c2ecf20Sopenharmony_ci (raddr_b == QPU_R_UNIF && sig != QPU_SIG_SMALL_IMM)) { 6108c2ecf20Sopenharmony_ci /* This can't overflow the uint32_t, because we're reading 8 6118c2ecf20Sopenharmony_ci * bytes of instruction to increment by 4 here, so we'd 6128c2ecf20Sopenharmony_ci * already be OOM. 6138c2ecf20Sopenharmony_ci */ 6148c2ecf20Sopenharmony_ci validated_shader->uniforms_size += 4; 6158c2ecf20Sopenharmony_ci 6168c2ecf20Sopenharmony_ci if (validation_state->needs_uniform_address_update) { 6178c2ecf20Sopenharmony_ci DRM_DEBUG("Uniform read with undefined uniform " 6188c2ecf20Sopenharmony_ci "address\n"); 6198c2ecf20Sopenharmony_ci return false; 6208c2ecf20Sopenharmony_ci } 6218c2ecf20Sopenharmony_ci } 6228c2ecf20Sopenharmony_ci 6238c2ecf20Sopenharmony_ci if ((raddr_a >= 16 && raddr_a < 32) || 6248c2ecf20Sopenharmony_ci (raddr_b >= 16 && raddr_b < 32 && sig != QPU_SIG_SMALL_IMM)) { 6258c2ecf20Sopenharmony_ci validation_state->all_registers_used = true; 6268c2ecf20Sopenharmony_ci } 6278c2ecf20Sopenharmony_ci 6288c2ecf20Sopenharmony_ci return true; 6298c2ecf20Sopenharmony_ci} 6308c2ecf20Sopenharmony_ci 6318c2ecf20Sopenharmony_ci/* Make sure that all branches are absolute and point within the shader, and 6328c2ecf20Sopenharmony_ci * note their targets for later. 6338c2ecf20Sopenharmony_ci */ 6348c2ecf20Sopenharmony_cistatic bool 6358c2ecf20Sopenharmony_civc4_validate_branches(struct vc4_shader_validation_state *validation_state) 6368c2ecf20Sopenharmony_ci{ 6378c2ecf20Sopenharmony_ci uint32_t max_branch_target = 0; 6388c2ecf20Sopenharmony_ci int ip; 6398c2ecf20Sopenharmony_ci int last_branch = -2; 6408c2ecf20Sopenharmony_ci 6418c2ecf20Sopenharmony_ci for (ip = 0; ip < validation_state->max_ip; ip++) { 6428c2ecf20Sopenharmony_ci uint64_t inst = validation_state->shader[ip]; 6438c2ecf20Sopenharmony_ci int32_t branch_imm = QPU_GET_FIELD(inst, QPU_BRANCH_TARGET); 6448c2ecf20Sopenharmony_ci uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); 6458c2ecf20Sopenharmony_ci uint32_t after_delay_ip = ip + 4; 6468c2ecf20Sopenharmony_ci uint32_t branch_target_ip; 6478c2ecf20Sopenharmony_ci 6488c2ecf20Sopenharmony_ci if (sig == QPU_SIG_PROG_END) { 6498c2ecf20Sopenharmony_ci /* There are two delay slots after program end is 6508c2ecf20Sopenharmony_ci * signaled that are still executed, then we're 6518c2ecf20Sopenharmony_ci * finished. validation_state->max_ip is the 6528c2ecf20Sopenharmony_ci * instruction after the last valid instruction in the 6538c2ecf20Sopenharmony_ci * program. 6548c2ecf20Sopenharmony_ci */ 6558c2ecf20Sopenharmony_ci validation_state->max_ip = ip + 3; 6568c2ecf20Sopenharmony_ci continue; 6578c2ecf20Sopenharmony_ci } 6588c2ecf20Sopenharmony_ci 6598c2ecf20Sopenharmony_ci if (sig != QPU_SIG_BRANCH) 6608c2ecf20Sopenharmony_ci continue; 6618c2ecf20Sopenharmony_ci 6628c2ecf20Sopenharmony_ci if (ip - last_branch < 4) { 6638c2ecf20Sopenharmony_ci DRM_DEBUG("Branch at %d during delay slots\n", ip); 6648c2ecf20Sopenharmony_ci return false; 6658c2ecf20Sopenharmony_ci } 6668c2ecf20Sopenharmony_ci last_branch = ip; 6678c2ecf20Sopenharmony_ci 6688c2ecf20Sopenharmony_ci if (inst & QPU_BRANCH_REG) { 6698c2ecf20Sopenharmony_ci DRM_DEBUG("branching from register relative " 6708c2ecf20Sopenharmony_ci "not supported\n"); 6718c2ecf20Sopenharmony_ci return false; 6728c2ecf20Sopenharmony_ci } 6738c2ecf20Sopenharmony_ci 6748c2ecf20Sopenharmony_ci if (!(inst & QPU_BRANCH_REL)) { 6758c2ecf20Sopenharmony_ci DRM_DEBUG("relative branching required\n"); 6768c2ecf20Sopenharmony_ci return false; 6778c2ecf20Sopenharmony_ci } 6788c2ecf20Sopenharmony_ci 6798c2ecf20Sopenharmony_ci /* The actual branch target is the instruction after the delay 6808c2ecf20Sopenharmony_ci * slots, plus whatever byte offset is in the low 32 bits of 6818c2ecf20Sopenharmony_ci * the instruction. Make sure we're not branching beyond the 6828c2ecf20Sopenharmony_ci * end of the shader object. 6838c2ecf20Sopenharmony_ci */ 6848c2ecf20Sopenharmony_ci if (branch_imm % sizeof(inst) != 0) { 6858c2ecf20Sopenharmony_ci DRM_DEBUG("branch target not aligned\n"); 6868c2ecf20Sopenharmony_ci return false; 6878c2ecf20Sopenharmony_ci } 6888c2ecf20Sopenharmony_ci 6898c2ecf20Sopenharmony_ci branch_target_ip = after_delay_ip + (branch_imm >> 3); 6908c2ecf20Sopenharmony_ci if (branch_target_ip >= validation_state->max_ip) { 6918c2ecf20Sopenharmony_ci DRM_DEBUG("Branch at %d outside of shader (ip %d/%d)\n", 6928c2ecf20Sopenharmony_ci ip, branch_target_ip, 6938c2ecf20Sopenharmony_ci validation_state->max_ip); 6948c2ecf20Sopenharmony_ci return false; 6958c2ecf20Sopenharmony_ci } 6968c2ecf20Sopenharmony_ci set_bit(branch_target_ip, validation_state->branch_targets); 6978c2ecf20Sopenharmony_ci 6988c2ecf20Sopenharmony_ci /* Make sure that the non-branching path is also not outside 6998c2ecf20Sopenharmony_ci * the shader. 7008c2ecf20Sopenharmony_ci */ 7018c2ecf20Sopenharmony_ci if (after_delay_ip >= validation_state->max_ip) { 7028c2ecf20Sopenharmony_ci DRM_DEBUG("Branch at %d continues past shader end " 7038c2ecf20Sopenharmony_ci "(%d/%d)\n", 7048c2ecf20Sopenharmony_ci ip, after_delay_ip, validation_state->max_ip); 7058c2ecf20Sopenharmony_ci return false; 7068c2ecf20Sopenharmony_ci } 7078c2ecf20Sopenharmony_ci set_bit(after_delay_ip, validation_state->branch_targets); 7088c2ecf20Sopenharmony_ci max_branch_target = max(max_branch_target, after_delay_ip); 7098c2ecf20Sopenharmony_ci } 7108c2ecf20Sopenharmony_ci 7118c2ecf20Sopenharmony_ci if (max_branch_target > validation_state->max_ip - 3) { 7128c2ecf20Sopenharmony_ci DRM_DEBUG("Branch landed after QPU_SIG_PROG_END"); 7138c2ecf20Sopenharmony_ci return false; 7148c2ecf20Sopenharmony_ci } 7158c2ecf20Sopenharmony_ci 7168c2ecf20Sopenharmony_ci return true; 7178c2ecf20Sopenharmony_ci} 7188c2ecf20Sopenharmony_ci 7198c2ecf20Sopenharmony_ci/* Resets any known state for the shader, used when we may be branched to from 7208c2ecf20Sopenharmony_ci * multiple locations in the program (or at shader start). 7218c2ecf20Sopenharmony_ci */ 7228c2ecf20Sopenharmony_cistatic void 7238c2ecf20Sopenharmony_cireset_validation_state(struct vc4_shader_validation_state *validation_state) 7248c2ecf20Sopenharmony_ci{ 7258c2ecf20Sopenharmony_ci int i; 7268c2ecf20Sopenharmony_ci 7278c2ecf20Sopenharmony_ci for (i = 0; i < 8; i++) 7288c2ecf20Sopenharmony_ci validation_state->tmu_setup[i / 4].p_offset[i % 4] = ~0; 7298c2ecf20Sopenharmony_ci 7308c2ecf20Sopenharmony_ci for (i = 0; i < LIVE_REG_COUNT; i++) { 7318c2ecf20Sopenharmony_ci validation_state->live_min_clamp_offsets[i] = ~0; 7328c2ecf20Sopenharmony_ci validation_state->live_max_clamp_regs[i] = false; 7338c2ecf20Sopenharmony_ci validation_state->live_immediates[i] = ~0; 7348c2ecf20Sopenharmony_ci } 7358c2ecf20Sopenharmony_ci} 7368c2ecf20Sopenharmony_ci 7378c2ecf20Sopenharmony_cistatic bool 7388c2ecf20Sopenharmony_citexturing_in_progress(struct vc4_shader_validation_state *validation_state) 7398c2ecf20Sopenharmony_ci{ 7408c2ecf20Sopenharmony_ci return (validation_state->tmu_write_count[0] != 0 || 7418c2ecf20Sopenharmony_ci validation_state->tmu_write_count[1] != 0); 7428c2ecf20Sopenharmony_ci} 7438c2ecf20Sopenharmony_ci 7448c2ecf20Sopenharmony_cistatic bool 7458c2ecf20Sopenharmony_civc4_handle_branch_target(struct vc4_shader_validation_state *validation_state) 7468c2ecf20Sopenharmony_ci{ 7478c2ecf20Sopenharmony_ci uint32_t ip = validation_state->ip; 7488c2ecf20Sopenharmony_ci 7498c2ecf20Sopenharmony_ci if (!test_bit(ip, validation_state->branch_targets)) 7508c2ecf20Sopenharmony_ci return true; 7518c2ecf20Sopenharmony_ci 7528c2ecf20Sopenharmony_ci if (texturing_in_progress(validation_state)) { 7538c2ecf20Sopenharmony_ci DRM_DEBUG("Branch target landed during TMU setup\n"); 7548c2ecf20Sopenharmony_ci return false; 7558c2ecf20Sopenharmony_ci } 7568c2ecf20Sopenharmony_ci 7578c2ecf20Sopenharmony_ci /* Reset our live values tracking, since this instruction may have 7588c2ecf20Sopenharmony_ci * multiple predecessors. 7598c2ecf20Sopenharmony_ci * 7608c2ecf20Sopenharmony_ci * One could potentially do analysis to determine that, for 7618c2ecf20Sopenharmony_ci * example, all predecessors have a live max clamp in the same 7628c2ecf20Sopenharmony_ci * register, but we don't bother with that. 7638c2ecf20Sopenharmony_ci */ 7648c2ecf20Sopenharmony_ci reset_validation_state(validation_state); 7658c2ecf20Sopenharmony_ci 7668c2ecf20Sopenharmony_ci /* Since we've entered a basic block from potentially multiple 7678c2ecf20Sopenharmony_ci * predecessors, we need the uniforms address to be updated before any 7688c2ecf20Sopenharmony_ci * unforms are read. We require that after any branch point, the next 7698c2ecf20Sopenharmony_ci * uniform to be loaded is a uniform address offset. That uniform's 7708c2ecf20Sopenharmony_ci * offset will be marked by the uniform address register write 7718c2ecf20Sopenharmony_ci * validation, or a one-off the end-of-program check. 7728c2ecf20Sopenharmony_ci */ 7738c2ecf20Sopenharmony_ci validation_state->needs_uniform_address_update = true; 7748c2ecf20Sopenharmony_ci 7758c2ecf20Sopenharmony_ci return true; 7768c2ecf20Sopenharmony_ci} 7778c2ecf20Sopenharmony_ci 7788c2ecf20Sopenharmony_cistruct vc4_validated_shader_info * 7798c2ecf20Sopenharmony_civc4_validate_shader(struct drm_gem_cma_object *shader_obj) 7808c2ecf20Sopenharmony_ci{ 7818c2ecf20Sopenharmony_ci bool found_shader_end = false; 7828c2ecf20Sopenharmony_ci int shader_end_ip = 0; 7838c2ecf20Sopenharmony_ci uint32_t last_thread_switch_ip = -3; 7848c2ecf20Sopenharmony_ci uint32_t ip; 7858c2ecf20Sopenharmony_ci struct vc4_validated_shader_info *validated_shader = NULL; 7868c2ecf20Sopenharmony_ci struct vc4_shader_validation_state validation_state; 7878c2ecf20Sopenharmony_ci 7888c2ecf20Sopenharmony_ci memset(&validation_state, 0, sizeof(validation_state)); 7898c2ecf20Sopenharmony_ci validation_state.shader = shader_obj->vaddr; 7908c2ecf20Sopenharmony_ci validation_state.max_ip = shader_obj->base.size / sizeof(uint64_t); 7918c2ecf20Sopenharmony_ci 7928c2ecf20Sopenharmony_ci reset_validation_state(&validation_state); 7938c2ecf20Sopenharmony_ci 7948c2ecf20Sopenharmony_ci validation_state.branch_targets = 7958c2ecf20Sopenharmony_ci kcalloc(BITS_TO_LONGS(validation_state.max_ip), 7968c2ecf20Sopenharmony_ci sizeof(unsigned long), GFP_KERNEL); 7978c2ecf20Sopenharmony_ci if (!validation_state.branch_targets) 7988c2ecf20Sopenharmony_ci goto fail; 7998c2ecf20Sopenharmony_ci 8008c2ecf20Sopenharmony_ci validated_shader = kcalloc(1, sizeof(*validated_shader), GFP_KERNEL); 8018c2ecf20Sopenharmony_ci if (!validated_shader) 8028c2ecf20Sopenharmony_ci goto fail; 8038c2ecf20Sopenharmony_ci 8048c2ecf20Sopenharmony_ci if (!vc4_validate_branches(&validation_state)) 8058c2ecf20Sopenharmony_ci goto fail; 8068c2ecf20Sopenharmony_ci 8078c2ecf20Sopenharmony_ci for (ip = 0; ip < validation_state.max_ip; ip++) { 8088c2ecf20Sopenharmony_ci uint64_t inst = validation_state.shader[ip]; 8098c2ecf20Sopenharmony_ci uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); 8108c2ecf20Sopenharmony_ci 8118c2ecf20Sopenharmony_ci validation_state.ip = ip; 8128c2ecf20Sopenharmony_ci 8138c2ecf20Sopenharmony_ci if (!vc4_handle_branch_target(&validation_state)) 8148c2ecf20Sopenharmony_ci goto fail; 8158c2ecf20Sopenharmony_ci 8168c2ecf20Sopenharmony_ci if (ip == last_thread_switch_ip + 3) { 8178c2ecf20Sopenharmony_ci /* Reset r0-r3 live clamp data */ 8188c2ecf20Sopenharmony_ci int i; 8198c2ecf20Sopenharmony_ci 8208c2ecf20Sopenharmony_ci for (i = 64; i < LIVE_REG_COUNT; i++) { 8218c2ecf20Sopenharmony_ci validation_state.live_min_clamp_offsets[i] = ~0; 8228c2ecf20Sopenharmony_ci validation_state.live_max_clamp_regs[i] = false; 8238c2ecf20Sopenharmony_ci validation_state.live_immediates[i] = ~0; 8248c2ecf20Sopenharmony_ci } 8258c2ecf20Sopenharmony_ci } 8268c2ecf20Sopenharmony_ci 8278c2ecf20Sopenharmony_ci switch (sig) { 8288c2ecf20Sopenharmony_ci case QPU_SIG_NONE: 8298c2ecf20Sopenharmony_ci case QPU_SIG_WAIT_FOR_SCOREBOARD: 8308c2ecf20Sopenharmony_ci case QPU_SIG_SCOREBOARD_UNLOCK: 8318c2ecf20Sopenharmony_ci case QPU_SIG_COLOR_LOAD: 8328c2ecf20Sopenharmony_ci case QPU_SIG_LOAD_TMU0: 8338c2ecf20Sopenharmony_ci case QPU_SIG_LOAD_TMU1: 8348c2ecf20Sopenharmony_ci case QPU_SIG_PROG_END: 8358c2ecf20Sopenharmony_ci case QPU_SIG_SMALL_IMM: 8368c2ecf20Sopenharmony_ci case QPU_SIG_THREAD_SWITCH: 8378c2ecf20Sopenharmony_ci case QPU_SIG_LAST_THREAD_SWITCH: 8388c2ecf20Sopenharmony_ci if (!check_instruction_writes(validated_shader, 8398c2ecf20Sopenharmony_ci &validation_state)) { 8408c2ecf20Sopenharmony_ci DRM_DEBUG("Bad write at ip %d\n", ip); 8418c2ecf20Sopenharmony_ci goto fail; 8428c2ecf20Sopenharmony_ci } 8438c2ecf20Sopenharmony_ci 8448c2ecf20Sopenharmony_ci if (!check_instruction_reads(validated_shader, 8458c2ecf20Sopenharmony_ci &validation_state)) 8468c2ecf20Sopenharmony_ci goto fail; 8478c2ecf20Sopenharmony_ci 8488c2ecf20Sopenharmony_ci if (sig == QPU_SIG_PROG_END) { 8498c2ecf20Sopenharmony_ci found_shader_end = true; 8508c2ecf20Sopenharmony_ci shader_end_ip = ip; 8518c2ecf20Sopenharmony_ci } 8528c2ecf20Sopenharmony_ci 8538c2ecf20Sopenharmony_ci if (sig == QPU_SIG_THREAD_SWITCH || 8548c2ecf20Sopenharmony_ci sig == QPU_SIG_LAST_THREAD_SWITCH) { 8558c2ecf20Sopenharmony_ci validated_shader->is_threaded = true; 8568c2ecf20Sopenharmony_ci 8578c2ecf20Sopenharmony_ci if (ip < last_thread_switch_ip + 3) { 8588c2ecf20Sopenharmony_ci DRM_DEBUG("Thread switch too soon after " 8598c2ecf20Sopenharmony_ci "last switch at ip %d\n", ip); 8608c2ecf20Sopenharmony_ci goto fail; 8618c2ecf20Sopenharmony_ci } 8628c2ecf20Sopenharmony_ci last_thread_switch_ip = ip; 8638c2ecf20Sopenharmony_ci } 8648c2ecf20Sopenharmony_ci 8658c2ecf20Sopenharmony_ci break; 8668c2ecf20Sopenharmony_ci 8678c2ecf20Sopenharmony_ci case QPU_SIG_LOAD_IMM: 8688c2ecf20Sopenharmony_ci if (!check_instruction_writes(validated_shader, 8698c2ecf20Sopenharmony_ci &validation_state)) { 8708c2ecf20Sopenharmony_ci DRM_DEBUG("Bad LOAD_IMM write at ip %d\n", ip); 8718c2ecf20Sopenharmony_ci goto fail; 8728c2ecf20Sopenharmony_ci } 8738c2ecf20Sopenharmony_ci break; 8748c2ecf20Sopenharmony_ci 8758c2ecf20Sopenharmony_ci case QPU_SIG_BRANCH: 8768c2ecf20Sopenharmony_ci if (!check_branch(inst, validated_shader, 8778c2ecf20Sopenharmony_ci &validation_state, ip)) 8788c2ecf20Sopenharmony_ci goto fail; 8798c2ecf20Sopenharmony_ci 8808c2ecf20Sopenharmony_ci if (ip < last_thread_switch_ip + 3) { 8818c2ecf20Sopenharmony_ci DRM_DEBUG("Branch in thread switch at ip %d", 8828c2ecf20Sopenharmony_ci ip); 8838c2ecf20Sopenharmony_ci goto fail; 8848c2ecf20Sopenharmony_ci } 8858c2ecf20Sopenharmony_ci 8868c2ecf20Sopenharmony_ci break; 8878c2ecf20Sopenharmony_ci default: 8888c2ecf20Sopenharmony_ci DRM_DEBUG("Unsupported QPU signal %d at " 8898c2ecf20Sopenharmony_ci "instruction %d\n", sig, ip); 8908c2ecf20Sopenharmony_ci goto fail; 8918c2ecf20Sopenharmony_ci } 8928c2ecf20Sopenharmony_ci 8938c2ecf20Sopenharmony_ci /* There are two delay slots after program end is signaled 8948c2ecf20Sopenharmony_ci * that are still executed, then we're finished. 8958c2ecf20Sopenharmony_ci */ 8968c2ecf20Sopenharmony_ci if (found_shader_end && ip == shader_end_ip + 2) 8978c2ecf20Sopenharmony_ci break; 8988c2ecf20Sopenharmony_ci } 8998c2ecf20Sopenharmony_ci 9008c2ecf20Sopenharmony_ci if (ip == validation_state.max_ip) { 9018c2ecf20Sopenharmony_ci DRM_DEBUG("shader failed to terminate before " 9028c2ecf20Sopenharmony_ci "shader BO end at %zd\n", 9038c2ecf20Sopenharmony_ci shader_obj->base.size); 9048c2ecf20Sopenharmony_ci goto fail; 9058c2ecf20Sopenharmony_ci } 9068c2ecf20Sopenharmony_ci 9078c2ecf20Sopenharmony_ci /* Might corrupt other thread */ 9088c2ecf20Sopenharmony_ci if (validated_shader->is_threaded && 9098c2ecf20Sopenharmony_ci validation_state.all_registers_used) { 9108c2ecf20Sopenharmony_ci DRM_DEBUG("Shader uses threading, but uses the upper " 9118c2ecf20Sopenharmony_ci "half of the registers, too\n"); 9128c2ecf20Sopenharmony_ci goto fail; 9138c2ecf20Sopenharmony_ci } 9148c2ecf20Sopenharmony_ci 9158c2ecf20Sopenharmony_ci /* If we did a backwards branch and we haven't emitted a uniforms 9168c2ecf20Sopenharmony_ci * reset since then, we still need the uniforms stream to have the 9178c2ecf20Sopenharmony_ci * uniforms address available so that the backwards branch can do its 9188c2ecf20Sopenharmony_ci * uniforms reset. 9198c2ecf20Sopenharmony_ci * 9208c2ecf20Sopenharmony_ci * We could potentially prove that the backwards branch doesn't 9218c2ecf20Sopenharmony_ci * contain any uses of uniforms until program exit, but that doesn't 9228c2ecf20Sopenharmony_ci * seem to be worth the trouble. 9238c2ecf20Sopenharmony_ci */ 9248c2ecf20Sopenharmony_ci if (validation_state.needs_uniform_address_for_loop) { 9258c2ecf20Sopenharmony_ci if (!require_uniform_address_uniform(validated_shader)) 9268c2ecf20Sopenharmony_ci goto fail; 9278c2ecf20Sopenharmony_ci validated_shader->uniforms_size += 4; 9288c2ecf20Sopenharmony_ci } 9298c2ecf20Sopenharmony_ci 9308c2ecf20Sopenharmony_ci /* Again, no chance of integer overflow here because the worst case 9318c2ecf20Sopenharmony_ci * scenario is 8 bytes of uniforms plus handles per 8-byte 9328c2ecf20Sopenharmony_ci * instruction. 9338c2ecf20Sopenharmony_ci */ 9348c2ecf20Sopenharmony_ci validated_shader->uniforms_src_size = 9358c2ecf20Sopenharmony_ci (validated_shader->uniforms_size + 9368c2ecf20Sopenharmony_ci 4 * validated_shader->num_texture_samples); 9378c2ecf20Sopenharmony_ci 9388c2ecf20Sopenharmony_ci kfree(validation_state.branch_targets); 9398c2ecf20Sopenharmony_ci 9408c2ecf20Sopenharmony_ci return validated_shader; 9418c2ecf20Sopenharmony_ci 9428c2ecf20Sopenharmony_cifail: 9438c2ecf20Sopenharmony_ci kfree(validation_state.branch_targets); 9448c2ecf20Sopenharmony_ci if (validated_shader) { 9458c2ecf20Sopenharmony_ci kfree(validated_shader->uniform_addr_offsets); 9468c2ecf20Sopenharmony_ci kfree(validated_shader->texture_samples); 9478c2ecf20Sopenharmony_ci kfree(validated_shader); 9488c2ecf20Sopenharmony_ci } 9498c2ecf20Sopenharmony_ci return NULL; 9508c2ecf20Sopenharmony_ci} 951