18c2ecf20Sopenharmony_ci/* 28c2ecf20Sopenharmony_ci * Copyright © 2014 Broadcom 38c2ecf20Sopenharmony_ci * 48c2ecf20Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 58c2ecf20Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 68c2ecf20Sopenharmony_ci * to deal in the Software without restriction, including without limitation 78c2ecf20Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 88c2ecf20Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 98c2ecf20Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 108c2ecf20Sopenharmony_ci * 118c2ecf20Sopenharmony_ci * The above copyright notice and this permission notice (including the next 128c2ecf20Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 138c2ecf20Sopenharmony_ci * Software. 148c2ecf20Sopenharmony_ci * 158c2ecf20Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 168c2ecf20Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 178c2ecf20Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 188c2ecf20Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 198c2ecf20Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 208c2ecf20Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 218c2ecf20Sopenharmony_ci * IN THE SOFTWARE. 228c2ecf20Sopenharmony_ci */ 238c2ecf20Sopenharmony_ci 248c2ecf20Sopenharmony_ci/** 258c2ecf20Sopenharmony_ci * DOC: Command list validator for VC4. 268c2ecf20Sopenharmony_ci * 278c2ecf20Sopenharmony_ci * Since the VC4 has no IOMMU between it and system memory, a user 288c2ecf20Sopenharmony_ci * with access to execute command lists could escalate privilege by 298c2ecf20Sopenharmony_ci * overwriting system memory (drawing to it as a framebuffer) or 308c2ecf20Sopenharmony_ci * reading system memory it shouldn't (reading it as a vertex buffer 318c2ecf20Sopenharmony_ci * or index buffer) 328c2ecf20Sopenharmony_ci * 338c2ecf20Sopenharmony_ci * We validate binner command lists to ensure that all accesses are 348c2ecf20Sopenharmony_ci * within the bounds of the GEM objects referenced by the submitted 358c2ecf20Sopenharmony_ci * job. It explicitly whitelists packets, and looks at the offsets in 368c2ecf20Sopenharmony_ci * any address fields to make sure they're contained within the BOs 378c2ecf20Sopenharmony_ci * they reference. 388c2ecf20Sopenharmony_ci * 398c2ecf20Sopenharmony_ci * Note that because CL validation is already reading the 408c2ecf20Sopenharmony_ci * user-submitted CL and writing the validated copy out to the memory 418c2ecf20Sopenharmony_ci * that the GPU will actually read, this is also where GEM relocation 428c2ecf20Sopenharmony_ci * processing (turning BO references into actual addresses for the GPU 438c2ecf20Sopenharmony_ci * to use) happens. 448c2ecf20Sopenharmony_ci */ 458c2ecf20Sopenharmony_ci 468c2ecf20Sopenharmony_ci#include "uapi/drm/vc4_drm.h" 478c2ecf20Sopenharmony_ci#include "vc4_drv.h" 488c2ecf20Sopenharmony_ci#include "vc4_packet.h" 498c2ecf20Sopenharmony_ci 508c2ecf20Sopenharmony_ci#define VALIDATE_ARGS \ 518c2ecf20Sopenharmony_ci struct vc4_exec_info *exec, \ 528c2ecf20Sopenharmony_ci void *validated, \ 538c2ecf20Sopenharmony_ci void *untrusted 548c2ecf20Sopenharmony_ci 558c2ecf20Sopenharmony_ci/** Return the width in pixels of a 64-byte microtile. */ 568c2ecf20Sopenharmony_cistatic uint32_t 578c2ecf20Sopenharmony_ciutile_width(int cpp) 588c2ecf20Sopenharmony_ci{ 598c2ecf20Sopenharmony_ci switch (cpp) { 608c2ecf20Sopenharmony_ci case 1: 618c2ecf20Sopenharmony_ci case 2: 628c2ecf20Sopenharmony_ci return 8; 638c2ecf20Sopenharmony_ci case 4: 648c2ecf20Sopenharmony_ci return 4; 658c2ecf20Sopenharmony_ci case 8: 668c2ecf20Sopenharmony_ci return 2; 678c2ecf20Sopenharmony_ci default: 688c2ecf20Sopenharmony_ci DRM_ERROR("unknown cpp: %d\n", cpp); 698c2ecf20Sopenharmony_ci return 1; 708c2ecf20Sopenharmony_ci } 718c2ecf20Sopenharmony_ci} 728c2ecf20Sopenharmony_ci 738c2ecf20Sopenharmony_ci/** Return the height in pixels of a 64-byte microtile. */ 748c2ecf20Sopenharmony_cistatic uint32_t 758c2ecf20Sopenharmony_ciutile_height(int cpp) 768c2ecf20Sopenharmony_ci{ 778c2ecf20Sopenharmony_ci switch (cpp) { 788c2ecf20Sopenharmony_ci case 1: 798c2ecf20Sopenharmony_ci return 8; 808c2ecf20Sopenharmony_ci case 2: 818c2ecf20Sopenharmony_ci case 4: 828c2ecf20Sopenharmony_ci case 8: 838c2ecf20Sopenharmony_ci return 4; 848c2ecf20Sopenharmony_ci default: 858c2ecf20Sopenharmony_ci DRM_ERROR("unknown cpp: %d\n", cpp); 868c2ecf20Sopenharmony_ci return 1; 878c2ecf20Sopenharmony_ci } 888c2ecf20Sopenharmony_ci} 898c2ecf20Sopenharmony_ci 908c2ecf20Sopenharmony_ci/** 918c2ecf20Sopenharmony_ci * size_is_lt() - Returns whether a miplevel of the given size will 928c2ecf20Sopenharmony_ci * use the lineartile (LT) tiling layout rather than the normal T 938c2ecf20Sopenharmony_ci * tiling layout. 948c2ecf20Sopenharmony_ci * @width: Width in pixels of the miplevel 958c2ecf20Sopenharmony_ci * @height: Height in pixels of the miplevel 968c2ecf20Sopenharmony_ci * @cpp: Bytes per pixel of the pixel format 978c2ecf20Sopenharmony_ci */ 988c2ecf20Sopenharmony_cistatic bool 998c2ecf20Sopenharmony_cisize_is_lt(uint32_t width, uint32_t height, int cpp) 1008c2ecf20Sopenharmony_ci{ 1018c2ecf20Sopenharmony_ci return (width <= 4 * utile_width(cpp) || 1028c2ecf20Sopenharmony_ci height <= 4 * utile_height(cpp)); 1038c2ecf20Sopenharmony_ci} 1048c2ecf20Sopenharmony_ci 1058c2ecf20Sopenharmony_cistruct drm_gem_cma_object * 1068c2ecf20Sopenharmony_civc4_use_bo(struct vc4_exec_info *exec, uint32_t hindex) 1078c2ecf20Sopenharmony_ci{ 1088c2ecf20Sopenharmony_ci struct drm_gem_cma_object *obj; 1098c2ecf20Sopenharmony_ci struct vc4_bo *bo; 1108c2ecf20Sopenharmony_ci 1118c2ecf20Sopenharmony_ci if (hindex >= exec->bo_count) { 1128c2ecf20Sopenharmony_ci DRM_DEBUG("BO index %d greater than BO count %d\n", 1138c2ecf20Sopenharmony_ci hindex, exec->bo_count); 1148c2ecf20Sopenharmony_ci return NULL; 1158c2ecf20Sopenharmony_ci } 1168c2ecf20Sopenharmony_ci obj = exec->bo[hindex]; 1178c2ecf20Sopenharmony_ci bo = to_vc4_bo(&obj->base); 1188c2ecf20Sopenharmony_ci 1198c2ecf20Sopenharmony_ci if (bo->validated_shader) { 1208c2ecf20Sopenharmony_ci DRM_DEBUG("Trying to use shader BO as something other than " 1218c2ecf20Sopenharmony_ci "a shader\n"); 1228c2ecf20Sopenharmony_ci return NULL; 1238c2ecf20Sopenharmony_ci } 1248c2ecf20Sopenharmony_ci 1258c2ecf20Sopenharmony_ci return obj; 1268c2ecf20Sopenharmony_ci} 1278c2ecf20Sopenharmony_ci 1288c2ecf20Sopenharmony_cistatic struct drm_gem_cma_object * 1298c2ecf20Sopenharmony_civc4_use_handle(struct vc4_exec_info *exec, uint32_t gem_handles_packet_index) 1308c2ecf20Sopenharmony_ci{ 1318c2ecf20Sopenharmony_ci return vc4_use_bo(exec, exec->bo_index[gem_handles_packet_index]); 1328c2ecf20Sopenharmony_ci} 1338c2ecf20Sopenharmony_ci 1348c2ecf20Sopenharmony_cistatic bool 1358c2ecf20Sopenharmony_civalidate_bin_pos(struct vc4_exec_info *exec, void *untrusted, uint32_t pos) 1368c2ecf20Sopenharmony_ci{ 1378c2ecf20Sopenharmony_ci /* Note that the untrusted pointer passed to these functions is 1388c2ecf20Sopenharmony_ci * incremented past the packet byte. 1398c2ecf20Sopenharmony_ci */ 1408c2ecf20Sopenharmony_ci return (untrusted - 1 == exec->bin_u + pos); 1418c2ecf20Sopenharmony_ci} 1428c2ecf20Sopenharmony_ci 1438c2ecf20Sopenharmony_cistatic uint32_t 1448c2ecf20Sopenharmony_cigl_shader_rec_size(uint32_t pointer_bits) 1458c2ecf20Sopenharmony_ci{ 1468c2ecf20Sopenharmony_ci uint32_t attribute_count = pointer_bits & 7; 1478c2ecf20Sopenharmony_ci bool extended = pointer_bits & 8; 1488c2ecf20Sopenharmony_ci 1498c2ecf20Sopenharmony_ci if (attribute_count == 0) 1508c2ecf20Sopenharmony_ci attribute_count = 8; 1518c2ecf20Sopenharmony_ci 1528c2ecf20Sopenharmony_ci if (extended) 1538c2ecf20Sopenharmony_ci return 100 + attribute_count * 4; 1548c2ecf20Sopenharmony_ci else 1558c2ecf20Sopenharmony_ci return 36 + attribute_count * 8; 1568c2ecf20Sopenharmony_ci} 1578c2ecf20Sopenharmony_ci 1588c2ecf20Sopenharmony_cibool 1598c2ecf20Sopenharmony_civc4_check_tex_size(struct vc4_exec_info *exec, struct drm_gem_cma_object *fbo, 1608c2ecf20Sopenharmony_ci uint32_t offset, uint8_t tiling_format, 1618c2ecf20Sopenharmony_ci uint32_t width, uint32_t height, uint8_t cpp) 1628c2ecf20Sopenharmony_ci{ 1638c2ecf20Sopenharmony_ci uint32_t aligned_width, aligned_height, stride, size; 1648c2ecf20Sopenharmony_ci uint32_t utile_w = utile_width(cpp); 1658c2ecf20Sopenharmony_ci uint32_t utile_h = utile_height(cpp); 1668c2ecf20Sopenharmony_ci 1678c2ecf20Sopenharmony_ci /* The shaded vertex format stores signed 12.4 fixed point 1688c2ecf20Sopenharmony_ci * (-2048,2047) offsets from the viewport center, so we should 1698c2ecf20Sopenharmony_ci * never have a render target larger than 4096. The texture 1708c2ecf20Sopenharmony_ci * unit can only sample from 2048x2048, so it's even more 1718c2ecf20Sopenharmony_ci * restricted. This lets us avoid worrying about overflow in 1728c2ecf20Sopenharmony_ci * our math. 1738c2ecf20Sopenharmony_ci */ 1748c2ecf20Sopenharmony_ci if (width > 4096 || height > 4096) { 1758c2ecf20Sopenharmony_ci DRM_DEBUG("Surface dimensions (%d,%d) too large", 1768c2ecf20Sopenharmony_ci width, height); 1778c2ecf20Sopenharmony_ci return false; 1788c2ecf20Sopenharmony_ci } 1798c2ecf20Sopenharmony_ci 1808c2ecf20Sopenharmony_ci switch (tiling_format) { 1818c2ecf20Sopenharmony_ci case VC4_TILING_FORMAT_LINEAR: 1828c2ecf20Sopenharmony_ci aligned_width = round_up(width, utile_w); 1838c2ecf20Sopenharmony_ci aligned_height = height; 1848c2ecf20Sopenharmony_ci break; 1858c2ecf20Sopenharmony_ci case VC4_TILING_FORMAT_T: 1868c2ecf20Sopenharmony_ci aligned_width = round_up(width, utile_w * 8); 1878c2ecf20Sopenharmony_ci aligned_height = round_up(height, utile_h * 8); 1888c2ecf20Sopenharmony_ci break; 1898c2ecf20Sopenharmony_ci case VC4_TILING_FORMAT_LT: 1908c2ecf20Sopenharmony_ci aligned_width = round_up(width, utile_w); 1918c2ecf20Sopenharmony_ci aligned_height = round_up(height, utile_h); 1928c2ecf20Sopenharmony_ci break; 1938c2ecf20Sopenharmony_ci default: 1948c2ecf20Sopenharmony_ci DRM_DEBUG("buffer tiling %d unsupported\n", tiling_format); 1958c2ecf20Sopenharmony_ci return false; 1968c2ecf20Sopenharmony_ci } 1978c2ecf20Sopenharmony_ci 1988c2ecf20Sopenharmony_ci stride = aligned_width * cpp; 1998c2ecf20Sopenharmony_ci size = stride * aligned_height; 2008c2ecf20Sopenharmony_ci 2018c2ecf20Sopenharmony_ci if (size + offset < size || 2028c2ecf20Sopenharmony_ci size + offset > fbo->base.size) { 2038c2ecf20Sopenharmony_ci DRM_DEBUG("Overflow in %dx%d (%dx%d) fbo size (%d + %d > %zd)\n", 2048c2ecf20Sopenharmony_ci width, height, 2058c2ecf20Sopenharmony_ci aligned_width, aligned_height, 2068c2ecf20Sopenharmony_ci size, offset, fbo->base.size); 2078c2ecf20Sopenharmony_ci return false; 2088c2ecf20Sopenharmony_ci } 2098c2ecf20Sopenharmony_ci 2108c2ecf20Sopenharmony_ci return true; 2118c2ecf20Sopenharmony_ci} 2128c2ecf20Sopenharmony_ci 2138c2ecf20Sopenharmony_cistatic int 2148c2ecf20Sopenharmony_civalidate_flush(VALIDATE_ARGS) 2158c2ecf20Sopenharmony_ci{ 2168c2ecf20Sopenharmony_ci if (!validate_bin_pos(exec, untrusted, exec->args->bin_cl_size - 1)) { 2178c2ecf20Sopenharmony_ci DRM_DEBUG("Bin CL must end with VC4_PACKET_FLUSH\n"); 2188c2ecf20Sopenharmony_ci return -EINVAL; 2198c2ecf20Sopenharmony_ci } 2208c2ecf20Sopenharmony_ci exec->found_flush = true; 2218c2ecf20Sopenharmony_ci 2228c2ecf20Sopenharmony_ci return 0; 2238c2ecf20Sopenharmony_ci} 2248c2ecf20Sopenharmony_ci 2258c2ecf20Sopenharmony_cistatic int 2268c2ecf20Sopenharmony_civalidate_start_tile_binning(VALIDATE_ARGS) 2278c2ecf20Sopenharmony_ci{ 2288c2ecf20Sopenharmony_ci if (exec->found_start_tile_binning_packet) { 2298c2ecf20Sopenharmony_ci DRM_DEBUG("Duplicate VC4_PACKET_START_TILE_BINNING\n"); 2308c2ecf20Sopenharmony_ci return -EINVAL; 2318c2ecf20Sopenharmony_ci } 2328c2ecf20Sopenharmony_ci exec->found_start_tile_binning_packet = true; 2338c2ecf20Sopenharmony_ci 2348c2ecf20Sopenharmony_ci if (!exec->found_tile_binning_mode_config_packet) { 2358c2ecf20Sopenharmony_ci DRM_DEBUG("missing VC4_PACKET_TILE_BINNING_MODE_CONFIG\n"); 2368c2ecf20Sopenharmony_ci return -EINVAL; 2378c2ecf20Sopenharmony_ci } 2388c2ecf20Sopenharmony_ci 2398c2ecf20Sopenharmony_ci return 0; 2408c2ecf20Sopenharmony_ci} 2418c2ecf20Sopenharmony_ci 2428c2ecf20Sopenharmony_cistatic int 2438c2ecf20Sopenharmony_civalidate_increment_semaphore(VALIDATE_ARGS) 2448c2ecf20Sopenharmony_ci{ 2458c2ecf20Sopenharmony_ci if (!validate_bin_pos(exec, untrusted, exec->args->bin_cl_size - 2)) { 2468c2ecf20Sopenharmony_ci DRM_DEBUG("Bin CL must end with " 2478c2ecf20Sopenharmony_ci "VC4_PACKET_INCREMENT_SEMAPHORE\n"); 2488c2ecf20Sopenharmony_ci return -EINVAL; 2498c2ecf20Sopenharmony_ci } 2508c2ecf20Sopenharmony_ci exec->found_increment_semaphore_packet = true; 2518c2ecf20Sopenharmony_ci 2528c2ecf20Sopenharmony_ci return 0; 2538c2ecf20Sopenharmony_ci} 2548c2ecf20Sopenharmony_ci 2558c2ecf20Sopenharmony_cistatic int 2568c2ecf20Sopenharmony_civalidate_indexed_prim_list(VALIDATE_ARGS) 2578c2ecf20Sopenharmony_ci{ 2588c2ecf20Sopenharmony_ci struct drm_gem_cma_object *ib; 2598c2ecf20Sopenharmony_ci uint32_t length = *(uint32_t *)(untrusted + 1); 2608c2ecf20Sopenharmony_ci uint32_t offset = *(uint32_t *)(untrusted + 5); 2618c2ecf20Sopenharmony_ci uint32_t max_index = *(uint32_t *)(untrusted + 9); 2628c2ecf20Sopenharmony_ci uint32_t index_size = (*(uint8_t *)(untrusted + 0) >> 4) ? 2 : 1; 2638c2ecf20Sopenharmony_ci struct vc4_shader_state *shader_state; 2648c2ecf20Sopenharmony_ci 2658c2ecf20Sopenharmony_ci /* Check overflow condition */ 2668c2ecf20Sopenharmony_ci if (exec->shader_state_count == 0) { 2678c2ecf20Sopenharmony_ci DRM_DEBUG("shader state must precede primitives\n"); 2688c2ecf20Sopenharmony_ci return -EINVAL; 2698c2ecf20Sopenharmony_ci } 2708c2ecf20Sopenharmony_ci shader_state = &exec->shader_state[exec->shader_state_count - 1]; 2718c2ecf20Sopenharmony_ci 2728c2ecf20Sopenharmony_ci if (max_index > shader_state->max_index) 2738c2ecf20Sopenharmony_ci shader_state->max_index = max_index; 2748c2ecf20Sopenharmony_ci 2758c2ecf20Sopenharmony_ci ib = vc4_use_handle(exec, 0); 2768c2ecf20Sopenharmony_ci if (!ib) 2778c2ecf20Sopenharmony_ci return -EINVAL; 2788c2ecf20Sopenharmony_ci 2798c2ecf20Sopenharmony_ci exec->bin_dep_seqno = max(exec->bin_dep_seqno, 2808c2ecf20Sopenharmony_ci to_vc4_bo(&ib->base)->write_seqno); 2818c2ecf20Sopenharmony_ci 2828c2ecf20Sopenharmony_ci if (offset > ib->base.size || 2838c2ecf20Sopenharmony_ci (ib->base.size - offset) / index_size < length) { 2848c2ecf20Sopenharmony_ci DRM_DEBUG("IB access overflow (%d + %d*%d > %zd)\n", 2858c2ecf20Sopenharmony_ci offset, length, index_size, ib->base.size); 2868c2ecf20Sopenharmony_ci return -EINVAL; 2878c2ecf20Sopenharmony_ci } 2888c2ecf20Sopenharmony_ci 2898c2ecf20Sopenharmony_ci *(uint32_t *)(validated + 5) = ib->paddr + offset; 2908c2ecf20Sopenharmony_ci 2918c2ecf20Sopenharmony_ci return 0; 2928c2ecf20Sopenharmony_ci} 2938c2ecf20Sopenharmony_ci 2948c2ecf20Sopenharmony_cistatic int 2958c2ecf20Sopenharmony_civalidate_gl_array_primitive(VALIDATE_ARGS) 2968c2ecf20Sopenharmony_ci{ 2978c2ecf20Sopenharmony_ci uint32_t length = *(uint32_t *)(untrusted + 1); 2988c2ecf20Sopenharmony_ci uint32_t base_index = *(uint32_t *)(untrusted + 5); 2998c2ecf20Sopenharmony_ci uint32_t max_index; 3008c2ecf20Sopenharmony_ci struct vc4_shader_state *shader_state; 3018c2ecf20Sopenharmony_ci 3028c2ecf20Sopenharmony_ci /* Check overflow condition */ 3038c2ecf20Sopenharmony_ci if (exec->shader_state_count == 0) { 3048c2ecf20Sopenharmony_ci DRM_DEBUG("shader state must precede primitives\n"); 3058c2ecf20Sopenharmony_ci return -EINVAL; 3068c2ecf20Sopenharmony_ci } 3078c2ecf20Sopenharmony_ci shader_state = &exec->shader_state[exec->shader_state_count - 1]; 3088c2ecf20Sopenharmony_ci 3098c2ecf20Sopenharmony_ci if (length + base_index < length) { 3108c2ecf20Sopenharmony_ci DRM_DEBUG("primitive vertex count overflow\n"); 3118c2ecf20Sopenharmony_ci return -EINVAL; 3128c2ecf20Sopenharmony_ci } 3138c2ecf20Sopenharmony_ci max_index = length + base_index - 1; 3148c2ecf20Sopenharmony_ci 3158c2ecf20Sopenharmony_ci if (max_index > shader_state->max_index) 3168c2ecf20Sopenharmony_ci shader_state->max_index = max_index; 3178c2ecf20Sopenharmony_ci 3188c2ecf20Sopenharmony_ci return 0; 3198c2ecf20Sopenharmony_ci} 3208c2ecf20Sopenharmony_ci 3218c2ecf20Sopenharmony_cistatic int 3228c2ecf20Sopenharmony_civalidate_gl_shader_state(VALIDATE_ARGS) 3238c2ecf20Sopenharmony_ci{ 3248c2ecf20Sopenharmony_ci uint32_t i = exec->shader_state_count++; 3258c2ecf20Sopenharmony_ci 3268c2ecf20Sopenharmony_ci if (i >= exec->shader_state_size) { 3278c2ecf20Sopenharmony_ci DRM_DEBUG("More requests for shader states than declared\n"); 3288c2ecf20Sopenharmony_ci return -EINVAL; 3298c2ecf20Sopenharmony_ci } 3308c2ecf20Sopenharmony_ci 3318c2ecf20Sopenharmony_ci exec->shader_state[i].addr = *(uint32_t *)untrusted; 3328c2ecf20Sopenharmony_ci exec->shader_state[i].max_index = 0; 3338c2ecf20Sopenharmony_ci 3348c2ecf20Sopenharmony_ci if (exec->shader_state[i].addr & ~0xf) { 3358c2ecf20Sopenharmony_ci DRM_DEBUG("high bits set in GL shader rec reference\n"); 3368c2ecf20Sopenharmony_ci return -EINVAL; 3378c2ecf20Sopenharmony_ci } 3388c2ecf20Sopenharmony_ci 3398c2ecf20Sopenharmony_ci *(uint32_t *)validated = (exec->shader_rec_p + 3408c2ecf20Sopenharmony_ci exec->shader_state[i].addr); 3418c2ecf20Sopenharmony_ci 3428c2ecf20Sopenharmony_ci exec->shader_rec_p += 3438c2ecf20Sopenharmony_ci roundup(gl_shader_rec_size(exec->shader_state[i].addr), 16); 3448c2ecf20Sopenharmony_ci 3458c2ecf20Sopenharmony_ci return 0; 3468c2ecf20Sopenharmony_ci} 3478c2ecf20Sopenharmony_ci 3488c2ecf20Sopenharmony_cistatic int 3498c2ecf20Sopenharmony_civalidate_tile_binning_config(VALIDATE_ARGS) 3508c2ecf20Sopenharmony_ci{ 3518c2ecf20Sopenharmony_ci struct drm_device *dev = exec->exec_bo->base.dev; 3528c2ecf20Sopenharmony_ci struct vc4_dev *vc4 = to_vc4_dev(dev); 3538c2ecf20Sopenharmony_ci uint8_t flags; 3548c2ecf20Sopenharmony_ci uint32_t tile_state_size; 3558c2ecf20Sopenharmony_ci uint32_t tile_count, bin_addr; 3568c2ecf20Sopenharmony_ci int bin_slot; 3578c2ecf20Sopenharmony_ci 3588c2ecf20Sopenharmony_ci if (exec->found_tile_binning_mode_config_packet) { 3598c2ecf20Sopenharmony_ci DRM_DEBUG("Duplicate VC4_PACKET_TILE_BINNING_MODE_CONFIG\n"); 3608c2ecf20Sopenharmony_ci return -EINVAL; 3618c2ecf20Sopenharmony_ci } 3628c2ecf20Sopenharmony_ci exec->found_tile_binning_mode_config_packet = true; 3638c2ecf20Sopenharmony_ci 3648c2ecf20Sopenharmony_ci exec->bin_tiles_x = *(uint8_t *)(untrusted + 12); 3658c2ecf20Sopenharmony_ci exec->bin_tiles_y = *(uint8_t *)(untrusted + 13); 3668c2ecf20Sopenharmony_ci tile_count = exec->bin_tiles_x * exec->bin_tiles_y; 3678c2ecf20Sopenharmony_ci flags = *(uint8_t *)(untrusted + 14); 3688c2ecf20Sopenharmony_ci 3698c2ecf20Sopenharmony_ci if (exec->bin_tiles_x == 0 || 3708c2ecf20Sopenharmony_ci exec->bin_tiles_y == 0) { 3718c2ecf20Sopenharmony_ci DRM_DEBUG("Tile binning config of %dx%d too small\n", 3728c2ecf20Sopenharmony_ci exec->bin_tiles_x, exec->bin_tiles_y); 3738c2ecf20Sopenharmony_ci return -EINVAL; 3748c2ecf20Sopenharmony_ci } 3758c2ecf20Sopenharmony_ci 3768c2ecf20Sopenharmony_ci if (flags & (VC4_BIN_CONFIG_DB_NON_MS | 3778c2ecf20Sopenharmony_ci VC4_BIN_CONFIG_TILE_BUFFER_64BIT)) { 3788c2ecf20Sopenharmony_ci DRM_DEBUG("unsupported binning config flags 0x%02x\n", flags); 3798c2ecf20Sopenharmony_ci return -EINVAL; 3808c2ecf20Sopenharmony_ci } 3818c2ecf20Sopenharmony_ci 3828c2ecf20Sopenharmony_ci bin_slot = vc4_v3d_get_bin_slot(vc4); 3838c2ecf20Sopenharmony_ci if (bin_slot < 0) { 3848c2ecf20Sopenharmony_ci if (bin_slot != -EINTR && bin_slot != -ERESTARTSYS) { 3858c2ecf20Sopenharmony_ci DRM_ERROR("Failed to allocate binner memory: %d\n", 3868c2ecf20Sopenharmony_ci bin_slot); 3878c2ecf20Sopenharmony_ci } 3888c2ecf20Sopenharmony_ci return bin_slot; 3898c2ecf20Sopenharmony_ci } 3908c2ecf20Sopenharmony_ci 3918c2ecf20Sopenharmony_ci /* The slot we allocated will only be used by this job, and is 3928c2ecf20Sopenharmony_ci * free when the job completes rendering. 3938c2ecf20Sopenharmony_ci */ 3948c2ecf20Sopenharmony_ci exec->bin_slots |= BIT(bin_slot); 3958c2ecf20Sopenharmony_ci bin_addr = vc4->bin_bo->base.paddr + bin_slot * vc4->bin_alloc_size; 3968c2ecf20Sopenharmony_ci 3978c2ecf20Sopenharmony_ci /* The tile state data array is 48 bytes per tile, and we put it at 3988c2ecf20Sopenharmony_ci * the start of a BO containing both it and the tile alloc. 3998c2ecf20Sopenharmony_ci */ 4008c2ecf20Sopenharmony_ci tile_state_size = 48 * tile_count; 4018c2ecf20Sopenharmony_ci 4028c2ecf20Sopenharmony_ci /* Since the tile alloc array will follow us, align. */ 4038c2ecf20Sopenharmony_ci exec->tile_alloc_offset = bin_addr + roundup(tile_state_size, 4096); 4048c2ecf20Sopenharmony_ci 4058c2ecf20Sopenharmony_ci *(uint8_t *)(validated + 14) = 4068c2ecf20Sopenharmony_ci ((flags & ~(VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_MASK | 4078c2ecf20Sopenharmony_ci VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_MASK)) | 4088c2ecf20Sopenharmony_ci VC4_BIN_CONFIG_AUTO_INIT_TSDA | 4098c2ecf20Sopenharmony_ci VC4_SET_FIELD(VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE_32, 4108c2ecf20Sopenharmony_ci VC4_BIN_CONFIG_ALLOC_INIT_BLOCK_SIZE) | 4118c2ecf20Sopenharmony_ci VC4_SET_FIELD(VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE_128, 4128c2ecf20Sopenharmony_ci VC4_BIN_CONFIG_ALLOC_BLOCK_SIZE)); 4138c2ecf20Sopenharmony_ci 4148c2ecf20Sopenharmony_ci /* tile alloc address. */ 4158c2ecf20Sopenharmony_ci *(uint32_t *)(validated + 0) = exec->tile_alloc_offset; 4168c2ecf20Sopenharmony_ci /* tile alloc size. */ 4178c2ecf20Sopenharmony_ci *(uint32_t *)(validated + 4) = (bin_addr + vc4->bin_alloc_size - 4188c2ecf20Sopenharmony_ci exec->tile_alloc_offset); 4198c2ecf20Sopenharmony_ci /* tile state address. */ 4208c2ecf20Sopenharmony_ci *(uint32_t *)(validated + 8) = bin_addr; 4218c2ecf20Sopenharmony_ci 4228c2ecf20Sopenharmony_ci return 0; 4238c2ecf20Sopenharmony_ci} 4248c2ecf20Sopenharmony_ci 4258c2ecf20Sopenharmony_cistatic int 4268c2ecf20Sopenharmony_civalidate_gem_handles(VALIDATE_ARGS) 4278c2ecf20Sopenharmony_ci{ 4288c2ecf20Sopenharmony_ci memcpy(exec->bo_index, untrusted, sizeof(exec->bo_index)); 4298c2ecf20Sopenharmony_ci return 0; 4308c2ecf20Sopenharmony_ci} 4318c2ecf20Sopenharmony_ci 4328c2ecf20Sopenharmony_ci#define VC4_DEFINE_PACKET(packet, func) \ 4338c2ecf20Sopenharmony_ci [packet] = { packet ## _SIZE, #packet, func } 4348c2ecf20Sopenharmony_ci 4358c2ecf20Sopenharmony_cistatic const struct cmd_info { 4368c2ecf20Sopenharmony_ci uint16_t len; 4378c2ecf20Sopenharmony_ci const char *name; 4388c2ecf20Sopenharmony_ci int (*func)(struct vc4_exec_info *exec, void *validated, 4398c2ecf20Sopenharmony_ci void *untrusted); 4408c2ecf20Sopenharmony_ci} cmd_info[] = { 4418c2ecf20Sopenharmony_ci VC4_DEFINE_PACKET(VC4_PACKET_HALT, NULL), 4428c2ecf20Sopenharmony_ci VC4_DEFINE_PACKET(VC4_PACKET_NOP, NULL), 4438c2ecf20Sopenharmony_ci VC4_DEFINE_PACKET(VC4_PACKET_FLUSH, validate_flush), 4448c2ecf20Sopenharmony_ci VC4_DEFINE_PACKET(VC4_PACKET_FLUSH_ALL, NULL), 4458c2ecf20Sopenharmony_ci VC4_DEFINE_PACKET(VC4_PACKET_START_TILE_BINNING, 4468c2ecf20Sopenharmony_ci validate_start_tile_binning), 4478c2ecf20Sopenharmony_ci VC4_DEFINE_PACKET(VC4_PACKET_INCREMENT_SEMAPHORE, 4488c2ecf20Sopenharmony_ci validate_increment_semaphore), 4498c2ecf20Sopenharmony_ci 4508c2ecf20Sopenharmony_ci VC4_DEFINE_PACKET(VC4_PACKET_GL_INDEXED_PRIMITIVE, 4518c2ecf20Sopenharmony_ci validate_indexed_prim_list), 4528c2ecf20Sopenharmony_ci VC4_DEFINE_PACKET(VC4_PACKET_GL_ARRAY_PRIMITIVE, 4538c2ecf20Sopenharmony_ci validate_gl_array_primitive), 4548c2ecf20Sopenharmony_ci 4558c2ecf20Sopenharmony_ci VC4_DEFINE_PACKET(VC4_PACKET_PRIMITIVE_LIST_FORMAT, NULL), 4568c2ecf20Sopenharmony_ci 4578c2ecf20Sopenharmony_ci VC4_DEFINE_PACKET(VC4_PACKET_GL_SHADER_STATE, validate_gl_shader_state), 4588c2ecf20Sopenharmony_ci 4598c2ecf20Sopenharmony_ci VC4_DEFINE_PACKET(VC4_PACKET_CONFIGURATION_BITS, NULL), 4608c2ecf20Sopenharmony_ci VC4_DEFINE_PACKET(VC4_PACKET_FLAT_SHADE_FLAGS, NULL), 4618c2ecf20Sopenharmony_ci VC4_DEFINE_PACKET(VC4_PACKET_POINT_SIZE, NULL), 4628c2ecf20Sopenharmony_ci VC4_DEFINE_PACKET(VC4_PACKET_LINE_WIDTH, NULL), 4638c2ecf20Sopenharmony_ci VC4_DEFINE_PACKET(VC4_PACKET_RHT_X_BOUNDARY, NULL), 4648c2ecf20Sopenharmony_ci VC4_DEFINE_PACKET(VC4_PACKET_DEPTH_OFFSET, NULL), 4658c2ecf20Sopenharmony_ci VC4_DEFINE_PACKET(VC4_PACKET_CLIP_WINDOW, NULL), 4668c2ecf20Sopenharmony_ci VC4_DEFINE_PACKET(VC4_PACKET_VIEWPORT_OFFSET, NULL), 4678c2ecf20Sopenharmony_ci VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_XY_SCALING, NULL), 4688c2ecf20Sopenharmony_ci /* Note: The docs say this was also 105, but it was 106 in the 4698c2ecf20Sopenharmony_ci * initial userland code drop. 4708c2ecf20Sopenharmony_ci */ 4718c2ecf20Sopenharmony_ci VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_Z_SCALING, NULL), 4728c2ecf20Sopenharmony_ci 4738c2ecf20Sopenharmony_ci VC4_DEFINE_PACKET(VC4_PACKET_TILE_BINNING_MODE_CONFIG, 4748c2ecf20Sopenharmony_ci validate_tile_binning_config), 4758c2ecf20Sopenharmony_ci 4768c2ecf20Sopenharmony_ci VC4_DEFINE_PACKET(VC4_PACKET_GEM_HANDLES, validate_gem_handles), 4778c2ecf20Sopenharmony_ci}; 4788c2ecf20Sopenharmony_ci 4798c2ecf20Sopenharmony_ciint 4808c2ecf20Sopenharmony_civc4_validate_bin_cl(struct drm_device *dev, 4818c2ecf20Sopenharmony_ci void *validated, 4828c2ecf20Sopenharmony_ci void *unvalidated, 4838c2ecf20Sopenharmony_ci struct vc4_exec_info *exec) 4848c2ecf20Sopenharmony_ci{ 4858c2ecf20Sopenharmony_ci uint32_t len = exec->args->bin_cl_size; 4868c2ecf20Sopenharmony_ci uint32_t dst_offset = 0; 4878c2ecf20Sopenharmony_ci uint32_t src_offset = 0; 4888c2ecf20Sopenharmony_ci 4898c2ecf20Sopenharmony_ci while (src_offset < len) { 4908c2ecf20Sopenharmony_ci void *dst_pkt = validated + dst_offset; 4918c2ecf20Sopenharmony_ci void *src_pkt = unvalidated + src_offset; 4928c2ecf20Sopenharmony_ci u8 cmd = *(uint8_t *)src_pkt; 4938c2ecf20Sopenharmony_ci const struct cmd_info *info; 4948c2ecf20Sopenharmony_ci 4958c2ecf20Sopenharmony_ci if (cmd >= ARRAY_SIZE(cmd_info)) { 4968c2ecf20Sopenharmony_ci DRM_DEBUG("0x%08x: packet %d out of bounds\n", 4978c2ecf20Sopenharmony_ci src_offset, cmd); 4988c2ecf20Sopenharmony_ci return -EINVAL; 4998c2ecf20Sopenharmony_ci } 5008c2ecf20Sopenharmony_ci 5018c2ecf20Sopenharmony_ci info = &cmd_info[cmd]; 5028c2ecf20Sopenharmony_ci if (!info->name) { 5038c2ecf20Sopenharmony_ci DRM_DEBUG("0x%08x: packet %d invalid\n", 5048c2ecf20Sopenharmony_ci src_offset, cmd); 5058c2ecf20Sopenharmony_ci return -EINVAL; 5068c2ecf20Sopenharmony_ci } 5078c2ecf20Sopenharmony_ci 5088c2ecf20Sopenharmony_ci if (src_offset + info->len > len) { 5098c2ecf20Sopenharmony_ci DRM_DEBUG("0x%08x: packet %d (%s) length 0x%08x " 5108c2ecf20Sopenharmony_ci "exceeds bounds (0x%08x)\n", 5118c2ecf20Sopenharmony_ci src_offset, cmd, info->name, info->len, 5128c2ecf20Sopenharmony_ci src_offset + len); 5138c2ecf20Sopenharmony_ci return -EINVAL; 5148c2ecf20Sopenharmony_ci } 5158c2ecf20Sopenharmony_ci 5168c2ecf20Sopenharmony_ci if (cmd != VC4_PACKET_GEM_HANDLES) 5178c2ecf20Sopenharmony_ci memcpy(dst_pkt, src_pkt, info->len); 5188c2ecf20Sopenharmony_ci 5198c2ecf20Sopenharmony_ci if (info->func && info->func(exec, 5208c2ecf20Sopenharmony_ci dst_pkt + 1, 5218c2ecf20Sopenharmony_ci src_pkt + 1)) { 5228c2ecf20Sopenharmony_ci DRM_DEBUG("0x%08x: packet %d (%s) failed to validate\n", 5238c2ecf20Sopenharmony_ci src_offset, cmd, info->name); 5248c2ecf20Sopenharmony_ci return -EINVAL; 5258c2ecf20Sopenharmony_ci } 5268c2ecf20Sopenharmony_ci 5278c2ecf20Sopenharmony_ci src_offset += info->len; 5288c2ecf20Sopenharmony_ci /* GEM handle loading doesn't produce HW packets. */ 5298c2ecf20Sopenharmony_ci if (cmd != VC4_PACKET_GEM_HANDLES) 5308c2ecf20Sopenharmony_ci dst_offset += info->len; 5318c2ecf20Sopenharmony_ci 5328c2ecf20Sopenharmony_ci /* When the CL hits halt, it'll stop reading anything else. */ 5338c2ecf20Sopenharmony_ci if (cmd == VC4_PACKET_HALT) 5348c2ecf20Sopenharmony_ci break; 5358c2ecf20Sopenharmony_ci } 5368c2ecf20Sopenharmony_ci 5378c2ecf20Sopenharmony_ci exec->ct0ea = exec->ct0ca + dst_offset; 5388c2ecf20Sopenharmony_ci 5398c2ecf20Sopenharmony_ci if (!exec->found_start_tile_binning_packet) { 5408c2ecf20Sopenharmony_ci DRM_DEBUG("Bin CL missing VC4_PACKET_START_TILE_BINNING\n"); 5418c2ecf20Sopenharmony_ci return -EINVAL; 5428c2ecf20Sopenharmony_ci } 5438c2ecf20Sopenharmony_ci 5448c2ecf20Sopenharmony_ci /* The bin CL must be ended with INCREMENT_SEMAPHORE and FLUSH. The 5458c2ecf20Sopenharmony_ci * semaphore is used to trigger the render CL to start up, and the 5468c2ecf20Sopenharmony_ci * FLUSH is what caps the bin lists with 5478c2ecf20Sopenharmony_ci * VC4_PACKET_RETURN_FROM_SUB_LIST (so they jump back to the main 5488c2ecf20Sopenharmony_ci * render CL when they get called to) and actually triggers the queued 5498c2ecf20Sopenharmony_ci * semaphore increment. 5508c2ecf20Sopenharmony_ci */ 5518c2ecf20Sopenharmony_ci if (!exec->found_increment_semaphore_packet || !exec->found_flush) { 5528c2ecf20Sopenharmony_ci DRM_DEBUG("Bin CL missing VC4_PACKET_INCREMENT_SEMAPHORE + " 5538c2ecf20Sopenharmony_ci "VC4_PACKET_FLUSH\n"); 5548c2ecf20Sopenharmony_ci return -EINVAL; 5558c2ecf20Sopenharmony_ci } 5568c2ecf20Sopenharmony_ci 5578c2ecf20Sopenharmony_ci return 0; 5588c2ecf20Sopenharmony_ci} 5598c2ecf20Sopenharmony_ci 5608c2ecf20Sopenharmony_cistatic bool 5618c2ecf20Sopenharmony_cireloc_tex(struct vc4_exec_info *exec, 5628c2ecf20Sopenharmony_ci void *uniform_data_u, 5638c2ecf20Sopenharmony_ci struct vc4_texture_sample_info *sample, 5648c2ecf20Sopenharmony_ci uint32_t texture_handle_index, bool is_cs) 5658c2ecf20Sopenharmony_ci{ 5668c2ecf20Sopenharmony_ci struct drm_gem_cma_object *tex; 5678c2ecf20Sopenharmony_ci uint32_t p0 = *(uint32_t *)(uniform_data_u + sample->p_offset[0]); 5688c2ecf20Sopenharmony_ci uint32_t p1 = *(uint32_t *)(uniform_data_u + sample->p_offset[1]); 5698c2ecf20Sopenharmony_ci uint32_t p2 = (sample->p_offset[2] != ~0 ? 5708c2ecf20Sopenharmony_ci *(uint32_t *)(uniform_data_u + sample->p_offset[2]) : 0); 5718c2ecf20Sopenharmony_ci uint32_t p3 = (sample->p_offset[3] != ~0 ? 5728c2ecf20Sopenharmony_ci *(uint32_t *)(uniform_data_u + sample->p_offset[3]) : 0); 5738c2ecf20Sopenharmony_ci uint32_t *validated_p0 = exec->uniforms_v + sample->p_offset[0]; 5748c2ecf20Sopenharmony_ci uint32_t offset = p0 & VC4_TEX_P0_OFFSET_MASK; 5758c2ecf20Sopenharmony_ci uint32_t miplevels = VC4_GET_FIELD(p0, VC4_TEX_P0_MIPLVLS); 5768c2ecf20Sopenharmony_ci uint32_t width = VC4_GET_FIELD(p1, VC4_TEX_P1_WIDTH); 5778c2ecf20Sopenharmony_ci uint32_t height = VC4_GET_FIELD(p1, VC4_TEX_P1_HEIGHT); 5788c2ecf20Sopenharmony_ci uint32_t cpp, tiling_format, utile_w, utile_h; 5798c2ecf20Sopenharmony_ci uint32_t i; 5808c2ecf20Sopenharmony_ci uint32_t cube_map_stride = 0; 5818c2ecf20Sopenharmony_ci enum vc4_texture_data_type type; 5828c2ecf20Sopenharmony_ci 5838c2ecf20Sopenharmony_ci tex = vc4_use_bo(exec, texture_handle_index); 5848c2ecf20Sopenharmony_ci if (!tex) 5858c2ecf20Sopenharmony_ci return false; 5868c2ecf20Sopenharmony_ci 5878c2ecf20Sopenharmony_ci if (sample->is_direct) { 5888c2ecf20Sopenharmony_ci uint32_t remaining_size = tex->base.size - p0; 5898c2ecf20Sopenharmony_ci 5908c2ecf20Sopenharmony_ci if (p0 > tex->base.size - 4) { 5918c2ecf20Sopenharmony_ci DRM_DEBUG("UBO offset greater than UBO size\n"); 5928c2ecf20Sopenharmony_ci goto fail; 5938c2ecf20Sopenharmony_ci } 5948c2ecf20Sopenharmony_ci if (p1 > remaining_size - 4) { 5958c2ecf20Sopenharmony_ci DRM_DEBUG("UBO clamp would allow reads " 5968c2ecf20Sopenharmony_ci "outside of UBO\n"); 5978c2ecf20Sopenharmony_ci goto fail; 5988c2ecf20Sopenharmony_ci } 5998c2ecf20Sopenharmony_ci *validated_p0 = tex->paddr + p0; 6008c2ecf20Sopenharmony_ci return true; 6018c2ecf20Sopenharmony_ci } 6028c2ecf20Sopenharmony_ci 6038c2ecf20Sopenharmony_ci if (width == 0) 6048c2ecf20Sopenharmony_ci width = 2048; 6058c2ecf20Sopenharmony_ci if (height == 0) 6068c2ecf20Sopenharmony_ci height = 2048; 6078c2ecf20Sopenharmony_ci 6088c2ecf20Sopenharmony_ci if (p0 & VC4_TEX_P0_CMMODE_MASK) { 6098c2ecf20Sopenharmony_ci if (VC4_GET_FIELD(p2, VC4_TEX_P2_PTYPE) == 6108c2ecf20Sopenharmony_ci VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE) 6118c2ecf20Sopenharmony_ci cube_map_stride = p2 & VC4_TEX_P2_CMST_MASK; 6128c2ecf20Sopenharmony_ci if (VC4_GET_FIELD(p3, VC4_TEX_P2_PTYPE) == 6138c2ecf20Sopenharmony_ci VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE) { 6148c2ecf20Sopenharmony_ci if (cube_map_stride) { 6158c2ecf20Sopenharmony_ci DRM_DEBUG("Cube map stride set twice\n"); 6168c2ecf20Sopenharmony_ci goto fail; 6178c2ecf20Sopenharmony_ci } 6188c2ecf20Sopenharmony_ci 6198c2ecf20Sopenharmony_ci cube_map_stride = p3 & VC4_TEX_P2_CMST_MASK; 6208c2ecf20Sopenharmony_ci } 6218c2ecf20Sopenharmony_ci if (!cube_map_stride) { 6228c2ecf20Sopenharmony_ci DRM_DEBUG("Cube map stride not set\n"); 6238c2ecf20Sopenharmony_ci goto fail; 6248c2ecf20Sopenharmony_ci } 6258c2ecf20Sopenharmony_ci } 6268c2ecf20Sopenharmony_ci 6278c2ecf20Sopenharmony_ci type = (VC4_GET_FIELD(p0, VC4_TEX_P0_TYPE) | 6288c2ecf20Sopenharmony_ci (VC4_GET_FIELD(p1, VC4_TEX_P1_TYPE4) << 4)); 6298c2ecf20Sopenharmony_ci 6308c2ecf20Sopenharmony_ci switch (type) { 6318c2ecf20Sopenharmony_ci case VC4_TEXTURE_TYPE_RGBA8888: 6328c2ecf20Sopenharmony_ci case VC4_TEXTURE_TYPE_RGBX8888: 6338c2ecf20Sopenharmony_ci case VC4_TEXTURE_TYPE_RGBA32R: 6348c2ecf20Sopenharmony_ci cpp = 4; 6358c2ecf20Sopenharmony_ci break; 6368c2ecf20Sopenharmony_ci case VC4_TEXTURE_TYPE_RGBA4444: 6378c2ecf20Sopenharmony_ci case VC4_TEXTURE_TYPE_RGBA5551: 6388c2ecf20Sopenharmony_ci case VC4_TEXTURE_TYPE_RGB565: 6398c2ecf20Sopenharmony_ci case VC4_TEXTURE_TYPE_LUMALPHA: 6408c2ecf20Sopenharmony_ci case VC4_TEXTURE_TYPE_S16F: 6418c2ecf20Sopenharmony_ci case VC4_TEXTURE_TYPE_S16: 6428c2ecf20Sopenharmony_ci cpp = 2; 6438c2ecf20Sopenharmony_ci break; 6448c2ecf20Sopenharmony_ci case VC4_TEXTURE_TYPE_LUMINANCE: 6458c2ecf20Sopenharmony_ci case VC4_TEXTURE_TYPE_ALPHA: 6468c2ecf20Sopenharmony_ci case VC4_TEXTURE_TYPE_S8: 6478c2ecf20Sopenharmony_ci cpp = 1; 6488c2ecf20Sopenharmony_ci break; 6498c2ecf20Sopenharmony_ci case VC4_TEXTURE_TYPE_ETC1: 6508c2ecf20Sopenharmony_ci /* ETC1 is arranged as 64-bit blocks, where each block is 4x4 6518c2ecf20Sopenharmony_ci * pixels. 6528c2ecf20Sopenharmony_ci */ 6538c2ecf20Sopenharmony_ci cpp = 8; 6548c2ecf20Sopenharmony_ci width = (width + 3) >> 2; 6558c2ecf20Sopenharmony_ci height = (height + 3) >> 2; 6568c2ecf20Sopenharmony_ci break; 6578c2ecf20Sopenharmony_ci case VC4_TEXTURE_TYPE_BW1: 6588c2ecf20Sopenharmony_ci case VC4_TEXTURE_TYPE_A4: 6598c2ecf20Sopenharmony_ci case VC4_TEXTURE_TYPE_A1: 6608c2ecf20Sopenharmony_ci case VC4_TEXTURE_TYPE_RGBA64: 6618c2ecf20Sopenharmony_ci case VC4_TEXTURE_TYPE_YUV422R: 6628c2ecf20Sopenharmony_ci default: 6638c2ecf20Sopenharmony_ci DRM_DEBUG("Texture format %d unsupported\n", type); 6648c2ecf20Sopenharmony_ci goto fail; 6658c2ecf20Sopenharmony_ci } 6668c2ecf20Sopenharmony_ci utile_w = utile_width(cpp); 6678c2ecf20Sopenharmony_ci utile_h = utile_height(cpp); 6688c2ecf20Sopenharmony_ci 6698c2ecf20Sopenharmony_ci if (type == VC4_TEXTURE_TYPE_RGBA32R) { 6708c2ecf20Sopenharmony_ci tiling_format = VC4_TILING_FORMAT_LINEAR; 6718c2ecf20Sopenharmony_ci } else { 6728c2ecf20Sopenharmony_ci if (size_is_lt(width, height, cpp)) 6738c2ecf20Sopenharmony_ci tiling_format = VC4_TILING_FORMAT_LT; 6748c2ecf20Sopenharmony_ci else 6758c2ecf20Sopenharmony_ci tiling_format = VC4_TILING_FORMAT_T; 6768c2ecf20Sopenharmony_ci } 6778c2ecf20Sopenharmony_ci 6788c2ecf20Sopenharmony_ci if (!vc4_check_tex_size(exec, tex, offset + cube_map_stride * 5, 6798c2ecf20Sopenharmony_ci tiling_format, width, height, cpp)) { 6808c2ecf20Sopenharmony_ci goto fail; 6818c2ecf20Sopenharmony_ci } 6828c2ecf20Sopenharmony_ci 6838c2ecf20Sopenharmony_ci /* The mipmap levels are stored before the base of the texture. Make 6848c2ecf20Sopenharmony_ci * sure there is actually space in the BO. 6858c2ecf20Sopenharmony_ci */ 6868c2ecf20Sopenharmony_ci for (i = 1; i <= miplevels; i++) { 6878c2ecf20Sopenharmony_ci uint32_t level_width = max(width >> i, 1u); 6888c2ecf20Sopenharmony_ci uint32_t level_height = max(height >> i, 1u); 6898c2ecf20Sopenharmony_ci uint32_t aligned_width, aligned_height; 6908c2ecf20Sopenharmony_ci uint32_t level_size; 6918c2ecf20Sopenharmony_ci 6928c2ecf20Sopenharmony_ci /* Once the levels get small enough, they drop from T to LT. */ 6938c2ecf20Sopenharmony_ci if (tiling_format == VC4_TILING_FORMAT_T && 6948c2ecf20Sopenharmony_ci size_is_lt(level_width, level_height, cpp)) { 6958c2ecf20Sopenharmony_ci tiling_format = VC4_TILING_FORMAT_LT; 6968c2ecf20Sopenharmony_ci } 6978c2ecf20Sopenharmony_ci 6988c2ecf20Sopenharmony_ci switch (tiling_format) { 6998c2ecf20Sopenharmony_ci case VC4_TILING_FORMAT_T: 7008c2ecf20Sopenharmony_ci aligned_width = round_up(level_width, utile_w * 8); 7018c2ecf20Sopenharmony_ci aligned_height = round_up(level_height, utile_h * 8); 7028c2ecf20Sopenharmony_ci break; 7038c2ecf20Sopenharmony_ci case VC4_TILING_FORMAT_LT: 7048c2ecf20Sopenharmony_ci aligned_width = round_up(level_width, utile_w); 7058c2ecf20Sopenharmony_ci aligned_height = round_up(level_height, utile_h); 7068c2ecf20Sopenharmony_ci break; 7078c2ecf20Sopenharmony_ci default: 7088c2ecf20Sopenharmony_ci aligned_width = round_up(level_width, utile_w); 7098c2ecf20Sopenharmony_ci aligned_height = level_height; 7108c2ecf20Sopenharmony_ci break; 7118c2ecf20Sopenharmony_ci } 7128c2ecf20Sopenharmony_ci 7138c2ecf20Sopenharmony_ci level_size = aligned_width * cpp * aligned_height; 7148c2ecf20Sopenharmony_ci 7158c2ecf20Sopenharmony_ci if (offset < level_size) { 7168c2ecf20Sopenharmony_ci DRM_DEBUG("Level %d (%dx%d -> %dx%d) size %db " 7178c2ecf20Sopenharmony_ci "overflowed buffer bounds (offset %d)\n", 7188c2ecf20Sopenharmony_ci i, level_width, level_height, 7198c2ecf20Sopenharmony_ci aligned_width, aligned_height, 7208c2ecf20Sopenharmony_ci level_size, offset); 7218c2ecf20Sopenharmony_ci goto fail; 7228c2ecf20Sopenharmony_ci } 7238c2ecf20Sopenharmony_ci 7248c2ecf20Sopenharmony_ci offset -= level_size; 7258c2ecf20Sopenharmony_ci } 7268c2ecf20Sopenharmony_ci 7278c2ecf20Sopenharmony_ci *validated_p0 = tex->paddr + p0; 7288c2ecf20Sopenharmony_ci 7298c2ecf20Sopenharmony_ci if (is_cs) { 7308c2ecf20Sopenharmony_ci exec->bin_dep_seqno = max(exec->bin_dep_seqno, 7318c2ecf20Sopenharmony_ci to_vc4_bo(&tex->base)->write_seqno); 7328c2ecf20Sopenharmony_ci } 7338c2ecf20Sopenharmony_ci 7348c2ecf20Sopenharmony_ci return true; 7358c2ecf20Sopenharmony_ci fail: 7368c2ecf20Sopenharmony_ci DRM_INFO("Texture p0 at %d: 0x%08x\n", sample->p_offset[0], p0); 7378c2ecf20Sopenharmony_ci DRM_INFO("Texture p1 at %d: 0x%08x\n", sample->p_offset[1], p1); 7388c2ecf20Sopenharmony_ci DRM_INFO("Texture p2 at %d: 0x%08x\n", sample->p_offset[2], p2); 7398c2ecf20Sopenharmony_ci DRM_INFO("Texture p3 at %d: 0x%08x\n", sample->p_offset[3], p3); 7408c2ecf20Sopenharmony_ci return false; 7418c2ecf20Sopenharmony_ci} 7428c2ecf20Sopenharmony_ci 7438c2ecf20Sopenharmony_cistatic int 7448c2ecf20Sopenharmony_civalidate_gl_shader_rec(struct drm_device *dev, 7458c2ecf20Sopenharmony_ci struct vc4_exec_info *exec, 7468c2ecf20Sopenharmony_ci struct vc4_shader_state *state) 7478c2ecf20Sopenharmony_ci{ 7488c2ecf20Sopenharmony_ci uint32_t *src_handles; 7498c2ecf20Sopenharmony_ci void *pkt_u, *pkt_v; 7508c2ecf20Sopenharmony_ci static const uint32_t shader_reloc_offsets[] = { 7518c2ecf20Sopenharmony_ci 4, /* fs */ 7528c2ecf20Sopenharmony_ci 16, /* vs */ 7538c2ecf20Sopenharmony_ci 28, /* cs */ 7548c2ecf20Sopenharmony_ci }; 7558c2ecf20Sopenharmony_ci uint32_t shader_reloc_count = ARRAY_SIZE(shader_reloc_offsets); 7568c2ecf20Sopenharmony_ci struct drm_gem_cma_object *bo[ARRAY_SIZE(shader_reloc_offsets) + 8]; 7578c2ecf20Sopenharmony_ci uint32_t nr_attributes, nr_relocs, packet_size; 7588c2ecf20Sopenharmony_ci int i; 7598c2ecf20Sopenharmony_ci 7608c2ecf20Sopenharmony_ci nr_attributes = state->addr & 0x7; 7618c2ecf20Sopenharmony_ci if (nr_attributes == 0) 7628c2ecf20Sopenharmony_ci nr_attributes = 8; 7638c2ecf20Sopenharmony_ci packet_size = gl_shader_rec_size(state->addr); 7648c2ecf20Sopenharmony_ci 7658c2ecf20Sopenharmony_ci nr_relocs = ARRAY_SIZE(shader_reloc_offsets) + nr_attributes; 7668c2ecf20Sopenharmony_ci if (nr_relocs * 4 > exec->shader_rec_size) { 7678c2ecf20Sopenharmony_ci DRM_DEBUG("overflowed shader recs reading %d handles " 7688c2ecf20Sopenharmony_ci "from %d bytes left\n", 7698c2ecf20Sopenharmony_ci nr_relocs, exec->shader_rec_size); 7708c2ecf20Sopenharmony_ci return -EINVAL; 7718c2ecf20Sopenharmony_ci } 7728c2ecf20Sopenharmony_ci src_handles = exec->shader_rec_u; 7738c2ecf20Sopenharmony_ci exec->shader_rec_u += nr_relocs * 4; 7748c2ecf20Sopenharmony_ci exec->shader_rec_size -= nr_relocs * 4; 7758c2ecf20Sopenharmony_ci 7768c2ecf20Sopenharmony_ci if (packet_size > exec->shader_rec_size) { 7778c2ecf20Sopenharmony_ci DRM_DEBUG("overflowed shader recs copying %db packet " 7788c2ecf20Sopenharmony_ci "from %d bytes left\n", 7798c2ecf20Sopenharmony_ci packet_size, exec->shader_rec_size); 7808c2ecf20Sopenharmony_ci return -EINVAL; 7818c2ecf20Sopenharmony_ci } 7828c2ecf20Sopenharmony_ci pkt_u = exec->shader_rec_u; 7838c2ecf20Sopenharmony_ci pkt_v = exec->shader_rec_v; 7848c2ecf20Sopenharmony_ci memcpy(pkt_v, pkt_u, packet_size); 7858c2ecf20Sopenharmony_ci exec->shader_rec_u += packet_size; 7868c2ecf20Sopenharmony_ci /* Shader recs have to be aligned to 16 bytes (due to the attribute 7878c2ecf20Sopenharmony_ci * flags being in the low bytes), so round the next validated shader 7888c2ecf20Sopenharmony_ci * rec address up. This should be safe, since we've got so many 7898c2ecf20Sopenharmony_ci * relocations in a shader rec packet. 7908c2ecf20Sopenharmony_ci */ 7918c2ecf20Sopenharmony_ci BUG_ON(roundup(packet_size, 16) - packet_size > nr_relocs * 4); 7928c2ecf20Sopenharmony_ci exec->shader_rec_v += roundup(packet_size, 16); 7938c2ecf20Sopenharmony_ci exec->shader_rec_size -= packet_size; 7948c2ecf20Sopenharmony_ci 7958c2ecf20Sopenharmony_ci for (i = 0; i < shader_reloc_count; i++) { 7968c2ecf20Sopenharmony_ci if (src_handles[i] > exec->bo_count) { 7978c2ecf20Sopenharmony_ci DRM_DEBUG("Shader handle %d too big\n", src_handles[i]); 7988c2ecf20Sopenharmony_ci return -EINVAL; 7998c2ecf20Sopenharmony_ci } 8008c2ecf20Sopenharmony_ci 8018c2ecf20Sopenharmony_ci bo[i] = exec->bo[src_handles[i]]; 8028c2ecf20Sopenharmony_ci if (!bo[i]) 8038c2ecf20Sopenharmony_ci return -EINVAL; 8048c2ecf20Sopenharmony_ci } 8058c2ecf20Sopenharmony_ci for (i = shader_reloc_count; i < nr_relocs; i++) { 8068c2ecf20Sopenharmony_ci bo[i] = vc4_use_bo(exec, src_handles[i]); 8078c2ecf20Sopenharmony_ci if (!bo[i]) 8088c2ecf20Sopenharmony_ci return -EINVAL; 8098c2ecf20Sopenharmony_ci } 8108c2ecf20Sopenharmony_ci 8118c2ecf20Sopenharmony_ci if (((*(uint16_t *)pkt_u & VC4_SHADER_FLAG_FS_SINGLE_THREAD) == 0) != 8128c2ecf20Sopenharmony_ci to_vc4_bo(&bo[0]->base)->validated_shader->is_threaded) { 8138c2ecf20Sopenharmony_ci DRM_DEBUG("Thread mode of CL and FS do not match\n"); 8148c2ecf20Sopenharmony_ci return -EINVAL; 8158c2ecf20Sopenharmony_ci } 8168c2ecf20Sopenharmony_ci 8178c2ecf20Sopenharmony_ci if (to_vc4_bo(&bo[1]->base)->validated_shader->is_threaded || 8188c2ecf20Sopenharmony_ci to_vc4_bo(&bo[2]->base)->validated_shader->is_threaded) { 8198c2ecf20Sopenharmony_ci DRM_DEBUG("cs and vs cannot be threaded\n"); 8208c2ecf20Sopenharmony_ci return -EINVAL; 8218c2ecf20Sopenharmony_ci } 8228c2ecf20Sopenharmony_ci 8238c2ecf20Sopenharmony_ci for (i = 0; i < shader_reloc_count; i++) { 8248c2ecf20Sopenharmony_ci struct vc4_validated_shader_info *validated_shader; 8258c2ecf20Sopenharmony_ci uint32_t o = shader_reloc_offsets[i]; 8268c2ecf20Sopenharmony_ci uint32_t src_offset = *(uint32_t *)(pkt_u + o); 8278c2ecf20Sopenharmony_ci uint32_t *texture_handles_u; 8288c2ecf20Sopenharmony_ci void *uniform_data_u; 8298c2ecf20Sopenharmony_ci uint32_t tex, uni; 8308c2ecf20Sopenharmony_ci 8318c2ecf20Sopenharmony_ci *(uint32_t *)(pkt_v + o) = bo[i]->paddr + src_offset; 8328c2ecf20Sopenharmony_ci 8338c2ecf20Sopenharmony_ci if (src_offset != 0) { 8348c2ecf20Sopenharmony_ci DRM_DEBUG("Shaders must be at offset 0 of " 8358c2ecf20Sopenharmony_ci "the BO.\n"); 8368c2ecf20Sopenharmony_ci return -EINVAL; 8378c2ecf20Sopenharmony_ci } 8388c2ecf20Sopenharmony_ci 8398c2ecf20Sopenharmony_ci validated_shader = to_vc4_bo(&bo[i]->base)->validated_shader; 8408c2ecf20Sopenharmony_ci if (!validated_shader) 8418c2ecf20Sopenharmony_ci return -EINVAL; 8428c2ecf20Sopenharmony_ci 8438c2ecf20Sopenharmony_ci if (validated_shader->uniforms_src_size > 8448c2ecf20Sopenharmony_ci exec->uniforms_size) { 8458c2ecf20Sopenharmony_ci DRM_DEBUG("Uniforms src buffer overflow\n"); 8468c2ecf20Sopenharmony_ci return -EINVAL; 8478c2ecf20Sopenharmony_ci } 8488c2ecf20Sopenharmony_ci 8498c2ecf20Sopenharmony_ci texture_handles_u = exec->uniforms_u; 8508c2ecf20Sopenharmony_ci uniform_data_u = (texture_handles_u + 8518c2ecf20Sopenharmony_ci validated_shader->num_texture_samples); 8528c2ecf20Sopenharmony_ci 8538c2ecf20Sopenharmony_ci memcpy(exec->uniforms_v, uniform_data_u, 8548c2ecf20Sopenharmony_ci validated_shader->uniforms_size); 8558c2ecf20Sopenharmony_ci 8568c2ecf20Sopenharmony_ci for (tex = 0; 8578c2ecf20Sopenharmony_ci tex < validated_shader->num_texture_samples; 8588c2ecf20Sopenharmony_ci tex++) { 8598c2ecf20Sopenharmony_ci if (!reloc_tex(exec, 8608c2ecf20Sopenharmony_ci uniform_data_u, 8618c2ecf20Sopenharmony_ci &validated_shader->texture_samples[tex], 8628c2ecf20Sopenharmony_ci texture_handles_u[tex], 8638c2ecf20Sopenharmony_ci i == 2)) { 8648c2ecf20Sopenharmony_ci return -EINVAL; 8658c2ecf20Sopenharmony_ci } 8668c2ecf20Sopenharmony_ci } 8678c2ecf20Sopenharmony_ci 8688c2ecf20Sopenharmony_ci /* Fill in the uniform slots that need this shader's 8698c2ecf20Sopenharmony_ci * start-of-uniforms address (used for resetting the uniform 8708c2ecf20Sopenharmony_ci * stream in the presence of control flow). 8718c2ecf20Sopenharmony_ci */ 8728c2ecf20Sopenharmony_ci for (uni = 0; 8738c2ecf20Sopenharmony_ci uni < validated_shader->num_uniform_addr_offsets; 8748c2ecf20Sopenharmony_ci uni++) { 8758c2ecf20Sopenharmony_ci uint32_t o = validated_shader->uniform_addr_offsets[uni]; 8768c2ecf20Sopenharmony_ci ((uint32_t *)exec->uniforms_v)[o] = exec->uniforms_p; 8778c2ecf20Sopenharmony_ci } 8788c2ecf20Sopenharmony_ci 8798c2ecf20Sopenharmony_ci *(uint32_t *)(pkt_v + o + 4) = exec->uniforms_p; 8808c2ecf20Sopenharmony_ci 8818c2ecf20Sopenharmony_ci exec->uniforms_u += validated_shader->uniforms_src_size; 8828c2ecf20Sopenharmony_ci exec->uniforms_v += validated_shader->uniforms_size; 8838c2ecf20Sopenharmony_ci exec->uniforms_p += validated_shader->uniforms_size; 8848c2ecf20Sopenharmony_ci } 8858c2ecf20Sopenharmony_ci 8868c2ecf20Sopenharmony_ci for (i = 0; i < nr_attributes; i++) { 8878c2ecf20Sopenharmony_ci struct drm_gem_cma_object *vbo = 8888c2ecf20Sopenharmony_ci bo[ARRAY_SIZE(shader_reloc_offsets) + i]; 8898c2ecf20Sopenharmony_ci uint32_t o = 36 + i * 8; 8908c2ecf20Sopenharmony_ci uint32_t offset = *(uint32_t *)(pkt_u + o + 0); 8918c2ecf20Sopenharmony_ci uint32_t attr_size = *(uint8_t *)(pkt_u + o + 4) + 1; 8928c2ecf20Sopenharmony_ci uint32_t stride = *(uint8_t *)(pkt_u + o + 5); 8938c2ecf20Sopenharmony_ci uint32_t max_index; 8948c2ecf20Sopenharmony_ci 8958c2ecf20Sopenharmony_ci exec->bin_dep_seqno = max(exec->bin_dep_seqno, 8968c2ecf20Sopenharmony_ci to_vc4_bo(&vbo->base)->write_seqno); 8978c2ecf20Sopenharmony_ci 8988c2ecf20Sopenharmony_ci if (state->addr & 0x8) 8998c2ecf20Sopenharmony_ci stride |= (*(uint32_t *)(pkt_u + 100 + i * 4)) & ~0xff; 9008c2ecf20Sopenharmony_ci 9018c2ecf20Sopenharmony_ci if (vbo->base.size < offset || 9028c2ecf20Sopenharmony_ci vbo->base.size - offset < attr_size) { 9038c2ecf20Sopenharmony_ci DRM_DEBUG("BO offset overflow (%d + %d > %zu)\n", 9048c2ecf20Sopenharmony_ci offset, attr_size, vbo->base.size); 9058c2ecf20Sopenharmony_ci return -EINVAL; 9068c2ecf20Sopenharmony_ci } 9078c2ecf20Sopenharmony_ci 9088c2ecf20Sopenharmony_ci if (stride != 0) { 9098c2ecf20Sopenharmony_ci max_index = ((vbo->base.size - offset - attr_size) / 9108c2ecf20Sopenharmony_ci stride); 9118c2ecf20Sopenharmony_ci if (state->max_index > max_index) { 9128c2ecf20Sopenharmony_ci DRM_DEBUG("primitives use index %d out of " 9138c2ecf20Sopenharmony_ci "supplied %d\n", 9148c2ecf20Sopenharmony_ci state->max_index, max_index); 9158c2ecf20Sopenharmony_ci return -EINVAL; 9168c2ecf20Sopenharmony_ci } 9178c2ecf20Sopenharmony_ci } 9188c2ecf20Sopenharmony_ci 9198c2ecf20Sopenharmony_ci *(uint32_t *)(pkt_v + o) = vbo->paddr + offset; 9208c2ecf20Sopenharmony_ci } 9218c2ecf20Sopenharmony_ci 9228c2ecf20Sopenharmony_ci return 0; 9238c2ecf20Sopenharmony_ci} 9248c2ecf20Sopenharmony_ci 9258c2ecf20Sopenharmony_ciint 9268c2ecf20Sopenharmony_civc4_validate_shader_recs(struct drm_device *dev, 9278c2ecf20Sopenharmony_ci struct vc4_exec_info *exec) 9288c2ecf20Sopenharmony_ci{ 9298c2ecf20Sopenharmony_ci uint32_t i; 9308c2ecf20Sopenharmony_ci int ret = 0; 9318c2ecf20Sopenharmony_ci 9328c2ecf20Sopenharmony_ci for (i = 0; i < exec->shader_state_count; i++) { 9338c2ecf20Sopenharmony_ci ret = validate_gl_shader_rec(dev, exec, &exec->shader_state[i]); 9348c2ecf20Sopenharmony_ci if (ret) 9358c2ecf20Sopenharmony_ci return ret; 9368c2ecf20Sopenharmony_ci } 9378c2ecf20Sopenharmony_ci 9388c2ecf20Sopenharmony_ci return ret; 9398c2ecf20Sopenharmony_ci} 940