1d722e3fbSopenharmony_ci/* 2d722e3fbSopenharmony_ci * Copyright 2015 Advanced Micro Devices, Inc. 3d722e3fbSopenharmony_ci * 4d722e3fbSopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5d722e3fbSopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6d722e3fbSopenharmony_ci * to deal in the Software without restriction, including without limitation 7d722e3fbSopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8d722e3fbSopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9d722e3fbSopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10d722e3fbSopenharmony_ci * 11d722e3fbSopenharmony_ci * The above copyright notice and this permission notice shall be included in 12d722e3fbSopenharmony_ci * all copies or substantial portions of the Software. 13d722e3fbSopenharmony_ci * 14d722e3fbSopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15d722e3fbSopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16d722e3fbSopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17d722e3fbSopenharmony_ci * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18d722e3fbSopenharmony_ci * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19d722e3fbSopenharmony_ci * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20d722e3fbSopenharmony_ci * OTHER DEALINGS IN THE SOFTWARE. 21d722e3fbSopenharmony_ci * 22d722e3fbSopenharmony_ci*/ 23d722e3fbSopenharmony_ci 24d722e3fbSopenharmony_ci#include <stdio.h> 25d722e3fbSopenharmony_ci#include <inttypes.h> 26d722e3fbSopenharmony_ci 27d722e3fbSopenharmony_ci#include "CUnit/Basic.h" 28d722e3fbSopenharmony_ci 29d722e3fbSopenharmony_ci#include "util_math.h" 30d722e3fbSopenharmony_ci 31d722e3fbSopenharmony_ci#include "amdgpu_test.h" 32d722e3fbSopenharmony_ci#include "amdgpu_drm.h" 33d722e3fbSopenharmony_ci#include "amdgpu_internal.h" 34d722e3fbSopenharmony_ci 35d722e3fbSopenharmony_ci#include "vce_ib.h" 36d722e3fbSopenharmony_ci#include "frame.h" 37d722e3fbSopenharmony_ci 38d722e3fbSopenharmony_ci#define IB_SIZE 4096 39d722e3fbSopenharmony_ci#define MAX_RESOURCES 16 40d722e3fbSopenharmony_ci#define FW_53_0_03 ((53 << 24) | (0 << 16) | (03 << 8)) 41d722e3fbSopenharmony_ci 42d722e3fbSopenharmony_cistruct amdgpu_vce_bo { 43d722e3fbSopenharmony_ci amdgpu_bo_handle handle; 44d722e3fbSopenharmony_ci amdgpu_va_handle va_handle; 45d722e3fbSopenharmony_ci uint64_t addr; 46d722e3fbSopenharmony_ci uint64_t size; 47d722e3fbSopenharmony_ci uint8_t *ptr; 48d722e3fbSopenharmony_ci}; 49d722e3fbSopenharmony_ci 50d722e3fbSopenharmony_cistruct amdgpu_vce_encode { 51d722e3fbSopenharmony_ci unsigned width; 52d722e3fbSopenharmony_ci unsigned height; 53d722e3fbSopenharmony_ci struct amdgpu_vce_bo vbuf; 54d722e3fbSopenharmony_ci struct amdgpu_vce_bo bs[2]; 55d722e3fbSopenharmony_ci struct amdgpu_vce_bo fb[2]; 56d722e3fbSopenharmony_ci struct amdgpu_vce_bo cpb; 57d722e3fbSopenharmony_ci unsigned ib_len; 58d722e3fbSopenharmony_ci bool two_instance; 59d722e3fbSopenharmony_ci struct amdgpu_vce_bo mvrefbuf; 60d722e3fbSopenharmony_ci struct amdgpu_vce_bo mvb; 61d722e3fbSopenharmony_ci unsigned mvbuf_size; 62d722e3fbSopenharmony_ci}; 63d722e3fbSopenharmony_ci 64d722e3fbSopenharmony_cistatic amdgpu_device_handle device_handle; 65d722e3fbSopenharmony_cistatic uint32_t major_version; 66d722e3fbSopenharmony_cistatic uint32_t minor_version; 67d722e3fbSopenharmony_cistatic uint32_t family_id; 68d722e3fbSopenharmony_cistatic uint32_t vce_harvest_config; 69d722e3fbSopenharmony_cistatic uint32_t chip_rev; 70d722e3fbSopenharmony_cistatic uint32_t chip_id; 71d722e3fbSopenharmony_cistatic uint32_t ids_flags; 72d722e3fbSopenharmony_cistatic bool is_mv_supported = true; 73d722e3fbSopenharmony_ci 74d722e3fbSopenharmony_cistatic amdgpu_context_handle context_handle; 75d722e3fbSopenharmony_cistatic amdgpu_bo_handle ib_handle; 76d722e3fbSopenharmony_cistatic amdgpu_va_handle ib_va_handle; 77d722e3fbSopenharmony_cistatic uint64_t ib_mc_address; 78d722e3fbSopenharmony_cistatic uint32_t *ib_cpu; 79d722e3fbSopenharmony_ci 80d722e3fbSopenharmony_cistatic struct amdgpu_vce_encode enc; 81d722e3fbSopenharmony_cistatic amdgpu_bo_handle resources[MAX_RESOURCES]; 82d722e3fbSopenharmony_cistatic unsigned num_resources; 83d722e3fbSopenharmony_ci 84d722e3fbSopenharmony_cistatic void amdgpu_cs_vce_create(void); 85d722e3fbSopenharmony_cistatic void amdgpu_cs_vce_encode(void); 86d722e3fbSopenharmony_cistatic void amdgpu_cs_vce_encode_mv(void); 87d722e3fbSopenharmony_cistatic void amdgpu_cs_vce_destroy(void); 88d722e3fbSopenharmony_ci 89d722e3fbSopenharmony_ciCU_TestInfo vce_tests[] = { 90d722e3fbSopenharmony_ci { "VCE create", amdgpu_cs_vce_create }, 91d722e3fbSopenharmony_ci { "VCE encode", amdgpu_cs_vce_encode }, 92d722e3fbSopenharmony_ci { "VCE MV dump", amdgpu_cs_vce_encode_mv }, 93d722e3fbSopenharmony_ci { "VCE destroy", amdgpu_cs_vce_destroy }, 94d722e3fbSopenharmony_ci CU_TEST_INFO_NULL, 95d722e3fbSopenharmony_ci}; 96d722e3fbSopenharmony_ci 97d722e3fbSopenharmony_ciCU_BOOL suite_vce_tests_enable(void) 98d722e3fbSopenharmony_ci{ 99d722e3fbSopenharmony_ci uint32_t version, feature, asic_id; 100d722e3fbSopenharmony_ci CU_BOOL ret_mv = CU_FALSE; 101d722e3fbSopenharmony_ci 102d722e3fbSopenharmony_ci if (amdgpu_device_initialize(drm_amdgpu[0], &major_version, 103d722e3fbSopenharmony_ci &minor_version, &device_handle)) 104d722e3fbSopenharmony_ci return CU_FALSE; 105d722e3fbSopenharmony_ci 106d722e3fbSopenharmony_ci family_id = device_handle->info.family_id; 107d722e3fbSopenharmony_ci chip_rev = device_handle->info.chip_rev; 108d722e3fbSopenharmony_ci chip_id = device_handle->info.chip_external_rev; 109d722e3fbSopenharmony_ci ids_flags = device_handle->info.ids_flags; 110d722e3fbSopenharmony_ci asic_id = device_handle->info.asic_id; 111d722e3fbSopenharmony_ci 112d722e3fbSopenharmony_ci amdgpu_query_firmware_version(device_handle, AMDGPU_INFO_FW_VCE, 0, 113d722e3fbSopenharmony_ci 0, &version, &feature); 114d722e3fbSopenharmony_ci 115d722e3fbSopenharmony_ci if (amdgpu_device_deinitialize(device_handle)) 116d722e3fbSopenharmony_ci return CU_FALSE; 117d722e3fbSopenharmony_ci 118d722e3fbSopenharmony_ci if (family_id >= AMDGPU_FAMILY_RV || family_id == AMDGPU_FAMILY_SI || 119d722e3fbSopenharmony_ci asic_is_gfx_pipe_removed(family_id, chip_id, chip_rev)) { 120d722e3fbSopenharmony_ci printf("\n\nThe ASIC NOT support VCE, suite disabled\n"); 121d722e3fbSopenharmony_ci return CU_FALSE; 122d722e3fbSopenharmony_ci } 123d722e3fbSopenharmony_ci 124d722e3fbSopenharmony_ci if (!(chip_id == (chip_rev + 0x3C) || /* FIJI */ 125d722e3fbSopenharmony_ci chip_id == (chip_rev + 0x50) || /* Polaris 10*/ 126d722e3fbSopenharmony_ci chip_id == (chip_rev + 0x5A) || /* Polaris 11*/ 127d722e3fbSopenharmony_ci chip_id == (chip_rev + 0x64) || /* Polaris 12*/ 128d722e3fbSopenharmony_ci (family_id >= AMDGPU_FAMILY_AI && !ids_flags))) /* dGPU > Polaris */ 129d722e3fbSopenharmony_ci printf("\n\nThe ASIC NOT support VCE MV, suite disabled\n"); 130d722e3fbSopenharmony_ci else if (FW_53_0_03 > version) 131d722e3fbSopenharmony_ci printf("\n\nThe ASIC FW version NOT support VCE MV, suite disabled\n"); 132d722e3fbSopenharmony_ci else 133d722e3fbSopenharmony_ci ret_mv = CU_TRUE; 134d722e3fbSopenharmony_ci 135d722e3fbSopenharmony_ci if (ret_mv == CU_FALSE) { 136d722e3fbSopenharmony_ci amdgpu_set_test_active("VCE Tests", "VCE MV dump", ret_mv); 137d722e3fbSopenharmony_ci is_mv_supported = false; 138d722e3fbSopenharmony_ci } 139d722e3fbSopenharmony_ci 140d722e3fbSopenharmony_ci return CU_TRUE; 141d722e3fbSopenharmony_ci} 142d722e3fbSopenharmony_ci 143d722e3fbSopenharmony_ciint suite_vce_tests_init(void) 144d722e3fbSopenharmony_ci{ 145d722e3fbSopenharmony_ci int r; 146d722e3fbSopenharmony_ci 147d722e3fbSopenharmony_ci r = amdgpu_device_initialize(drm_amdgpu[0], &major_version, 148d722e3fbSopenharmony_ci &minor_version, &device_handle); 149d722e3fbSopenharmony_ci if (r) { 150d722e3fbSopenharmony_ci if ((r == -EACCES) && (errno == EACCES)) 151d722e3fbSopenharmony_ci printf("\n\nError:%s. " 152d722e3fbSopenharmony_ci "Hint:Try to run this test program as root.", 153d722e3fbSopenharmony_ci strerror(errno)); 154d722e3fbSopenharmony_ci 155d722e3fbSopenharmony_ci return CUE_SINIT_FAILED; 156d722e3fbSopenharmony_ci } 157d722e3fbSopenharmony_ci 158d722e3fbSopenharmony_ci family_id = device_handle->info.family_id; 159d722e3fbSopenharmony_ci vce_harvest_config = device_handle->info.vce_harvest_config; 160d722e3fbSopenharmony_ci 161d722e3fbSopenharmony_ci r = amdgpu_cs_ctx_create(device_handle, &context_handle); 162d722e3fbSopenharmony_ci if (r) 163d722e3fbSopenharmony_ci return CUE_SINIT_FAILED; 164d722e3fbSopenharmony_ci 165d722e3fbSopenharmony_ci r = amdgpu_bo_alloc_and_map(device_handle, IB_SIZE, 4096, 166d722e3fbSopenharmony_ci AMDGPU_GEM_DOMAIN_GTT, 0, 167d722e3fbSopenharmony_ci &ib_handle, (void**)&ib_cpu, 168d722e3fbSopenharmony_ci &ib_mc_address, &ib_va_handle); 169d722e3fbSopenharmony_ci if (r) 170d722e3fbSopenharmony_ci return CUE_SINIT_FAILED; 171d722e3fbSopenharmony_ci 172d722e3fbSopenharmony_ci memset(&enc, 0, sizeof(struct amdgpu_vce_encode)); 173d722e3fbSopenharmony_ci 174d722e3fbSopenharmony_ci return CUE_SUCCESS; 175d722e3fbSopenharmony_ci} 176d722e3fbSopenharmony_ci 177d722e3fbSopenharmony_ciint suite_vce_tests_clean(void) 178d722e3fbSopenharmony_ci{ 179d722e3fbSopenharmony_ci int r; 180d722e3fbSopenharmony_ci 181d722e3fbSopenharmony_ci r = amdgpu_bo_unmap_and_free(ib_handle, ib_va_handle, 182d722e3fbSopenharmony_ci ib_mc_address, IB_SIZE); 183d722e3fbSopenharmony_ci if (r) 184d722e3fbSopenharmony_ci return CUE_SCLEAN_FAILED; 185d722e3fbSopenharmony_ci 186d722e3fbSopenharmony_ci r = amdgpu_cs_ctx_free(context_handle); 187d722e3fbSopenharmony_ci if (r) 188d722e3fbSopenharmony_ci return CUE_SCLEAN_FAILED; 189d722e3fbSopenharmony_ci 190d722e3fbSopenharmony_ci r = amdgpu_device_deinitialize(device_handle); 191d722e3fbSopenharmony_ci if (r) 192d722e3fbSopenharmony_ci return CUE_SCLEAN_FAILED; 193d722e3fbSopenharmony_ci 194d722e3fbSopenharmony_ci return CUE_SUCCESS; 195d722e3fbSopenharmony_ci} 196d722e3fbSopenharmony_ci 197d722e3fbSopenharmony_cistatic int submit(unsigned ndw, unsigned ip) 198d722e3fbSopenharmony_ci{ 199d722e3fbSopenharmony_ci struct amdgpu_cs_request ibs_request = {0}; 200d722e3fbSopenharmony_ci struct amdgpu_cs_ib_info ib_info = {0}; 201d722e3fbSopenharmony_ci struct amdgpu_cs_fence fence_status = {0}; 202d722e3fbSopenharmony_ci uint32_t expired; 203d722e3fbSopenharmony_ci int r; 204d722e3fbSopenharmony_ci 205d722e3fbSopenharmony_ci ib_info.ib_mc_address = ib_mc_address; 206d722e3fbSopenharmony_ci ib_info.size = ndw; 207d722e3fbSopenharmony_ci 208d722e3fbSopenharmony_ci ibs_request.ip_type = ip; 209d722e3fbSopenharmony_ci 210d722e3fbSopenharmony_ci r = amdgpu_bo_list_create(device_handle, num_resources, resources, 211d722e3fbSopenharmony_ci NULL, &ibs_request.resources); 212d722e3fbSopenharmony_ci if (r) 213d722e3fbSopenharmony_ci return r; 214d722e3fbSopenharmony_ci 215d722e3fbSopenharmony_ci ibs_request.number_of_ibs = 1; 216d722e3fbSopenharmony_ci ibs_request.ibs = &ib_info; 217d722e3fbSopenharmony_ci ibs_request.fence_info.handle = NULL; 218d722e3fbSopenharmony_ci 219d722e3fbSopenharmony_ci r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 220d722e3fbSopenharmony_ci if (r) 221d722e3fbSopenharmony_ci return r; 222d722e3fbSopenharmony_ci 223d722e3fbSopenharmony_ci r = amdgpu_bo_list_destroy(ibs_request.resources); 224d722e3fbSopenharmony_ci if (r) 225d722e3fbSopenharmony_ci return r; 226d722e3fbSopenharmony_ci 227d722e3fbSopenharmony_ci fence_status.context = context_handle; 228d722e3fbSopenharmony_ci fence_status.ip_type = ip; 229d722e3fbSopenharmony_ci fence_status.fence = ibs_request.seq_no; 230d722e3fbSopenharmony_ci 231d722e3fbSopenharmony_ci r = amdgpu_cs_query_fence_status(&fence_status, 232d722e3fbSopenharmony_ci AMDGPU_TIMEOUT_INFINITE, 233d722e3fbSopenharmony_ci 0, &expired); 234d722e3fbSopenharmony_ci if (r) 235d722e3fbSopenharmony_ci return r; 236d722e3fbSopenharmony_ci 237d722e3fbSopenharmony_ci return 0; 238d722e3fbSopenharmony_ci} 239d722e3fbSopenharmony_ci 240d722e3fbSopenharmony_cistatic void alloc_resource(struct amdgpu_vce_bo *vce_bo, unsigned size, unsigned domain) 241d722e3fbSopenharmony_ci{ 242d722e3fbSopenharmony_ci struct amdgpu_bo_alloc_request req = {0}; 243d722e3fbSopenharmony_ci amdgpu_bo_handle buf_handle; 244d722e3fbSopenharmony_ci amdgpu_va_handle va_handle; 245d722e3fbSopenharmony_ci uint64_t va = 0; 246d722e3fbSopenharmony_ci int r; 247d722e3fbSopenharmony_ci 248d722e3fbSopenharmony_ci req.alloc_size = ALIGN(size, 4096); 249d722e3fbSopenharmony_ci req.preferred_heap = domain; 250d722e3fbSopenharmony_ci r = amdgpu_bo_alloc(device_handle, &req, &buf_handle); 251d722e3fbSopenharmony_ci CU_ASSERT_EQUAL(r, 0); 252d722e3fbSopenharmony_ci r = amdgpu_va_range_alloc(device_handle, 253d722e3fbSopenharmony_ci amdgpu_gpu_va_range_general, 254d722e3fbSopenharmony_ci req.alloc_size, 1, 0, &va, 255d722e3fbSopenharmony_ci &va_handle, 0); 256d722e3fbSopenharmony_ci CU_ASSERT_EQUAL(r, 0); 257d722e3fbSopenharmony_ci r = amdgpu_bo_va_op(buf_handle, 0, req.alloc_size, va, 0, 258d722e3fbSopenharmony_ci AMDGPU_VA_OP_MAP); 259d722e3fbSopenharmony_ci CU_ASSERT_EQUAL(r, 0); 260d722e3fbSopenharmony_ci vce_bo->addr = va; 261d722e3fbSopenharmony_ci vce_bo->handle = buf_handle; 262d722e3fbSopenharmony_ci vce_bo->size = req.alloc_size; 263d722e3fbSopenharmony_ci vce_bo->va_handle = va_handle; 264d722e3fbSopenharmony_ci r = amdgpu_bo_cpu_map(vce_bo->handle, (void **)&vce_bo->ptr); 265d722e3fbSopenharmony_ci CU_ASSERT_EQUAL(r, 0); 266d722e3fbSopenharmony_ci memset(vce_bo->ptr, 0, size); 267d722e3fbSopenharmony_ci r = amdgpu_bo_cpu_unmap(vce_bo->handle); 268d722e3fbSopenharmony_ci CU_ASSERT_EQUAL(r, 0); 269d722e3fbSopenharmony_ci} 270d722e3fbSopenharmony_ci 271d722e3fbSopenharmony_cistatic void free_resource(struct amdgpu_vce_bo *vce_bo) 272d722e3fbSopenharmony_ci{ 273d722e3fbSopenharmony_ci int r; 274d722e3fbSopenharmony_ci 275d722e3fbSopenharmony_ci r = amdgpu_bo_va_op(vce_bo->handle, 0, vce_bo->size, 276d722e3fbSopenharmony_ci vce_bo->addr, 0, AMDGPU_VA_OP_UNMAP); 277d722e3fbSopenharmony_ci CU_ASSERT_EQUAL(r, 0); 278d722e3fbSopenharmony_ci 279d722e3fbSopenharmony_ci r = amdgpu_va_range_free(vce_bo->va_handle); 280d722e3fbSopenharmony_ci CU_ASSERT_EQUAL(r, 0); 281d722e3fbSopenharmony_ci 282d722e3fbSopenharmony_ci r = amdgpu_bo_free(vce_bo->handle); 283d722e3fbSopenharmony_ci CU_ASSERT_EQUAL(r, 0); 284d722e3fbSopenharmony_ci memset(vce_bo, 0, sizeof(*vce_bo)); 285d722e3fbSopenharmony_ci} 286d722e3fbSopenharmony_ci 287d722e3fbSopenharmony_cistatic void amdgpu_cs_vce_create(void) 288d722e3fbSopenharmony_ci{ 289d722e3fbSopenharmony_ci unsigned align = (family_id >= AMDGPU_FAMILY_AI) ? 256 : 16; 290d722e3fbSopenharmony_ci int len, r; 291d722e3fbSopenharmony_ci 292d722e3fbSopenharmony_ci enc.width = vce_create[6]; 293d722e3fbSopenharmony_ci enc.height = vce_create[7]; 294d722e3fbSopenharmony_ci 295d722e3fbSopenharmony_ci num_resources = 0; 296d722e3fbSopenharmony_ci alloc_resource(&enc.fb[0], 4096, AMDGPU_GEM_DOMAIN_GTT); 297d722e3fbSopenharmony_ci resources[num_resources++] = enc.fb[0].handle; 298d722e3fbSopenharmony_ci resources[num_resources++] = ib_handle; 299d722e3fbSopenharmony_ci 300d722e3fbSopenharmony_ci len = 0; 301d722e3fbSopenharmony_ci memcpy(ib_cpu, vce_session, sizeof(vce_session)); 302d722e3fbSopenharmony_ci len += sizeof(vce_session) / 4; 303d722e3fbSopenharmony_ci memcpy((ib_cpu + len), vce_taskinfo, sizeof(vce_taskinfo)); 304d722e3fbSopenharmony_ci len += sizeof(vce_taskinfo) / 4; 305d722e3fbSopenharmony_ci memcpy((ib_cpu + len), vce_create, sizeof(vce_create)); 306d722e3fbSopenharmony_ci ib_cpu[len + 8] = ALIGN(enc.width, align); 307d722e3fbSopenharmony_ci ib_cpu[len + 9] = ALIGN(enc.width, align); 308d722e3fbSopenharmony_ci if (is_mv_supported == true) {/* disableTwoInstance */ 309d722e3fbSopenharmony_ci if (family_id >= AMDGPU_FAMILY_AI) 310d722e3fbSopenharmony_ci ib_cpu[len + 11] = 0x01000001; 311d722e3fbSopenharmony_ci else 312d722e3fbSopenharmony_ci ib_cpu[len + 11] = 0x01000201; 313d722e3fbSopenharmony_ci } 314d722e3fbSopenharmony_ci len += sizeof(vce_create) / 4; 315d722e3fbSopenharmony_ci memcpy((ib_cpu + len), vce_feedback, sizeof(vce_feedback)); 316d722e3fbSopenharmony_ci ib_cpu[len + 2] = enc.fb[0].addr >> 32; 317d722e3fbSopenharmony_ci ib_cpu[len + 3] = enc.fb[0].addr; 318d722e3fbSopenharmony_ci len += sizeof(vce_feedback) / 4; 319d722e3fbSopenharmony_ci 320d722e3fbSopenharmony_ci r = submit(len, AMDGPU_HW_IP_VCE); 321d722e3fbSopenharmony_ci CU_ASSERT_EQUAL(r, 0); 322d722e3fbSopenharmony_ci 323d722e3fbSopenharmony_ci free_resource(&enc.fb[0]); 324d722e3fbSopenharmony_ci} 325d722e3fbSopenharmony_ci 326d722e3fbSopenharmony_cistatic void amdgpu_cs_vce_config(void) 327d722e3fbSopenharmony_ci{ 328d722e3fbSopenharmony_ci int len = 0, r; 329d722e3fbSopenharmony_ci 330d722e3fbSopenharmony_ci memcpy((ib_cpu + len), vce_session, sizeof(vce_session)); 331d722e3fbSopenharmony_ci len += sizeof(vce_session) / 4; 332d722e3fbSopenharmony_ci memcpy((ib_cpu + len), vce_taskinfo, sizeof(vce_taskinfo)); 333d722e3fbSopenharmony_ci ib_cpu[len + 3] = 2; 334d722e3fbSopenharmony_ci ib_cpu[len + 6] = 0xffffffff; 335d722e3fbSopenharmony_ci len += sizeof(vce_taskinfo) / 4; 336d722e3fbSopenharmony_ci memcpy((ib_cpu + len), vce_rate_ctrl, sizeof(vce_rate_ctrl)); 337d722e3fbSopenharmony_ci len += sizeof(vce_rate_ctrl) / 4; 338d722e3fbSopenharmony_ci memcpy((ib_cpu + len), vce_config_ext, sizeof(vce_config_ext)); 339d722e3fbSopenharmony_ci len += sizeof(vce_config_ext) / 4; 340d722e3fbSopenharmony_ci memcpy((ib_cpu + len), vce_motion_est, sizeof(vce_motion_est)); 341d722e3fbSopenharmony_ci len += sizeof(vce_motion_est) / 4; 342d722e3fbSopenharmony_ci memcpy((ib_cpu + len), vce_rdo, sizeof(vce_rdo)); 343d722e3fbSopenharmony_ci len += sizeof(vce_rdo) / 4; 344d722e3fbSopenharmony_ci memcpy((ib_cpu + len), vce_pic_ctrl, sizeof(vce_pic_ctrl)); 345d722e3fbSopenharmony_ci if (is_mv_supported == true) 346d722e3fbSopenharmony_ci ib_cpu[len + 27] = 0x00000001; /* encSliceMode */ 347d722e3fbSopenharmony_ci len += sizeof(vce_pic_ctrl) / 4; 348d722e3fbSopenharmony_ci 349d722e3fbSopenharmony_ci r = submit(len, AMDGPU_HW_IP_VCE); 350d722e3fbSopenharmony_ci CU_ASSERT_EQUAL(r, 0); 351d722e3fbSopenharmony_ci} 352d722e3fbSopenharmony_ci 353d722e3fbSopenharmony_cistatic void amdgpu_cs_vce_encode_idr(struct amdgpu_vce_encode *enc) 354d722e3fbSopenharmony_ci{ 355d722e3fbSopenharmony_ci 356d722e3fbSopenharmony_ci uint64_t luma_offset, chroma_offset; 357d722e3fbSopenharmony_ci unsigned align = (family_id >= AMDGPU_FAMILY_AI) ? 256 : 16; 358d722e3fbSopenharmony_ci unsigned luma_size = ALIGN(enc->width, align) * ALIGN(enc->height, 16); 359d722e3fbSopenharmony_ci int len = 0, i, r; 360d722e3fbSopenharmony_ci 361d722e3fbSopenharmony_ci luma_offset = enc->vbuf.addr; 362d722e3fbSopenharmony_ci chroma_offset = luma_offset + luma_size; 363d722e3fbSopenharmony_ci 364d722e3fbSopenharmony_ci memcpy((ib_cpu + len), vce_session, sizeof(vce_session)); 365d722e3fbSopenharmony_ci len += sizeof(vce_session) / 4; 366d722e3fbSopenharmony_ci memcpy((ib_cpu + len), vce_taskinfo, sizeof(vce_taskinfo)); 367d722e3fbSopenharmony_ci len += sizeof(vce_taskinfo) / 4; 368d722e3fbSopenharmony_ci memcpy((ib_cpu + len), vce_bs_buffer, sizeof(vce_bs_buffer)); 369d722e3fbSopenharmony_ci ib_cpu[len + 2] = enc->bs[0].addr >> 32; 370d722e3fbSopenharmony_ci ib_cpu[len + 3] = enc->bs[0].addr; 371d722e3fbSopenharmony_ci len += sizeof(vce_bs_buffer) / 4; 372d722e3fbSopenharmony_ci memcpy((ib_cpu + len), vce_context_buffer, sizeof(vce_context_buffer)); 373d722e3fbSopenharmony_ci ib_cpu[len + 2] = enc->cpb.addr >> 32; 374d722e3fbSopenharmony_ci ib_cpu[len + 3] = enc->cpb.addr; 375d722e3fbSopenharmony_ci len += sizeof(vce_context_buffer) / 4; 376d722e3fbSopenharmony_ci memcpy((ib_cpu + len), vce_aux_buffer, sizeof(vce_aux_buffer)); 377d722e3fbSopenharmony_ci for (i = 0; i < 8; ++i) 378d722e3fbSopenharmony_ci ib_cpu[len + 2 + i] = luma_size * 1.5 * (i + 2); 379d722e3fbSopenharmony_ci for (i = 0; i < 8; ++i) 380d722e3fbSopenharmony_ci ib_cpu[len + 10 + i] = luma_size * 1.5; 381d722e3fbSopenharmony_ci len += sizeof(vce_aux_buffer) / 4; 382d722e3fbSopenharmony_ci memcpy((ib_cpu + len), vce_feedback, sizeof(vce_feedback)); 383d722e3fbSopenharmony_ci ib_cpu[len + 2] = enc->fb[0].addr >> 32; 384d722e3fbSopenharmony_ci ib_cpu[len + 3] = enc->fb[0].addr; 385d722e3fbSopenharmony_ci len += sizeof(vce_feedback) / 4; 386d722e3fbSopenharmony_ci memcpy((ib_cpu + len), vce_encode, sizeof(vce_encode)); 387d722e3fbSopenharmony_ci ib_cpu[len + 9] = luma_offset >> 32; 388d722e3fbSopenharmony_ci ib_cpu[len + 10] = luma_offset; 389d722e3fbSopenharmony_ci ib_cpu[len + 11] = chroma_offset >> 32; 390d722e3fbSopenharmony_ci ib_cpu[len + 12] = chroma_offset; 391d722e3fbSopenharmony_ci ib_cpu[len + 14] = ALIGN(enc->width, align); 392d722e3fbSopenharmony_ci ib_cpu[len + 15] = ALIGN(enc->width, align); 393d722e3fbSopenharmony_ci ib_cpu[len + 73] = luma_size * 1.5; 394d722e3fbSopenharmony_ci ib_cpu[len + 74] = luma_size * 2.5; 395d722e3fbSopenharmony_ci len += sizeof(vce_encode) / 4; 396d722e3fbSopenharmony_ci enc->ib_len = len; 397d722e3fbSopenharmony_ci if (!enc->two_instance) { 398d722e3fbSopenharmony_ci r = submit(len, AMDGPU_HW_IP_VCE); 399d722e3fbSopenharmony_ci CU_ASSERT_EQUAL(r, 0); 400d722e3fbSopenharmony_ci } 401d722e3fbSopenharmony_ci} 402d722e3fbSopenharmony_ci 403d722e3fbSopenharmony_cistatic void amdgpu_cs_vce_encode_p(struct amdgpu_vce_encode *enc) 404d722e3fbSopenharmony_ci{ 405d722e3fbSopenharmony_ci uint64_t luma_offset, chroma_offset; 406d722e3fbSopenharmony_ci int len, i, r; 407d722e3fbSopenharmony_ci unsigned align = (family_id >= AMDGPU_FAMILY_AI) ? 256 : 16; 408d722e3fbSopenharmony_ci unsigned luma_size = ALIGN(enc->width, align) * ALIGN(enc->height, 16); 409d722e3fbSopenharmony_ci 410d722e3fbSopenharmony_ci len = (enc->two_instance) ? enc->ib_len : 0; 411d722e3fbSopenharmony_ci luma_offset = enc->vbuf.addr; 412d722e3fbSopenharmony_ci chroma_offset = luma_offset + luma_size; 413d722e3fbSopenharmony_ci 414d722e3fbSopenharmony_ci if (!enc->two_instance) { 415d722e3fbSopenharmony_ci memcpy((ib_cpu + len), vce_session, sizeof(vce_session)); 416d722e3fbSopenharmony_ci len += sizeof(vce_session) / 4; 417d722e3fbSopenharmony_ci } 418d722e3fbSopenharmony_ci memcpy((ib_cpu + len), vce_taskinfo, sizeof(vce_taskinfo)); 419d722e3fbSopenharmony_ci len += sizeof(vce_taskinfo) / 4; 420d722e3fbSopenharmony_ci memcpy((ib_cpu + len), vce_bs_buffer, sizeof(vce_bs_buffer)); 421d722e3fbSopenharmony_ci ib_cpu[len + 2] = enc->bs[1].addr >> 32; 422d722e3fbSopenharmony_ci ib_cpu[len + 3] = enc->bs[1].addr; 423d722e3fbSopenharmony_ci len += sizeof(vce_bs_buffer) / 4; 424d722e3fbSopenharmony_ci memcpy((ib_cpu + len), vce_context_buffer, sizeof(vce_context_buffer)); 425d722e3fbSopenharmony_ci ib_cpu[len + 2] = enc->cpb.addr >> 32; 426d722e3fbSopenharmony_ci ib_cpu[len + 3] = enc->cpb.addr; 427d722e3fbSopenharmony_ci len += sizeof(vce_context_buffer) / 4; 428d722e3fbSopenharmony_ci memcpy((ib_cpu + len), vce_aux_buffer, sizeof(vce_aux_buffer)); 429d722e3fbSopenharmony_ci for (i = 0; i < 8; ++i) 430d722e3fbSopenharmony_ci ib_cpu[len + 2 + i] = luma_size * 1.5 * (i + 2); 431d722e3fbSopenharmony_ci for (i = 0; i < 8; ++i) 432d722e3fbSopenharmony_ci ib_cpu[len + 10 + i] = luma_size * 1.5; 433d722e3fbSopenharmony_ci len += sizeof(vce_aux_buffer) / 4; 434d722e3fbSopenharmony_ci memcpy((ib_cpu + len), vce_feedback, sizeof(vce_feedback)); 435d722e3fbSopenharmony_ci ib_cpu[len + 2] = enc->fb[1].addr >> 32; 436d722e3fbSopenharmony_ci ib_cpu[len + 3] = enc->fb[1].addr; 437d722e3fbSopenharmony_ci len += sizeof(vce_feedback) / 4; 438d722e3fbSopenharmony_ci memcpy((ib_cpu + len), vce_encode, sizeof(vce_encode)); 439d722e3fbSopenharmony_ci ib_cpu[len + 2] = 0; 440d722e3fbSopenharmony_ci ib_cpu[len + 9] = luma_offset >> 32; 441d722e3fbSopenharmony_ci ib_cpu[len + 10] = luma_offset; 442d722e3fbSopenharmony_ci ib_cpu[len + 11] = chroma_offset >> 32; 443d722e3fbSopenharmony_ci ib_cpu[len + 12] = chroma_offset; 444d722e3fbSopenharmony_ci ib_cpu[len + 14] = ALIGN(enc->width, align); 445d722e3fbSopenharmony_ci ib_cpu[len + 15] = ALIGN(enc->width, align); 446d722e3fbSopenharmony_ci ib_cpu[len + 18] = 0; 447d722e3fbSopenharmony_ci ib_cpu[len + 19] = 0; 448d722e3fbSopenharmony_ci ib_cpu[len + 56] = 3; 449d722e3fbSopenharmony_ci ib_cpu[len + 57] = 0; 450d722e3fbSopenharmony_ci ib_cpu[len + 58] = 0; 451d722e3fbSopenharmony_ci ib_cpu[len + 59] = luma_size * 1.5; 452d722e3fbSopenharmony_ci ib_cpu[len + 60] = luma_size * 2.5; 453d722e3fbSopenharmony_ci ib_cpu[len + 73] = 0; 454d722e3fbSopenharmony_ci ib_cpu[len + 74] = luma_size; 455d722e3fbSopenharmony_ci ib_cpu[len + 81] = 1; 456d722e3fbSopenharmony_ci ib_cpu[len + 82] = 1; 457d722e3fbSopenharmony_ci len += sizeof(vce_encode) / 4; 458d722e3fbSopenharmony_ci 459d722e3fbSopenharmony_ci r = submit(len, AMDGPU_HW_IP_VCE); 460d722e3fbSopenharmony_ci CU_ASSERT_EQUAL(r, 0); 461d722e3fbSopenharmony_ci} 462d722e3fbSopenharmony_ci 463d722e3fbSopenharmony_cistatic void check_result(struct amdgpu_vce_encode *enc) 464d722e3fbSopenharmony_ci{ 465d722e3fbSopenharmony_ci uint64_t sum; 466d722e3fbSopenharmony_ci uint32_t s[2] = {180325, 15946}; 467d722e3fbSopenharmony_ci uint32_t *ptr, size; 468d722e3fbSopenharmony_ci int i, j, r; 469d722e3fbSopenharmony_ci 470d722e3fbSopenharmony_ci for (i = 0; i < 2; ++i) { 471d722e3fbSopenharmony_ci r = amdgpu_bo_cpu_map(enc->fb[i].handle, (void **)&enc->fb[i].ptr); 472d722e3fbSopenharmony_ci CU_ASSERT_EQUAL(r, 0); 473d722e3fbSopenharmony_ci ptr = (uint32_t *)enc->fb[i].ptr; 474d722e3fbSopenharmony_ci size = ptr[4] - ptr[9]; 475d722e3fbSopenharmony_ci r = amdgpu_bo_cpu_unmap(enc->fb[i].handle); 476d722e3fbSopenharmony_ci CU_ASSERT_EQUAL(r, 0); 477d722e3fbSopenharmony_ci r = amdgpu_bo_cpu_map(enc->bs[i].handle, (void **)&enc->bs[i].ptr); 478d722e3fbSopenharmony_ci CU_ASSERT_EQUAL(r, 0); 479d722e3fbSopenharmony_ci for (j = 0, sum = 0; j < size; ++j) 480d722e3fbSopenharmony_ci sum += enc->bs[i].ptr[j]; 481d722e3fbSopenharmony_ci CU_ASSERT_EQUAL(sum, s[i]); 482d722e3fbSopenharmony_ci r = amdgpu_bo_cpu_unmap(enc->bs[i].handle); 483d722e3fbSopenharmony_ci CU_ASSERT_EQUAL(r, 0); 484d722e3fbSopenharmony_ci } 485d722e3fbSopenharmony_ci} 486d722e3fbSopenharmony_ci 487d722e3fbSopenharmony_cistatic void amdgpu_cs_vce_encode(void) 488d722e3fbSopenharmony_ci{ 489d722e3fbSopenharmony_ci uint32_t vbuf_size, bs_size = 0x154000, cpb_size; 490d722e3fbSopenharmony_ci unsigned align = (family_id >= AMDGPU_FAMILY_AI) ? 256 : 16; 491d722e3fbSopenharmony_ci int i, r; 492d722e3fbSopenharmony_ci 493d722e3fbSopenharmony_ci vbuf_size = ALIGN(enc.width, align) * ALIGN(enc.height, 16) * 1.5; 494d722e3fbSopenharmony_ci cpb_size = vbuf_size * 10; 495d722e3fbSopenharmony_ci num_resources = 0; 496d722e3fbSopenharmony_ci alloc_resource(&enc.fb[0], 4096, AMDGPU_GEM_DOMAIN_GTT); 497d722e3fbSopenharmony_ci resources[num_resources++] = enc.fb[0].handle; 498d722e3fbSopenharmony_ci alloc_resource(&enc.fb[1], 4096, AMDGPU_GEM_DOMAIN_GTT); 499d722e3fbSopenharmony_ci resources[num_resources++] = enc.fb[1].handle; 500d722e3fbSopenharmony_ci alloc_resource(&enc.bs[0], bs_size, AMDGPU_GEM_DOMAIN_GTT); 501d722e3fbSopenharmony_ci resources[num_resources++] = enc.bs[0].handle; 502d722e3fbSopenharmony_ci alloc_resource(&enc.bs[1], bs_size, AMDGPU_GEM_DOMAIN_GTT); 503d722e3fbSopenharmony_ci resources[num_resources++] = enc.bs[1].handle; 504d722e3fbSopenharmony_ci alloc_resource(&enc.vbuf, vbuf_size, AMDGPU_GEM_DOMAIN_VRAM); 505d722e3fbSopenharmony_ci resources[num_resources++] = enc.vbuf.handle; 506d722e3fbSopenharmony_ci alloc_resource(&enc.cpb, cpb_size, AMDGPU_GEM_DOMAIN_VRAM); 507d722e3fbSopenharmony_ci resources[num_resources++] = enc.cpb.handle; 508d722e3fbSopenharmony_ci resources[num_resources++] = ib_handle; 509d722e3fbSopenharmony_ci 510d722e3fbSopenharmony_ci r = amdgpu_bo_cpu_map(enc.vbuf.handle, (void **)&enc.vbuf.ptr); 511d722e3fbSopenharmony_ci CU_ASSERT_EQUAL(r, 0); 512d722e3fbSopenharmony_ci 513d722e3fbSopenharmony_ci memset(enc.vbuf.ptr, 0, vbuf_size); 514d722e3fbSopenharmony_ci for (i = 0; i < enc.height; ++i) { 515d722e3fbSopenharmony_ci memcpy(enc.vbuf.ptr, (frame + i * enc.width), enc.width); 516d722e3fbSopenharmony_ci enc.vbuf.ptr += ALIGN(enc.width, align); 517d722e3fbSopenharmony_ci } 518d722e3fbSopenharmony_ci for (i = 0; i < enc.height / 2; ++i) { 519d722e3fbSopenharmony_ci memcpy(enc.vbuf.ptr, ((frame + enc.height * enc.width) + i * enc.width), enc.width); 520d722e3fbSopenharmony_ci enc.vbuf.ptr += ALIGN(enc.width, align); 521d722e3fbSopenharmony_ci } 522d722e3fbSopenharmony_ci 523d722e3fbSopenharmony_ci r = amdgpu_bo_cpu_unmap(enc.vbuf.handle); 524d722e3fbSopenharmony_ci CU_ASSERT_EQUAL(r, 0); 525d722e3fbSopenharmony_ci 526d722e3fbSopenharmony_ci amdgpu_cs_vce_config(); 527d722e3fbSopenharmony_ci 528d722e3fbSopenharmony_ci if (family_id >= AMDGPU_FAMILY_VI) { 529d722e3fbSopenharmony_ci vce_taskinfo[3] = 3; 530d722e3fbSopenharmony_ci amdgpu_cs_vce_encode_idr(&enc); 531d722e3fbSopenharmony_ci amdgpu_cs_vce_encode_p(&enc); 532d722e3fbSopenharmony_ci check_result(&enc); 533d722e3fbSopenharmony_ci 534d722e3fbSopenharmony_ci /* two pipes */ 535d722e3fbSopenharmony_ci vce_encode[16] = 0; 536d722e3fbSopenharmony_ci amdgpu_cs_vce_encode_idr(&enc); 537d722e3fbSopenharmony_ci amdgpu_cs_vce_encode_p(&enc); 538d722e3fbSopenharmony_ci check_result(&enc); 539d722e3fbSopenharmony_ci 540d722e3fbSopenharmony_ci /* two instances */ 541d722e3fbSopenharmony_ci if (vce_harvest_config == 0) { 542d722e3fbSopenharmony_ci enc.two_instance = true; 543d722e3fbSopenharmony_ci vce_taskinfo[2] = 0x83; 544d722e3fbSopenharmony_ci vce_taskinfo[4] = 1; 545d722e3fbSopenharmony_ci amdgpu_cs_vce_encode_idr(&enc); 546d722e3fbSopenharmony_ci vce_taskinfo[2] = 0xffffffff; 547d722e3fbSopenharmony_ci vce_taskinfo[4] = 2; 548d722e3fbSopenharmony_ci amdgpu_cs_vce_encode_p(&enc); 549d722e3fbSopenharmony_ci check_result(&enc); 550d722e3fbSopenharmony_ci } 551d722e3fbSopenharmony_ci } else { 552d722e3fbSopenharmony_ci vce_taskinfo[3] = 3; 553d722e3fbSopenharmony_ci vce_encode[16] = 0; 554d722e3fbSopenharmony_ci amdgpu_cs_vce_encode_idr(&enc); 555d722e3fbSopenharmony_ci amdgpu_cs_vce_encode_p(&enc); 556d722e3fbSopenharmony_ci check_result(&enc); 557d722e3fbSopenharmony_ci } 558d722e3fbSopenharmony_ci 559d722e3fbSopenharmony_ci free_resource(&enc.fb[0]); 560d722e3fbSopenharmony_ci free_resource(&enc.fb[1]); 561d722e3fbSopenharmony_ci free_resource(&enc.bs[0]); 562d722e3fbSopenharmony_ci free_resource(&enc.bs[1]); 563d722e3fbSopenharmony_ci free_resource(&enc.vbuf); 564d722e3fbSopenharmony_ci free_resource(&enc.cpb); 565d722e3fbSopenharmony_ci} 566d722e3fbSopenharmony_ci 567d722e3fbSopenharmony_cistatic void amdgpu_cs_vce_mv(struct amdgpu_vce_encode *enc) 568d722e3fbSopenharmony_ci{ 569d722e3fbSopenharmony_ci uint64_t luma_offset, chroma_offset; 570d722e3fbSopenharmony_ci uint64_t mv_ref_luma_offset; 571d722e3fbSopenharmony_ci unsigned align = (family_id >= AMDGPU_FAMILY_AI) ? 256 : 16; 572d722e3fbSopenharmony_ci unsigned luma_size = ALIGN(enc->width, align) * ALIGN(enc->height, 16); 573d722e3fbSopenharmony_ci int len = 0, i, r; 574d722e3fbSopenharmony_ci 575d722e3fbSopenharmony_ci luma_offset = enc->vbuf.addr; 576d722e3fbSopenharmony_ci chroma_offset = luma_offset + luma_size; 577d722e3fbSopenharmony_ci mv_ref_luma_offset = enc->mvrefbuf.addr; 578d722e3fbSopenharmony_ci 579d722e3fbSopenharmony_ci memcpy((ib_cpu + len), vce_session, sizeof(vce_session)); 580d722e3fbSopenharmony_ci len += sizeof(vce_session) / 4; 581d722e3fbSopenharmony_ci memcpy((ib_cpu + len), vce_taskinfo, sizeof(vce_taskinfo)); 582d722e3fbSopenharmony_ci len += sizeof(vce_taskinfo) / 4; 583d722e3fbSopenharmony_ci memcpy((ib_cpu + len), vce_bs_buffer, sizeof(vce_bs_buffer)); 584d722e3fbSopenharmony_ci ib_cpu[len + 2] = enc->bs[0].addr >> 32; 585d722e3fbSopenharmony_ci ib_cpu[len + 3] = enc->bs[0].addr; 586d722e3fbSopenharmony_ci len += sizeof(vce_bs_buffer) / 4; 587d722e3fbSopenharmony_ci memcpy((ib_cpu + len), vce_context_buffer, sizeof(vce_context_buffer)); 588d722e3fbSopenharmony_ci ib_cpu[len + 2] = enc->cpb.addr >> 32; 589d722e3fbSopenharmony_ci ib_cpu[len + 3] = enc->cpb.addr; 590d722e3fbSopenharmony_ci len += sizeof(vce_context_buffer) / 4; 591d722e3fbSopenharmony_ci memcpy((ib_cpu + len), vce_aux_buffer, sizeof(vce_aux_buffer)); 592d722e3fbSopenharmony_ci for (i = 0; i < 8; ++i) 593d722e3fbSopenharmony_ci ib_cpu[len + 2 + i] = luma_size * 1.5 * (i + 2); 594d722e3fbSopenharmony_ci for (i = 0; i < 8; ++i) 595d722e3fbSopenharmony_ci ib_cpu[len + 10 + i] = luma_size * 1.5; 596d722e3fbSopenharmony_ci len += sizeof(vce_aux_buffer) / 4; 597d722e3fbSopenharmony_ci memcpy((ib_cpu + len), vce_feedback, sizeof(vce_feedback)); 598d722e3fbSopenharmony_ci ib_cpu[len + 2] = enc->fb[0].addr >> 32; 599d722e3fbSopenharmony_ci ib_cpu[len + 3] = enc->fb[0].addr; 600d722e3fbSopenharmony_ci len += sizeof(vce_feedback) / 4; 601d722e3fbSopenharmony_ci memcpy((ib_cpu + len), vce_mv_buffer, sizeof(vce_mv_buffer)); 602d722e3fbSopenharmony_ci ib_cpu[len + 2] = mv_ref_luma_offset >> 32; 603d722e3fbSopenharmony_ci ib_cpu[len + 3] = mv_ref_luma_offset; 604d722e3fbSopenharmony_ci ib_cpu[len + 4] = ALIGN(enc->width, align); 605d722e3fbSopenharmony_ci ib_cpu[len + 5] = ALIGN(enc->width, align); 606d722e3fbSopenharmony_ci ib_cpu[len + 6] = luma_size; 607d722e3fbSopenharmony_ci ib_cpu[len + 7] = enc->mvb.addr >> 32; 608d722e3fbSopenharmony_ci ib_cpu[len + 8] = enc->mvb.addr; 609d722e3fbSopenharmony_ci len += sizeof(vce_mv_buffer) / 4; 610d722e3fbSopenharmony_ci memcpy((ib_cpu + len), vce_encode, sizeof(vce_encode)); 611d722e3fbSopenharmony_ci ib_cpu[len + 2] = 0; 612d722e3fbSopenharmony_ci ib_cpu[len + 3] = 0; 613d722e3fbSopenharmony_ci ib_cpu[len + 4] = 0x154000; 614d722e3fbSopenharmony_ci ib_cpu[len + 9] = luma_offset >> 32; 615d722e3fbSopenharmony_ci ib_cpu[len + 10] = luma_offset; 616d722e3fbSopenharmony_ci ib_cpu[len + 11] = chroma_offset >> 32; 617d722e3fbSopenharmony_ci ib_cpu[len + 12] = chroma_offset; 618d722e3fbSopenharmony_ci ib_cpu[len + 13] = ALIGN(enc->height, 16);; 619d722e3fbSopenharmony_ci ib_cpu[len + 14] = ALIGN(enc->width, align); 620d722e3fbSopenharmony_ci ib_cpu[len + 15] = ALIGN(enc->width, align); 621d722e3fbSopenharmony_ci /* encDisableMBOffloading-encDisableTwoPipeMode-encInputPicArrayMode-encInputPicAddrMode */ 622d722e3fbSopenharmony_ci ib_cpu[len + 16] = 0x01010000; 623d722e3fbSopenharmony_ci ib_cpu[len + 18] = 0; /* encPicType */ 624d722e3fbSopenharmony_ci ib_cpu[len + 19] = 0; /* encIdrFlag */ 625d722e3fbSopenharmony_ci ib_cpu[len + 20] = 0; /* encIdrPicId */ 626d722e3fbSopenharmony_ci ib_cpu[len + 21] = 0; /* encMGSKeyPic */ 627d722e3fbSopenharmony_ci ib_cpu[len + 22] = 0; /* encReferenceFlag */ 628d722e3fbSopenharmony_ci ib_cpu[len + 23] = 0; /* encTemporalLayerIndex */ 629d722e3fbSopenharmony_ci ib_cpu[len + 55] = 0; /* pictureStructure */ 630d722e3fbSopenharmony_ci ib_cpu[len + 56] = 0; /* encPicType -ref[0] */ 631d722e3fbSopenharmony_ci ib_cpu[len + 61] = 0; /* pictureStructure */ 632d722e3fbSopenharmony_ci ib_cpu[len + 62] = 0; /* encPicType -ref[1] */ 633d722e3fbSopenharmony_ci ib_cpu[len + 67] = 0; /* pictureStructure */ 634d722e3fbSopenharmony_ci ib_cpu[len + 68] = 0; /* encPicType -ref1 */ 635d722e3fbSopenharmony_ci ib_cpu[len + 81] = 1; /* frameNumber */ 636d722e3fbSopenharmony_ci ib_cpu[len + 82] = 2; /* pictureOrderCount */ 637d722e3fbSopenharmony_ci ib_cpu[len + 83] = 0xffffffff; /* numIPicRemainInRCGOP */ 638d722e3fbSopenharmony_ci ib_cpu[len + 84] = 0xffffffff; /* numPPicRemainInRCGOP */ 639d722e3fbSopenharmony_ci ib_cpu[len + 85] = 0xffffffff; /* numBPicRemainInRCGOP */ 640d722e3fbSopenharmony_ci ib_cpu[len + 86] = 0xffffffff; /* numIRPicRemainInRCGOP */ 641d722e3fbSopenharmony_ci ib_cpu[len + 87] = 0; /* remainedIntraRefreshPictures */ 642d722e3fbSopenharmony_ci len += sizeof(vce_encode) / 4; 643d722e3fbSopenharmony_ci 644d722e3fbSopenharmony_ci enc->ib_len = len; 645d722e3fbSopenharmony_ci r = submit(len, AMDGPU_HW_IP_VCE); 646d722e3fbSopenharmony_ci CU_ASSERT_EQUAL(r, 0); 647d722e3fbSopenharmony_ci} 648d722e3fbSopenharmony_ci 649d722e3fbSopenharmony_cistatic void check_mv_result(struct amdgpu_vce_encode *enc) 650d722e3fbSopenharmony_ci{ 651d722e3fbSopenharmony_ci uint64_t sum; 652d722e3fbSopenharmony_ci uint32_t s = 140790; 653d722e3fbSopenharmony_ci int j, r; 654d722e3fbSopenharmony_ci 655d722e3fbSopenharmony_ci r = amdgpu_bo_cpu_map(enc->fb[0].handle, (void **)&enc->fb[0].ptr); 656d722e3fbSopenharmony_ci CU_ASSERT_EQUAL(r, 0); 657d722e3fbSopenharmony_ci r = amdgpu_bo_cpu_unmap(enc->fb[0].handle); 658d722e3fbSopenharmony_ci CU_ASSERT_EQUAL(r, 0); 659d722e3fbSopenharmony_ci r = amdgpu_bo_cpu_map(enc->mvb.handle, (void **)&enc->mvb.ptr); 660d722e3fbSopenharmony_ci CU_ASSERT_EQUAL(r, 0); 661d722e3fbSopenharmony_ci for (j = 0, sum = 0; j < enc->mvbuf_size; ++j) 662d722e3fbSopenharmony_ci sum += enc->mvb.ptr[j]; 663d722e3fbSopenharmony_ci CU_ASSERT_EQUAL(sum, s); 664d722e3fbSopenharmony_ci r = amdgpu_bo_cpu_unmap(enc->mvb.handle); 665d722e3fbSopenharmony_ci CU_ASSERT_EQUAL(r, 0); 666d722e3fbSopenharmony_ci} 667d722e3fbSopenharmony_ci 668d722e3fbSopenharmony_cistatic void amdgpu_cs_vce_encode_mv(void) 669d722e3fbSopenharmony_ci{ 670d722e3fbSopenharmony_ci uint32_t vbuf_size, bs_size = 0x154000, cpb_size; 671d722e3fbSopenharmony_ci unsigned align = (family_id >= AMDGPU_FAMILY_AI) ? 256 : 16; 672d722e3fbSopenharmony_ci int i, r; 673d722e3fbSopenharmony_ci 674d722e3fbSopenharmony_ci vbuf_size = ALIGN(enc.width, align) * ALIGN(enc.height, 16) * 1.5; 675d722e3fbSopenharmony_ci enc.mvbuf_size = ALIGN(enc.width, 16) * ALIGN(enc.height, 16) / 8; 676d722e3fbSopenharmony_ci cpb_size = vbuf_size * 10; 677d722e3fbSopenharmony_ci num_resources = 0; 678d722e3fbSopenharmony_ci alloc_resource(&enc.fb[0], 4096, AMDGPU_GEM_DOMAIN_GTT); 679d722e3fbSopenharmony_ci resources[num_resources++] = enc.fb[0].handle; 680d722e3fbSopenharmony_ci alloc_resource(&enc.bs[0], bs_size, AMDGPU_GEM_DOMAIN_GTT); 681d722e3fbSopenharmony_ci resources[num_resources++] = enc.bs[0].handle; 682d722e3fbSopenharmony_ci alloc_resource(&enc.mvb, enc.mvbuf_size, AMDGPU_GEM_DOMAIN_GTT); 683d722e3fbSopenharmony_ci resources[num_resources++] = enc.mvb.handle; 684d722e3fbSopenharmony_ci alloc_resource(&enc.vbuf, vbuf_size, AMDGPU_GEM_DOMAIN_VRAM); 685d722e3fbSopenharmony_ci resources[num_resources++] = enc.vbuf.handle; 686d722e3fbSopenharmony_ci alloc_resource(&enc.mvrefbuf, vbuf_size, AMDGPU_GEM_DOMAIN_VRAM); 687d722e3fbSopenharmony_ci resources[num_resources++] = enc.mvrefbuf.handle; 688d722e3fbSopenharmony_ci alloc_resource(&enc.cpb, cpb_size, AMDGPU_GEM_DOMAIN_VRAM); 689d722e3fbSopenharmony_ci resources[num_resources++] = enc.cpb.handle; 690d722e3fbSopenharmony_ci resources[num_resources++] = ib_handle; 691d722e3fbSopenharmony_ci 692d722e3fbSopenharmony_ci r = amdgpu_bo_cpu_map(enc.vbuf.handle, (void **)&enc.vbuf.ptr); 693d722e3fbSopenharmony_ci CU_ASSERT_EQUAL(r, 0); 694d722e3fbSopenharmony_ci 695d722e3fbSopenharmony_ci memset(enc.vbuf.ptr, 0, vbuf_size); 696d722e3fbSopenharmony_ci for (i = 0; i < enc.height; ++i) { 697d722e3fbSopenharmony_ci memcpy(enc.vbuf.ptr, (frame + i * enc.width), enc.width); 698d722e3fbSopenharmony_ci enc.vbuf.ptr += ALIGN(enc.width, align); 699d722e3fbSopenharmony_ci } 700d722e3fbSopenharmony_ci for (i = 0; i < enc.height / 2; ++i) { 701d722e3fbSopenharmony_ci memcpy(enc.vbuf.ptr, ((frame + enc.height * enc.width) + i * enc.width), enc.width); 702d722e3fbSopenharmony_ci enc.vbuf.ptr += ALIGN(enc.width, align); 703d722e3fbSopenharmony_ci } 704d722e3fbSopenharmony_ci 705d722e3fbSopenharmony_ci r = amdgpu_bo_cpu_unmap(enc.vbuf.handle); 706d722e3fbSopenharmony_ci CU_ASSERT_EQUAL(r, 0); 707d722e3fbSopenharmony_ci 708d722e3fbSopenharmony_ci r = amdgpu_bo_cpu_map(enc.mvrefbuf.handle, (void **)&enc.mvrefbuf.ptr); 709d722e3fbSopenharmony_ci CU_ASSERT_EQUAL(r, 0); 710d722e3fbSopenharmony_ci 711d722e3fbSopenharmony_ci memset(enc.mvrefbuf.ptr, 0, vbuf_size); 712d722e3fbSopenharmony_ci for (i = 0; i < enc.height; ++i) { 713d722e3fbSopenharmony_ci memcpy(enc.mvrefbuf.ptr, (frame + (enc.height - i -1) * enc.width), enc.width); 714d722e3fbSopenharmony_ci enc.mvrefbuf.ptr += ALIGN(enc.width, align); 715d722e3fbSopenharmony_ci } 716d722e3fbSopenharmony_ci for (i = 0; i < enc.height / 2; ++i) { 717d722e3fbSopenharmony_ci memcpy(enc.mvrefbuf.ptr, 718d722e3fbSopenharmony_ci ((frame + enc.height * enc.width) + (enc.height / 2 - i -1) * enc.width), enc.width); 719d722e3fbSopenharmony_ci enc.mvrefbuf.ptr += ALIGN(enc.width, align); 720d722e3fbSopenharmony_ci } 721d722e3fbSopenharmony_ci 722d722e3fbSopenharmony_ci r = amdgpu_bo_cpu_unmap(enc.mvrefbuf.handle); 723d722e3fbSopenharmony_ci CU_ASSERT_EQUAL(r, 0); 724d722e3fbSopenharmony_ci 725d722e3fbSopenharmony_ci amdgpu_cs_vce_config(); 726d722e3fbSopenharmony_ci 727d722e3fbSopenharmony_ci vce_taskinfo[3] = 3; 728d722e3fbSopenharmony_ci amdgpu_cs_vce_mv(&enc); 729d722e3fbSopenharmony_ci check_mv_result(&enc); 730d722e3fbSopenharmony_ci 731d722e3fbSopenharmony_ci free_resource(&enc.fb[0]); 732d722e3fbSopenharmony_ci free_resource(&enc.bs[0]); 733d722e3fbSopenharmony_ci free_resource(&enc.vbuf); 734d722e3fbSopenharmony_ci free_resource(&enc.cpb); 735d722e3fbSopenharmony_ci free_resource(&enc.mvrefbuf); 736d722e3fbSopenharmony_ci free_resource(&enc.mvb); 737d722e3fbSopenharmony_ci} 738d722e3fbSopenharmony_ci 739d722e3fbSopenharmony_cistatic void amdgpu_cs_vce_destroy(void) 740d722e3fbSopenharmony_ci{ 741d722e3fbSopenharmony_ci int len, r; 742d722e3fbSopenharmony_ci 743d722e3fbSopenharmony_ci num_resources = 0; 744d722e3fbSopenharmony_ci alloc_resource(&enc.fb[0], 4096, AMDGPU_GEM_DOMAIN_GTT); 745d722e3fbSopenharmony_ci resources[num_resources++] = enc.fb[0].handle; 746d722e3fbSopenharmony_ci resources[num_resources++] = ib_handle; 747d722e3fbSopenharmony_ci 748d722e3fbSopenharmony_ci len = 0; 749d722e3fbSopenharmony_ci memcpy(ib_cpu, vce_session, sizeof(vce_session)); 750d722e3fbSopenharmony_ci len += sizeof(vce_session) / 4; 751d722e3fbSopenharmony_ci memcpy((ib_cpu + len), vce_taskinfo, sizeof(vce_taskinfo)); 752d722e3fbSopenharmony_ci ib_cpu[len + 3] = 1; 753d722e3fbSopenharmony_ci len += sizeof(vce_taskinfo) / 4; 754d722e3fbSopenharmony_ci memcpy((ib_cpu + len), vce_feedback, sizeof(vce_feedback)); 755d722e3fbSopenharmony_ci ib_cpu[len + 2] = enc.fb[0].addr >> 32; 756d722e3fbSopenharmony_ci ib_cpu[len + 3] = enc.fb[0].addr; 757d722e3fbSopenharmony_ci len += sizeof(vce_feedback) / 4; 758d722e3fbSopenharmony_ci memcpy((ib_cpu + len), vce_destroy, sizeof(vce_destroy)); 759d722e3fbSopenharmony_ci len += sizeof(vce_destroy) / 4; 760d722e3fbSopenharmony_ci 761d722e3fbSopenharmony_ci r = submit(len, AMDGPU_HW_IP_VCE); 762d722e3fbSopenharmony_ci CU_ASSERT_EQUAL(r, 0); 763d722e3fbSopenharmony_ci 764d722e3fbSopenharmony_ci free_resource(&enc.fb[0]); 765d722e3fbSopenharmony_ci} 766