/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <sys/types.h>
#ifdef MAJOR_IN_SYSMACROS
#include <sys/sysmacros.h>
#endif
#include <sys/stat.h>
#include <fcntl.h>
#if HAVE_ALLOCA_H
# include <alloca.h>
#endif
#include <sys/wait.h>

#include "CUnit/Basic.h"

#include "amdgpu_test.h"
#include "amdgpu_drm.h"
#include "amdgpu_internal.h"
#include "util_math.h"

static amdgpu_device_handle device_handle;
static uint32_t major_version;
static uint32_t minor_version;
static uint32_t family_id;
static uint32_t chip_id;
static uint32_t chip_rev;

static void amdgpu_query_info_test(void);
static void amdgpu_command_submission_gfx(void);
static void amdgpu_command_submission_compute(void);
static void amdgpu_command_submission_multi_fence(void);
static void amdgpu_command_submission_sdma(void);
static void amdgpu_userptr_test(void);
static void amdgpu_semaphore_test(void);
static void amdgpu_sync_dependency_test(void);
static void amdgpu_bo_eviction_test(void);
static void amdgpu_compute_dispatch_test(void);
static void amdgpu_gfx_dispatch_test(void);
static void amdgpu_draw_test(void);
static void amdgpu_gpu_reset_test(void);
static void amdgpu_stable_pstate_test(void);

static void amdgpu_command_submission_write_linear_helper(unsigned ip_type);
static void amdgpu_command_submission_const_fill_helper(unsigned ip_type);
static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type);
static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
				       unsigned ip_type,
				       int instance, int pm4_dw, uint32_t *pm4_src,
				       int res_cnt, amdgpu_bo_handle *resources,
				       struct amdgpu_cs_ib_info *ib_info,
				       struct amdgpu_cs_request *ibs_request);

CU_TestInfo basic_tests[] = {
	{ "Query Info Test", amdgpu_query_info_test },
	{ "Userptr Test", amdgpu_userptr_test },
	{ "bo eviction Test", amdgpu_bo_eviction_test },
	{ "Command submission Test (GFX)", amdgpu_command_submission_gfx },
	{ "Command submission Test (Compute)", amdgpu_command_submission_compute },
	{ "Command submission Test (Multi-Fence)", amdgpu_command_submission_multi_fence },
	{ "Command submission Test (SDMA)", amdgpu_command_submission_sdma },
	{ "SW semaphore Test", amdgpu_semaphore_test },
	{ "Sync dependency Test", amdgpu_sync_dependency_test },
	{ "Dispatch Test (Compute)", amdgpu_compute_dispatch_test },
	{ "Dispatch Test (GFX)", amdgpu_gfx_dispatch_test },
	{ "Draw Test", amdgpu_draw_test },
	{ "GPU reset Test", amdgpu_gpu_reset_test },
	{ "Stable pstate Test", amdgpu_stable_pstate_test },
	CU_TEST_INFO_NULL,
};

#define BUFFER_SIZE (MAX2(8 * 1024, getpagesize()))

#define SDMA_PKT_HEADER_op_offset 0
#define SDMA_PKT_HEADER_op_mask   0x000000FF
#define SDMA_PKT_HEADER_op_shift  0
#define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift)
#define SDMA_OPCODE_CONSTANT_FILL		11
#	define SDMA_CONSTANT_FILL_EXTRA_SIZE(x)	((x) << 14)
	/* 0 = byte fill
	 * 2 = DW fill
	 */
#define SDMA_PACKET(op, sub_op, e)	((((e) & 0xFFFF) << 16) |	\
					 (((sub_op) & 0xFF) << 8) |	\
					 (((op) & 0xFF) << 0))
#define SDMA_OPCODE_WRITE		2
#	define SDMA_WRITE_SUB_OPCODE_LINEAR	0
#	define SDMA_WRITE_SUB_OPCODE_TILED	1

#define SDMA_OPCODE_COPY		1
#	define SDMA_COPY_SUB_OPCODE_LINEAR	0

#define SDMA_OPCODE_ATOMIC		10
#	define SDMA_ATOMIC_LOOP(x)	((x) << 0)
	/* 0 - single_pass_atomic.
	 * 1 - loop_until_compare_satisfied.
	 */
#	define SDMA_ATOMIC_TMZ(x)	((x) << 2)
	/* 0 - non-TMZ.
	 * 1 - TMZ.
	 */
#	define SDMA_ATOMIC_OPCODE(x)	((x) << 9)
	/* TC_OP_ATOMIC_CMPSWAP_RTN_32 0x00000008
	 * same as Packet 3
	 */

#define GFX_COMPUTE_NOP  0xffff1000
#define SDMA_NOP  0x0
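/*
 * Worked example of the SDMA header layout: extra bits live in [31:16],
 * the sub-opcode in [15:8] and the opcode in [7:0], so
 * SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0)
 * expands to ((0 & 0xFFFF) << 16) | ((0 & 0xFF) << 8) | ((1 & 0xFF) << 0)
 * == 0x00000001.
 */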
/* PM4 */
#define PACKET_TYPE0	0
#define PACKET_TYPE1	1
#define PACKET_TYPE2	2
#define PACKET_TYPE3	3

#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF)
#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
#define PACKET0(reg, n)	((PACKET_TYPE0 << 30) |	\
			 ((reg) & 0xFFFF) |	\
			 ((n) & 0x3FFF) << 16)
#define CP_PACKET2	0x80000000
#define PACKET2_PAD_SHIFT	0
#define PACKET2_PAD_MASK	(0x3fffffff << 0)

#define PACKET2(v)	(CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))

#define PACKET3(op, n)	((PACKET_TYPE3 << 30) |	\
			 (((op) & 0xFF) << 8) |	\
			 ((n) & 0x3FFF) << 16)
#define PACKET3_COMPUTE(op, n) (PACKET3(op, n) | (1 << 1))
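/*
 * Worked example: PACKET3(PACKET3_NOP, 14) ==
 * (3 << 30) | (0x10 << 8) | (14 << 16) == 0xc00e1000, i.e. a type-3 NOP
 * with count 14. Note that GFX_COMPUTE_NOP (0xffff1000) is simply
 * PACKET3(PACKET3_NOP, 0x3fff), a NOP with the maximum count field.
 */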
/* Packet 3 types */
#define PACKET3_NOP			0x10

#define PACKET3_WRITE_DATA		0x37
#define		WRITE_DATA_DST_SEL(x)	((x) << 8)
		/* 0 - register
		 * 1 - memory (sync - via GRBM)
		 * 2 - gl2
		 * 3 - gds
		 * 4 - reserved
		 * 5 - memory (async - direct)
		 */
#define		WR_ONE_ADDR		(1 << 16)
#define		WR_CONFIRM		(1 << 20)
#define		WRITE_DATA_CACHE_POLICY(x)	((x) << 25)
		/* 0 - LRU
		 * 1 - Stream
		 */
#define		WRITE_DATA_ENGINE_SEL(x)	((x) << 30)
		/* 0 - me
		 * 1 - pfp
		 * 2 - ce
		 */

#define PACKET3_ATOMIC_MEM		0x1E
#define		TC_OP_ATOMIC_CMPSWAP_RTN_32	0x00000008
#define		ATOMIC_MEM_COMMAND(x)		((x) << 8)
		/* 0 - single_pass_atomic.
		 * 1 - loop_until_compare_satisfied.
		 */
#define		ATOMIC_MEM_CACHEPOLICY(x)	((x) << 25)
		/* 0 - lru.
		 * 1 - stream.
		 */
#define		ATOMIC_MEM_ENGINESEL(x)		((x) << 30)
		/* 0 - micro_engine.
		 */

#define PACKET3_DMA_DATA		0x50
/* 1. header
 * 2. CONTROL
 * 3. SRC_ADDR_LO or DATA [31:0]
 * 4. SRC_ADDR_HI [31:0]
 * 5. DST_ADDR_LO [31:0]
 * 6. DST_ADDR_HI [7:0]
 * 7. COMMAND [30:21] | BYTE_COUNT [20:0]
 */
/* CONTROL */
#	define PACKET3_DMA_DATA_ENGINE(x)	((x) << 0)
		/* 0 - ME
		 * 1 - PFP
		 */
#	define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x)	((x) << 13)
		/* 0 - LRU
		 * 1 - Stream
		 * 2 - Bypass
		 */
#	define PACKET3_DMA_DATA_SRC_VOLATILE	(1 << 15)
#	define PACKET3_DMA_DATA_DST_SEL(x)	((x) << 20)
		/* 0 - DST_ADDR using DAS
		 * 1 - GDS
		 * 3 - DST_ADDR using L2
		 */
#	define PACKET3_DMA_DATA_DST_CACHE_POLICY(x)	((x) << 25)
		/* 0 - LRU
		 * 1 - Stream
		 * 2 - Bypass
		 */
#	define PACKET3_DMA_DATA_DST_VOLATILE	(1 << 27)
#	define PACKET3_DMA_DATA_SRC_SEL(x)	((x) << 29)
		/* 0 - SRC_ADDR using SAS
		 * 1 - GDS
		 * 2 - DATA
		 * 3 - SRC_ADDR using L2
		 */
#	define PACKET3_DMA_DATA_CP_SYNC	(1 << 31)
/* COMMAND */
#	define PACKET3_DMA_DATA_DIS_WC	(1 << 21)
#	define PACKET3_DMA_DATA_CMD_SRC_SWAP(x)	((x) << 22)
		/* 0 - none
		 * 1 - 8 in 16
		 * 2 - 8 in 32
		 * 3 - 8 in 64
		 */
#	define PACKET3_DMA_DATA_CMD_DST_SWAP(x)	((x) << 24)
		/* 0 - none
		 * 1 - 8 in 16
		 * 2 - 8 in 32
		 * 3 - 8 in 64
		 */
#	define PACKET3_DMA_DATA_CMD_SAS	(1 << 26)
		/* 0 - memory
		 * 1 - register
		 */
#	define PACKET3_DMA_DATA_CMD_DAS	(1 << 27)
		/* 0 - memory
		 * 1 - register
		 */
#	define PACKET3_DMA_DATA_CMD_SAIC	(1 << 28)
#	define PACKET3_DMA_DATA_CMD_DAIC	(1 << 29)
#	define PACKET3_DMA_DATA_CMD_RAW_WAIT	(1 << 30)

#define SDMA_PACKET_SI(op, b, t, s, cnt)	((((op) & 0xF) << 28) |	\
						 (((b) & 0x1) << 26) |	\
						 (((t) & 0x1) << 23) |	\
						 (((s) & 0x1) << 22) |	\
						 (((cnt) & 0xFFFFF) << 0))
#define SDMA_OPCODE_COPY_SI		3
#define SDMA_OPCODE_CONSTANT_FILL_SI	13
#define SDMA_NOP_SI			0xf
#define GFX_COMPUTE_NOP_SI		0x80000000
#define PACKET3_DMA_DATA_SI		0x41
#	define PACKET3_DMA_DATA_SI_ENGINE(x)	((x) << 27)
		/* 0 - ME
		 * 1 - PFP
		 */
#	define PACKET3_DMA_DATA_SI_DST_SEL(x)	((x) << 20)
		/* 0 - DST_ADDR using DAS
		 * 1 - GDS
		 * 3 - DST_ADDR using L2
		 */
#	define PACKET3_DMA_DATA_SI_SRC_SEL(x)	((x) << 29)
		/* 0 - SRC_ADDR using SAS
		 * 1 - GDS
		 * 2 - DATA
		 * 3 - SRC_ADDR using L2
		 */
#	define PACKET3_DMA_DATA_SI_CP_SYNC	(1 << 31)


#define PKT3_CONTEXT_CONTROL			0x28
#define		CONTEXT_CONTROL_LOAD_ENABLE(x)		(((unsigned)(x) & 0x1) << 31)
#define		CONTEXT_CONTROL_LOAD_CE_RAM(x)		(((unsigned)(x) & 0x1) << 28)
#define		CONTEXT_CONTROL_SHADOW_ENABLE(x)	(((unsigned)(x) & 0x1) << 31)

#define PKT3_CLEAR_STATE			0x12

#define PKT3_SET_SH_REG				0x76
#define		PACKET3_SET_SH_REG_START	0x00002c00

#define PKT3_SET_SH_REG_INDEX			0x9B

#define PACKET3_DISPATCH_DIRECT			0x15
#define PACKET3_EVENT_WRITE			0x46
#define PACKET3_ACQUIRE_MEM			0x58
#define PACKET3_SET_CONTEXT_REG			0x69
#define PACKET3_SET_UCONFIG_REG			0x79
#define PACKET3_DRAW_INDEX_AUTO			0x2D

/* gfx 8 */
#define mmCOMPUTE_PGM_LO		0x2e0c
#define mmCOMPUTE_PGM_RSRC1		0x2e12
#define mmCOMPUTE_TMPRING_SIZE		0x2e18
#define mmCOMPUTE_USER_DATA_0		0x2e40
#define mmCOMPUTE_USER_DATA_1		0x2e41
#define mmCOMPUTE_RESOURCE_LIMITS	0x2e15
#define mmCOMPUTE_NUM_THREAD_X		0x2e07

#define SWAP_32(num) (((num & 0xff000000) >> 24) | \
		      ((num & 0x0000ff00) << 8) | \
		      ((num & 0x00ff0000) >> 8) | \
		      ((num & 0x000000ff) << 24))
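/* Example: SWAP_32(0x11223344) == 0x44332211, i.e. the byte order is reversed. */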
/* Shader code
 * void main()
 * {
 *	float x = some_input;
 *	for (unsigned i = 0; i < 1000000; i++)
 *		x = sin(x);
 *
 *	u[0] = 42u;
 * }
 */
static uint32_t shader_bin[] = {
	SWAP_32(0x800082be), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0x040085bf),
	SWAP_32(0x02810281), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0xfcff84bf),
	SWAP_32(0xff0083be), SWAP_32(0x00f00000), SWAP_32(0xc10082be), SWAP_32(0xaa02007e),
	SWAP_32(0x000070e0), SWAP_32(0x00000080), SWAP_32(0x000081bf)
};

#define CODE_OFFSET 512
#define DATA_OFFSET 1024

enum cs_type {
	CS_BUFFERCLEAR,
	CS_BUFFERCOPY,
	CS_HANG,
	CS_HANG_SLOW
};

static const uint32_t bufferclear_cs_shader_gfx9[] = {
	0x260000ff, 0x000003ff, 0xd1fd0000, 0x04010c08,
	0x7e020280, 0x7e040204, 0x7e060205, 0x7e080206,
	0x7e0a0207, 0xe01c2000, 0x80000200, 0xbf8c0000,
	0xbf810000
};

static const uint32_t bufferclear_cs_shader_gfx10[] = {
	0xD7460004, 0x04010C08, 0x7E000204, 0x7E020205,
	0x7E040206, 0x7E060207, 0xE01C2000, 0x80000004,
	0xBF810000
};

static const uint32_t bufferclear_cs_shader_registers_gfx9[][2] = {
	{0x2e12, 0x000C0041},	//{ mmCOMPUTE_PGM_RSRC1,	 0x000C0041 },
	{0x2e13, 0x00000090},	//{ mmCOMPUTE_PGM_RSRC2,	 0x00000090 },
	{0x2e07, 0x00000040},	//{ mmCOMPUTE_NUM_THREAD_X, 0x00000040 },
	{0x2e08, 0x00000001},	//{ mmCOMPUTE_NUM_THREAD_Y, 0x00000001 },
	{0x2e09, 0x00000001},	//{ mmCOMPUTE_NUM_THREAD_Z, 0x00000001 }
};

static const uint32_t bufferclear_cs_shader_registers_num_gfx9 = 5;

static const uint32_t buffercopy_cs_shader_gfx9[] = {
	0x260000ff, 0x000003ff, 0xd1fd0000, 0x04010c08,
	0x7e020280, 0xe00c2000, 0x80000200, 0xbf8c0f70,
	0xe01c2000, 0x80010200, 0xbf810000
};

static const uint32_t buffercopy_cs_shader_gfx10[] = {
	0xD7460001, 0x04010C08, 0xE00C2000, 0x80000201,
	0xBF8C3F70, 0xE01C2000, 0x80010201, 0xBF810000
};
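/*
 * The preamble caches below are raw PM4 streams: each group is a
 * SET_CONTEXT_REG / SET_UCONFIG_REG packet header followed by a register
 * offset and its value(s); e.g. the recurring header 0xc0026900 is
 * PACKET3(PACKET3_SET_CONTEXT_REG, 2).
 */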
static const uint32_t preamblecache_gfx9[] = {
	0xc0026900, 0x81, 0x80000000, 0x40004000, 0xc0026900, 0x8c, 0xaa99aaaa, 0x0,
	0xc0026900, 0x90, 0x80000000, 0x40004000, 0xc0026900, 0x94, 0x80000000, 0x40004000,
	0xc0026900, 0xb4, 0x0, 0x3f800000, 0xc0016900, 0x103, 0x0,
	0xc0016900, 0x208, 0x0, 0xc0016900, 0x290, 0x0,
	0xc0016900, 0x2a1, 0x0, 0xc0026900, 0x2ad, 0x0, 0x0,
	0xc0016900, 0x2d5, 0x10000, 0xc0016900, 0x2dc, 0x0,
	0xc0066900, 0x2de, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc0026900, 0x2e5, 0x0, 0x0,
	0xc0056900, 0x2f9, 0x5, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
	0xc0036900, 0x311, 0x3, 0, 0x100000, 0xc0026900, 0x316, 0x1e, 0x20,
	0xc0016900, 0x349, 0x0, 0xc0016900, 0x358, 0x0, 0xc0016900, 0x367, 0x0,
	0xc0016900, 0x376, 0x0, 0xc0016900, 0x385, 0x0, 0xc0016900, 0x19, 0x0,
	0xc0056900, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0076900, 0x1e1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0026900, 0x204, 0x90000, 0x4, 0xc0046900, 0x20c, 0x0, 0x0, 0x0, 0x0,
	0xc0016900, 0x2b2, 0x0, 0xc0026900, 0x30e, 0xffffffff, 0xffffffff,
	0xc0016900, 0x314, 0x0, 0xc0016900, 0x2a6, 0, 0xc0016900, 0x210, 0,
	0xc0002f00, 0x1, 0xc0016900, 0x1, 0x1,
	0xc0016900, 0x18, 0x2, 0xc0016900, 0x206, 0x300, 0xc0017900, 0x20000243, 0x0,
	0xc0017900, 0x248, 0xffffffff, 0xc0017900, 0x249, 0x0, 0xc0017900, 0x24a, 0x0,
	0xc0017900, 0x24b, 0x0
};

static const uint32_t preamblecache_gfx10[] = {
	0xc0026900, 0x81, 0x80000000, 0x40004000, 0xc0026900, 0x8c, 0xaa99aaaa, 0x0,
	0xc0026900, 0x90, 0x80000000, 0x40004000, 0xc0026900, 0x94, 0x80000000, 0x40004000,
	0xc0026900, 0xb4, 0x0, 0x3f800000, 0xc0016900, 0x103, 0x0,
	0xc0016900, 0x208, 0x0, 0xc0016900, 0x290, 0x0,
	0xc0016900, 0x2a1, 0x0, 0xc0026900, 0x2ad, 0x0, 0x0,
	0xc0016900, 0x2d5, 0x10000, 0xc0016900, 0x2dc, 0x0,
	0xc0066900, 0x2de, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc0026900, 0x2e5, 0x0, 0x0,
	0xc0056900, 0x2f9, 0x5, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
	0xc0046900, 0x310, 0, 0x3, 0, 0x100000, 0xc0026900, 0x316, 0xe, 0x20,
	0xc0016900, 0x349, 0x0, 0xc0016900, 0x358, 0x0, 0xc0016900, 0x367, 0x0,
	0xc0016900, 0x376, 0x0, 0xc0016900, 0x385, 0x0, 0xc0016900, 0x6, 0x0,
	0xc0056900, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0076900, 0x1e1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0026900, 0x204, 0x90000, 0x4, 0xc0046900, 0x20c, 0x0, 0x0, 0x0, 0x0,
	0xc0016900, 0x2b2, 0x0, 0xc0026900, 0x30e, 0xffffffff, 0xffffffff,
	0xc0016900, 0x314, 0x0, 0xc0016900, 0x10a, 0, 0xc0016900, 0x2a6, 0, 0xc0016900, 0x210, 0,
	0xc0016900, 0x2db, 0, 0xc0016900, 0x1d4, 0, 0xc0002f00, 0x1, 0xc0016900, 0x1, 0x1, 0xc0016900, 0xe, 0x2,
	0xc0016900, 0x206, 0x300, 0xc0016900, 0x212, 0x200, 0xc0017900, 0x7b, 0x20, 0xc0017a00, 0x20000243, 0x0,
	0xc0017900, 0x249, 0, 0xc0017900, 0x24a, 0, 0xc0017900, 0x24b, 0, 0xc0017900, 0x259, 0xffffffff,
	0xc0017900, 0x25f, 0, 0xc0017900, 0x260, 0, 0xc0017900, 0x262, 0,
	0xc0017600, 0x45, 0x0, 0xc0017600, 0x6, 0x0,
	0xc0067600, 0x70, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
	0xc0067600, 0x30, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0
};

enum ps_type {
	PS_CONST,
	PS_TEX,
	PS_HANG,
	PS_HANG_SLOW
};

static const uint32_t ps_const_shader_gfx9[] = {
	0x7E000200, 0x7E020201, 0x7E040202, 0x7E060203,
	0xD2960000, 0x00020300, 0xD2960001, 0x00020702,
	0xC4001C0F, 0x00000100, 0xBF810000
};

static const uint32_t ps_const_shader_patchinfo_code_size_gfx9 = 6;
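/*
 * Note: each *_patchinfo_code table below holds ten alternative 6-dword
 * ending sequences for the pixel shader; one variant is presumably copied
 * over the shader at the dword offset given by the matching
 * *_patchinfo_offset array to select the export format (the patching is
 * done by the draw-test helpers elsewhere in the suite).
 */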
static const uint32_t ps_const_shader_patchinfo_code_gfx9[][10][6] = {
	{{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000000 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000100 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000300 },
	 { 0xD2960000, 0x00020300, 0xD2960001, 0x00020702, 0xC4001C0F, 0x00000100 },
	 { 0xD2950000, 0x00020300, 0xD2950001, 0x00020702, 0xC4001C0F, 0x00000100 },
	 { 0xD2940000, 0x00020300, 0xD2940001, 0x00020702, 0xC4001C0F, 0x00000100 },
	 { 0xD2970000, 0x00020300, 0xD2970001, 0x00020702, 0xC4001C0F, 0x00000100 },
	 { 0xD2980000, 0x00020300, 0xD2980001, 0x00020702, 0xC4001C0F, 0x00000100 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x03020100 }
	}
};

static const uint32_t ps_const_shader_patchinfo_offset_gfx9[] = {
	0x00000004
};

static const uint32_t ps_num_sh_registers_gfx9 = 2;

static const uint32_t ps_const_sh_registers_gfx9[][2] = {
	{0x2C0A, 0x000C0040},	//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0040 },
	{0x2C0B, 0x00000008},	//{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000008 }
};

static const uint32_t ps_num_context_registers_gfx9 = 7;

static const uint32_t ps_const_context_reg_gfx9[][2] = {
	{0xA1B4, 0x00000002},	//{ mmSPI_PS_INPUT_ADDR,     0x00000002 },
	{0xA1B6, 0x00000000},	//{ mmSPI_PS_IN_CONTROL,     0x00000000 },
	{0xA08F, 0x0000000F},	//{ mmCB_SHADER_MASK,        0x0000000F },
	{0xA203, 0x00000010},	//{ mmDB_SHADER_CONTROL,     0x00000010 },
	{0xA1C4, 0x00000000},	//{ mmSPI_SHADER_Z_FORMAT,   0x00000000 },
	{0xA1B8, 0x00000000},	//{ mmSPI_BARYC_CNTL,        0x00000000 /* Always 0 for now */},
	{0xA1C5, 0x00000004},	//{ mmSPI_SHADER_COL_FORMAT, 0x00000004 }
};

static const uint32_t ps_const_shader_gfx10[] = {
	0x7E000200, 0x7E020201, 0x7E040202, 0x7E060203,
	0x5E000300, 0x5E020702, 0xBF800000, 0xBF800000,
	0xF8001C0F, 0x00000100, 0xBF810000
};

static const uint32_t ps_const_shader_patchinfo_code_size_gfx10 = 6;

static const uint32_t ps_const_shader_patchinfo_code_gfx10[][10][6] = {
	{{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001890, 0x00000000 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001801, 0x00000000 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001803, 0x00000100 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001803, 0x00000300 },
	 { 0x5E000300, 0x5E020702, 0xBF800000, 0xBF800000, 0xF8001C0F, 0x00000100 },
	 { 0xD7690000, 0x00020300, 0xD7690001, 0x00020702, 0xF8001C0F, 0x00000100 },
	 { 0xD7680000, 0x00020300, 0xD7680001, 0x00020702, 0xF8001C0F, 0x00000100 },
	 { 0xD76A0000, 0x00020300, 0xD76A0001, 0x00020702, 0xF8001C0F, 0x00000100 },
	 { 0xD76B0000, 0x00020300, 0xD76B0001, 0x00020702, 0xF8001C0F, 0x00000100 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF800180F, 0x03020100 }
	}
};

static const uint32_t ps_const_shader_patchinfo_offset_gfx10[] = {
	0x00000004
};

static const uint32_t ps_num_sh_registers_gfx10 = 2;

static const uint32_t ps_const_sh_registers_gfx10[][2] = {
	{0x2C0A, 0x000C0000},	//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0000 },
	{0x2C0B, 0x00000008},	//{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000008 }
};

static const uint32_t ps_tex_shader_gfx9[] = {
	0xBEFC000C, 0xBE8E017E, 0xBEFE077E, 0xD4180000,
	0xD4190001, 0xD41C0100, 0xD41D0101, 0xF0800F00,
	0x00400206, 0xBEFE010E, 0xBF8C0F70, 0xD2960000,
	0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F,
	0x00000100, 0xBF810000
};

static const uint32_t ps_tex_shader_patchinfo_offset_gfx9[] = {
	0x0000000B
};

static const uint32_t ps_tex_shader_patchinfo_code_size_gfx9 = 6;

static const uint32_t ps_tex_shader_patchinfo_code_gfx9[][10][6] = {
	{{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000002 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000302 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000502 },
	 { 0xD2960000, 0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F, 0x00000100 },
	 { 0xD2950000, 0x00020702, 0xD2950001, 0x00020B04, 0xC4001C0F, 0x00000100 },
	 { 0xD2940000, 0x00020702, 0xD2940001, 0x00020B04, 0xC4001C0F, 0x00000100 },
	 { 0xD2970000, 0x00020702, 0xD2970001, 0x00020B04, 0xC4001C0F, 0x00000100 },
	 { 0xD2980000, 0x00020702, 0xD2980001, 0x00020B04, 0xC4001C0F, 0x00000100 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x05040302 }
	}
};

static const uint32_t ps_tex_sh_registers_gfx9[][2] = {
	{0x2C0A, 0x000C0081},	//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0081 },
	{0x2C0B, 0x00000018},	//{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000018 }
};
static const uint32_t ps_tex_context_reg_gfx9[][2] = {
	{0xA1B4, 0x00000002},	//{ mmSPI_PS_INPUT_ADDR,     0x00000002 },
	{0xA1B6, 0x00000001},	//{ mmSPI_PS_IN_CONTROL,     0x00000001 },
	{0xA08F, 0x0000000F},	//{ mmCB_SHADER_MASK,        0x0000000F },
	{0xA203, 0x00000010},	//{ mmDB_SHADER_CONTROL,     0x00000010 },
	{0xA1C4, 0x00000000},	//{ mmSPI_SHADER_Z_FORMAT,   0x00000000 },
	{0xA1B8, 0x00000000},	//{ mmSPI_BARYC_CNTL,        0x00000000 /* Always 0 for now */},
	{0xA1C5, 0x00000004},	//{ mmSPI_SHADER_COL_FORMAT, 0x00000004 }
};

static const uint32_t ps_tex_shader_gfx10[] = {
	0xBEFC030C, 0xBE8E047E, 0xBEFE0A7E, 0xC8080000,
	0xC80C0100, 0xC8090001, 0xC80D0101, 0xF0800F0A,
	0x00400402, 0x00000003, 0xBEFE040E, 0xBF8C0F70,
	0x5E000B04, 0x5E020F06, 0xBF800000, 0xBF800000,
	0xF8001C0F, 0x00000100, 0xBF810000
};

static const uint32_t ps_tex_shader_patchinfo_offset_gfx10[] = {
	0x0000000C
};

static const uint32_t ps_tex_shader_patchinfo_code_size_gfx10 = 6;

static const uint32_t ps_tex_shader_patchinfo_code_gfx10[][10][6] = {
	{{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001890, 0x00000000 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001801, 0x00000004 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001803, 0x00000504 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001803, 0x00000704 },
	 { 0x5E000B04, 0x5E020F06, 0xBF800000, 0xBF800000, 0xF8001C0F, 0x00000100 },
	 { 0xD7690000, 0x00020B04, 0xD7690001, 0x00020F06, 0xF8001C0F, 0x00000100 },
	 { 0xD7680000, 0x00020B04, 0xD7680001, 0x00020F06, 0xF8001C0F, 0x00000100 },
	 { 0xD76A0000, 0x00020B04, 0xD76A0001, 0x00020F06, 0xF8001C0F, 0x00000100 },
	 { 0xD76B0000, 0x00020B04, 0xD76B0001, 0x00020F06, 0xF8001C0F, 0x00000100 },
	 { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF800180F, 0x07060504 }
	}
};

static const uint32_t vs_RectPosTexFast_shader_gfx9[] = {
	0x7E000B00, 0x020000F3, 0xD042000A, 0x00010100,
	0x7E020202, 0x7E040200, 0x020000F3, 0x7E060206,
	0x7E080204, 0xD1000001, 0x002A0302, 0x7C840080,
	0x7E000200, 0x7E040203, 0x7E0A0201, 0xD1000003,
	0x002A0704, 0x7E0C0207, 0x7E0E0205, 0x00000101,
	0x00020505, 0x7E040208, 0x7E0A02F2, 0x00060903,
	0x00080D07, 0x7E0C0209, 0xC40008CF, 0x05020100,
	0xC400020F, 0x05060403, 0xBF810000
};

static const uint32_t vs_RectPosTexFast_shader_gfx10[] = {
	0x7E000B00, 0x060000F3, 0x7E020202, 0x7E040206,
	0x7C040080, 0x060000F3, 0xD5010001, 0x01AA0200,
	0x7E060203, 0xD5010002, 0x01AA0404, 0x7E080207,
	0x7C040080, 0xD5010000, 0x01A80101, 0xD5010001,
	0x01AA0601, 0x7E060208, 0x7E0A02F2, 0xD5010002,
	0x01A80902, 0xD5010004, 0x01AA0805, 0x7E0C0209,
	0xF80008CF, 0x05030100, 0xF800020F, 0x05060402,
	0xBF810000
};

static const uint32_t cached_cmd_gfx9[] = {
	0xc0016900, 0x0, 0x0, 0xc0026900, 0x3, 0x2a, 0x0,
	0xc0046900, 0xa, 0x0, 0x0, 0x0, 0x200020,
	0xc0016900, 0x83, 0xffff, 0xc0026900, 0x8e, 0xf, 0xf,
	0xc0056900, 0x105, 0x0, 0x0, 0x0, 0x0, 0x12,
	0xc0026900, 0x10b, 0x0, 0x0, 0xc0016900, 0x1e0, 0x0,
	0xc0036900, 0x200, 0x0, 0x10000, 0xcc0011,
	0xc0026900, 0x292, 0x20, 0x60201b8,
	0xc0026900, 0x2b0, 0x0, 0x0, 0xc0016900, 0x2f8, 0x0
};

static const uint32_t cached_cmd_gfx10[] = {
	0xc0016900, 0x0, 0x0, 0xc0026900, 0x3, 0x2a, 0x0,
	0xc0046900, 0xa, 0x0, 0x0, 0x0, 0x200020,
	0xc0016900, 0x83, 0xffff, 0xc0026900, 0x8e, 0xf, 0xf,
	0xc0056900, 0x105, 0x0, 0x0, 0x0, 0x0, 0x18,
	0xc0026900, 0x10b, 0x0, 0x0, 0xc0016900, 0x1e0, 0x0,
	0xc0036900, 0x200, 0x0, 0x10000, 0xcc0011,
	0xc0026900, 0x292, 0x20, 0x6020000,
	0xc0026900, 0x2b0, 0x0, 0x0, 0xc0016900, 0x2f8, 0x0
};

unsigned int memcpy_ps_hang[] = {
	0xFFFFFFFF, 0xBEFE0A7E, 0xBEFC0304, 0xC0C20100,
	0xC0800300, 0xC8080000, 0xC80C0100, 0xC8090001,
	0xC80D0101, 0xBF8C007F, 0xF0800F00, 0x00010002,
	0xBEFE040C, 0xBF8C0F70, 0xBF800000, 0xBF800000,
	0xF800180F, 0x03020100, 0xBF810000
};

struct amdgpu_test_shader {
	uint32_t *shader;
	uint32_t header_length;
	uint32_t body_length;
	uint32_t foot_length;
};

unsigned int memcpy_cs_hang_slow_ai_codes[] = {
	0xd1fd0000, 0x04010c08, 0xe00c2000, 0x80000100,
	0xbf8c0f70, 0xe01c2000, 0x80010100, 0xbf810000
};

struct amdgpu_test_shader memcpy_cs_hang_slow_ai = {
	memcpy_cs_hang_slow_ai_codes,
	4,
	3,
	1
};

unsigned int memcpy_cs_hang_slow_rv_codes[] = {
	0x8e00860c, 0x32000000, 0xe00c2000, 0x80010100,
	0xbf8c0f70, 0xe01c2000, 0x80020100, 0xbf810000
};

struct amdgpu_test_shader memcpy_cs_hang_slow_rv = {
	memcpy_cs_hang_slow_rv_codes,
	4,
	3,
	1
};

unsigned int memcpy_cs_hang_slow_nv_codes[] = {
	0xd7460000, 0x04010c08, 0xe00c2000, 0x80000100,
	0xbf8c0f70, 0xe01ca000, 0x80010100, 0xbf810000
};

struct amdgpu_test_shader memcpy_cs_hang_slow_nv = {
	memcpy_cs_hang_slow_nv_codes,
	4,
	3,
	1
};

unsigned int memcpy_ps_hang_slow_ai_codes[] = {
	0xbefc000c, 0xbe8e017e, 0xbefe077e, 0xd4080000,
	0xd4090001, 0xd40c0100, 0xd40d0101, 0xf0800f00,
	0x00400002, 0xbefe010e, 0xbf8c0f70, 0xbf800000,
	0xbf800000, 0xbf800000, 0xbf800000, 0xc400180f,
	0x03020100, 0xbf810000
};

struct amdgpu_test_shader memcpy_ps_hang_slow_ai = {
	memcpy_ps_hang_slow_ai_codes,
	7,
	2,
	9
};
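/*
 * Allocates a BO, reserves a GPU VA range, maps the BO there with the given
 * mapping_flags on top of READABLE | WRITEABLE | EXECUTABLE, and maps it for
 * CPU access. On success fills *bo, *cpu, *mc_address and *va_handle; on
 * failure the partially built state is unwound through the error labels.
 */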
int amdgpu_bo_alloc_and_map_raw(amdgpu_device_handle dev, unsigned size,
				unsigned alignment, unsigned heap, uint64_t alloc_flags,
				uint64_t mapping_flags, amdgpu_bo_handle *bo, void **cpu,
				uint64_t *mc_address,
				amdgpu_va_handle *va_handle)
{
	struct amdgpu_bo_alloc_request request = {};
	amdgpu_bo_handle buf_handle;
	amdgpu_va_handle handle;
	uint64_t vmc_addr;
	int r;

	request.alloc_size = size;
	request.phys_alignment = alignment;
	request.preferred_heap = heap;
	request.flags = alloc_flags;

	r = amdgpu_bo_alloc(dev, &request, &buf_handle);
	if (r)
		return r;

	r = amdgpu_va_range_alloc(dev,
				  amdgpu_gpu_va_range_general,
				  size, alignment, 0, &vmc_addr,
				  &handle, 0);
	if (r)
		goto error_va_alloc;

	r = amdgpu_bo_va_op_raw(dev, buf_handle, 0, ALIGN(size, getpagesize()), vmc_addr,
				AMDGPU_VM_PAGE_READABLE |
				AMDGPU_VM_PAGE_WRITEABLE |
				AMDGPU_VM_PAGE_EXECUTABLE |
				mapping_flags,
				AMDGPU_VA_OP_MAP);
	if (r)
		goto error_va_map;

	r = amdgpu_bo_cpu_map(buf_handle, cpu);
	if (r)
		goto error_cpu_map;

	*bo = buf_handle;
	*mc_address = vmc_addr;
	*va_handle = handle;

	return 0;

error_cpu_map:
	amdgpu_bo_cpu_unmap(buf_handle);

error_va_map:
	amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP);

error_va_alloc:
	amdgpu_bo_free(buf_handle);
	return r;
}
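/*
 * Probes the device once to decide whether this suite can run at all, and
 * deactivates the GFX-engine test cases on ASICs whose graphics pipe (CPG)
 * has been removed.
 */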
CU_BOOL suite_basic_tests_enable(void)
{

	if (amdgpu_device_initialize(drm_amdgpu[0], &major_version,
				     &minor_version, &device_handle))
		return CU_FALSE;

	family_id = device_handle->info.family_id;
	chip_id = device_handle->info.chip_external_rev;
	chip_rev = device_handle->info.chip_rev;

	if (amdgpu_device_deinitialize(device_handle))
		return CU_FALSE;

	/* disable the GFX-engine basic test cases on ASICs that have no CPG */
	if (asic_is_gfx_pipe_removed(family_id, chip_id, chip_rev)) {
		if (amdgpu_set_test_active("Basic Tests",
					   "Command submission Test (GFX)",
					   CU_FALSE))
			fprintf(stderr, "test deactivation failed - %s\n",
				CU_get_error_msg());

		if (amdgpu_set_test_active("Basic Tests",
					   "Command submission Test (Multi-Fence)",
					   CU_FALSE))
			fprintf(stderr, "test deactivation failed - %s\n",
				CU_get_error_msg());

		if (amdgpu_set_test_active("Basic Tests",
					   "Sync dependency Test",
					   CU_FALSE))
			fprintf(stderr, "test deactivation failed - %s\n",
				CU_get_error_msg());
	}

	return CU_TRUE;
}

int suite_basic_tests_init(void)
{
	struct amdgpu_gpu_info gpu_info = {0};
	int r;

	r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
				     &minor_version, &device_handle);

	if (r) {
		if ((r == -EACCES) && (errno == EACCES))
			printf("\n\nError:%s. "
			       "Hint:Try to run this test program as root.",
			       strerror(errno));
		return CUE_SINIT_FAILED;
	}

	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	if (r)
		return CUE_SINIT_FAILED;

	family_id = gpu_info.family_id;

	return CUE_SUCCESS;
}

int suite_basic_tests_clean(void)
{
	int r = amdgpu_device_deinitialize(device_handle);

	if (r == 0)
		return CUE_SUCCESS;
	else
		return CUE_SCLEAN_FAILED;
}

static void amdgpu_query_info_test(void)
{
	struct amdgpu_gpu_info gpu_info = {0};
	uint32_t version, feature;
	int r;

	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_query_firmware_version(device_handle, AMDGPU_INFO_FW_VCE, 0,
					  0, &version, &feature);
	CU_ASSERT_EQUAL(r, 0);
}
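/*
 * Submits one CS containing two IBs: a constant-engine IB
 * (AMDGPU_IB_FLAG_CE) that programs the CE/DE counters, and a DE IB that
 * waits on the CE counter; then waits for the resulting fence.
 */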
static void amdgpu_command_submission_gfx_separate_ibs(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
	void *ib_result_cpu, *ib_result_ce_cpu;
	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle, va_handle_ce;
	int r, i = 0;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_ce_handle, &ib_result_ce_cpu,
				    &ib_result_ce_mc_address, &va_handle_ce);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
			       ib_result_ce_handle, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_ce_cpu;
	if (family_id != AMDGPU_FAMILY_SI) {
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	ptr[i++] = 0xc0008400;
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
	ib_info[0].size = i;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	/* IT_WAIT_ON_CE_COUNTER */
	ptr = ib_result_cpu;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address;
	ib_info[1].size = 2;

	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.number_of_ibs = 2;
	ibs_request.ibs = ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle;
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
				     ib_result_ce_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx_shared_ib(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info[2];
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;
	int r, i = 0;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
			       &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));

	/* IT_SET_CE_DE_COUNTERS */
	ptr = ib_result_cpu;
	if (family_id != AMDGPU_FAMILY_SI) {
		ptr[i++] = 0xc0008900;
		ptr[i++] = 0;
	}
	ptr[i++] = 0xc0008400;
	ptr[i++] = 1;
	ib_info[0].ib_mc_address = ib_result_mc_address;
	ib_info[0].size = i;
	ib_info[0].flags = AMDGPU_IB_FLAG_CE;

	ptr = (uint32_t *)ib_result_cpu + 4;
	ptr[0] = 0xc0008600;
	ptr[1] = 0x00000001;
	ib_info[1].ib_mc_address = ib_result_mc_address + 16;
	ib_info[1].size = 2;

	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.number_of_ibs = 2;
	ibs_request.ibs = ib_info;
	ibs_request.resources = bo_list;
	ibs_request.fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle;
	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request.seq_no;

	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx_cp_write_data(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_GFX);
}

static void amdgpu_command_submission_gfx_cp_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_GFX);
}

static void amdgpu_command_submission_gfx_cp_copy_data(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_GFX);
}
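/*
 * Allocates two BOs of max_allocation size in both VRAM and GTT and attaches
 * one of each to every submission, so that the kernel has to evict and
 * restore buffers between submissions; then checks that an SDMA copy between
 * two small GTT BOs still yields the expected pattern for each combination
 * of cached and USWC CPU mappings.
 */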
static void amdgpu_bo_eviction_test(void)
{
	const int sdma_write_length = 1024;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo1, bo2, vram_max[2], gtt_max[2];
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo1_mc, bo2_mc;
	volatile unsigned char *bo1_cpu, *bo2_cpu;
	int i, j, r, loop1, loop2;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
	struct amdgpu_heap_info vram_info, gtt_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resource */
	resources = calloc(4, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_VRAM,
				   0, &vram_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[1]);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_GTT,
				   0, &gtt_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[0]);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[1]);
	CU_ASSERT_EQUAL(r, 0);

	loop1 = loop2 = 0;
	/* run 4 rounds to cover all mapping-flag combinations */
	while (loop1 < 2) {
		while (loop2 < 2) {
			/* allocate UC bo1 for sDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop1], &bo1,
						    (void**)&bo1_cpu, &bo1_mc,
						    &bo1_va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* set bo1 */
			memset((void*)bo1_cpu, 0xaa, sdma_write_length);

			/* allocate UC bo2 for sDMA use */
			r = amdgpu_bo_alloc_and_map(device_handle,
						    sdma_write_length, 4096,
						    AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop2], &bo2,
						    (void**)&bo2_cpu, &bo2_mc,
						    &bo2_va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* clear bo2 */
			memset((void*)bo2_cpu, 0, sdma_write_length);

			resources[0] = bo1;
			resources[1] = bo2;
			resources[2] = vram_max[loop2];
			resources[3] = gtt_max[loop2];

			/* fill in PM4: test DMA copy linear */
			i = j = 0;
			if (family_id == AMDGPU_FAMILY_SI) {
				pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0,
							  sdma_write_length);
				pm4[i++] = 0xffffffff & bo2_mc;
				pm4[i++] = 0xffffffff & bo1_mc;
				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
			} else {
				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY,
						       SDMA_COPY_SUB_OPCODE_LINEAR, 0);
				if (family_id >= AMDGPU_FAMILY_AI)
					pm4[i++] = sdma_write_length - 1;
				else
					pm4[i++] = sdma_write_length;
				pm4[i++] = 0;
				pm4[i++] = 0xffffffff & bo1_mc;
				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
				pm4[i++] = 0xffffffff & bo2_mc;
				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
			}

			amdgpu_test_exec_cs_helper(context_handle,
						   AMDGPU_HW_IP_DMA, 0,
						   i, pm4,
						   4, resources,
						   ib_info, ibs_request);

			/* verify that the SDMA copy produced the expected pattern */
			i = 0;
			while (i < sdma_write_length) {
				CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
			}
			r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
			r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
						     sdma_write_length);
			CU_ASSERT_EQUAL(r, 0);
			loop2++;
		}
		loop2 = 0;
		loop1++;
	}
	amdgpu_bo_free(vram_max[0]);
	amdgpu_bo_free(vram_max[1]);
	amdgpu_bo_free(gtt_max[0]);
	amdgpu_bo_free(gtt_max[1]);
	/* clean resources */
	free(resources);
	free(ibs_request);
	free(ib_info);
	free(pm4);

	/* end of test */
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_gfx(void)
{
	/* write data using the CP */
	amdgpu_command_submission_gfx_cp_write_data();
	/* const fill using the CP */
	amdgpu_command_submission_gfx_cp_const_fill();
	/* copy data using the CP */
	amdgpu_command_submission_gfx_cp_copy_data();
	/* separate IB buffers for multi-IB submission */
	amdgpu_command_submission_gfx_separate_ibs();
	/* shared IB buffer for multi-IB submission */
	amdgpu_command_submission_gfx_shared_ib();
}

static void amdgpu_semaphore_test(void)
{
	amdgpu_context_handle context_handle[2];
	amdgpu_semaphore_handle sem;
	amdgpu_bo_handle ib_result_handle[2];
	void *ib_result_cpu[2];
	uint64_t ib_result_mc_address[2];
	struct amdgpu_cs_request ibs_request[2] = {0};
	struct amdgpu_cs_ib_info ib_info[2] = {0};
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t *ptr;
	uint32_t expired;
	uint32_t sdma_nop, gfx_nop;
	amdgpu_bo_list_handle bo_list[2];
	amdgpu_va_handle va_handle[2];
	int r, i;
	struct amdgpu_gpu_info gpu_info = {0};
	unsigned gc_ip_type;

	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	CU_ASSERT_EQUAL(r, 0);

	gc_ip_type = (asic_is_gfx_pipe_removed(family_id, chip_id, chip_rev)) ?
			AMDGPU_HW_IP_COMPUTE : AMDGPU_HW_IP_GFX;

	if (family_id == AMDGPU_FAMILY_SI) {
		sdma_nop = SDMA_PACKET_SI(SDMA_NOP_SI, 0, 0, 0, 0);
		gfx_nop = GFX_COMPUTE_NOP_SI;
	} else {
		sdma_nop = SDMA_PKT_HEADER_OP(SDMA_NOP);
		gfx_nop = GFX_COMPUTE_NOP;
	}

	r = amdgpu_cs_create_semaphore(&sem);
	CU_ASSERT_EQUAL(r, 0);
	for (i = 0; i < 2; i++) {
		r = amdgpu_cs_ctx_create(device_handle, &context_handle[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
					    AMDGPU_GEM_DOMAIN_GTT, 0,
					    &ib_result_handle[i], &ib_result_cpu[i],
					    &ib_result_mc_address[i], &va_handle[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_get_bo_list(device_handle, ib_result_handle[i],
				       NULL, &bo_list[i]);
		CU_ASSERT_EQUAL(r, 0);
	}
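	/* Two scenarios follow: (1) within one context, signal the semaphore
	 * from the SDMA ring and wait on it from the GC (GFX or compute)
	 * ring; (2) on the same GC ring, signal from one context and wait
	 * from another.
	 */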
	/* 1. same context different engine */
	ptr = ib_result_cpu[0];
	ptr[0] = sdma_nop;
	ib_info[0].ib_mc_address = ib_result_mc_address[0];
	ib_info[0].size = 1;

	ibs_request[0].ip_type = AMDGPU_HW_IP_DMA;
	ibs_request[0].number_of_ibs = 1;
	ibs_request[0].ibs = &ib_info[0];
	ibs_request[0].resources = bo_list[0];
	ibs_request[0].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[0], 1);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_DMA, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_wait_semaphore(context_handle[0], gc_ip_type, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);
	ptr = ib_result_cpu[1];
	ptr[0] = gfx_nop;
	ib_info[1].ib_mc_address = ib_result_mc_address[1];
	ib_info[1].size = 1;

	ibs_request[1].ip_type = gc_ip_type;
	ibs_request[1].number_of_ibs = 1;
	ibs_request[1].ibs = &ib_info[1];
	ibs_request[1].resources = bo_list[1];
	ibs_request[1].fence_info.handle = NULL;

	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[1], 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle[0];
	fence_status.ip_type = gc_ip_type;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request[1].seq_no;
	r = amdgpu_cs_query_fence_status(&fence_status,
					 500000000, 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	/* 2. same engine different context */
	ptr = ib_result_cpu[0];
	ptr[0] = gfx_nop;
	ib_info[0].ib_mc_address = ib_result_mc_address[0];
	ib_info[0].size = 1;

	ibs_request[0].ip_type = gc_ip_type;
	ibs_request[0].number_of_ibs = 1;
	ibs_request[0].ibs = &ib_info[0];
	ibs_request[0].resources = bo_list[0];
	ibs_request[0].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[0], 0, &ibs_request[0], 1);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_cs_signal_semaphore(context_handle[0], gc_ip_type, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_wait_semaphore(context_handle[1], gc_ip_type, 0, 0, sem);
	CU_ASSERT_EQUAL(r, 0);
	ptr = ib_result_cpu[1];
	ptr[0] = gfx_nop;
	ib_info[1].ib_mc_address = ib_result_mc_address[1];
	ib_info[1].size = 1;

	ibs_request[1].ip_type = gc_ip_type;
	ibs_request[1].number_of_ibs = 1;
	ibs_request[1].ibs = &ib_info[1];
	ibs_request[1].resources = bo_list[1];
	ibs_request[1].fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle[1], 0, &ibs_request[1], 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.context = context_handle[1];
	fence_status.ip_type = gc_ip_type;
	fence_status.ip_instance = 0;
	fence_status.fence = ibs_request[1].seq_no;
	r = amdgpu_cs_query_fence_status(&fence_status,
					 500000000, 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	for (i = 0; i < 2; i++) {
		r = amdgpu_bo_unmap_and_free(ib_result_handle[i], va_handle[i],
					     ib_result_mc_address[i], 4096);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_list_destroy(bo_list[i]);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_cs_ctx_free(context_handle[i]);
		CU_ASSERT_EQUAL(r, 0);
	}

	r = amdgpu_cs_destroy_semaphore(sem);
	CU_ASSERT_EQUAL(r, 0);
}
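/*
 * Submits a single NOP packet (padded to a 16-dword IB) on every available
 * compute ring and waits for each submission's fence.
 */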
static void amdgpu_command_submission_compute_nop(void)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_request ibs_request;
	struct amdgpu_cs_ib_info ib_info;
	struct amdgpu_cs_fence fence_status;
	uint32_t *ptr;
	uint32_t expired;
	int r, instance;
	amdgpu_bo_list_handle bo_list;
	amdgpu_va_handle va_handle;
	struct drm_amdgpu_info_hw_ip info;

	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	for (instance = 0; (1 << instance) & info.available_rings; instance++) {
		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
					    AMDGPU_GEM_DOMAIN_GTT, 0,
					    &ib_result_handle, &ib_result_cpu,
					    &ib_result_mc_address, &va_handle);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
				       &bo_list);
		CU_ASSERT_EQUAL(r, 0);

		ptr = ib_result_cpu;
		memset(ptr, 0, 16);
		ptr[0] = PACKET3(PACKET3_NOP, 14);

		memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
		ib_info.ib_mc_address = ib_result_mc_address;
		ib_info.size = 16;

		memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
		ibs_request.ip_type = AMDGPU_HW_IP_COMPUTE;
		ibs_request.ring = instance;
		ibs_request.number_of_ibs = 1;
		ibs_request.ibs = &ib_info;
		ibs_request.resources = bo_list;
		ibs_request.fence_info.handle = NULL;

		memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
		r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
		CU_ASSERT_EQUAL(r, 0);

		fence_status.context = context_handle;
		fence_status.ip_type = AMDGPU_HW_IP_COMPUTE;
		fence_status.ip_instance = 0;
		fence_status.ring = instance;
		fence_status.fence = ibs_request.seq_no;

		r = amdgpu_cs_query_fence_status(&fence_status,
						 AMDGPU_TIMEOUT_INFINITE,
						 0, &expired);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_list_destroy(bo_list);
		CU_ASSERT_EQUAL(r, 0);

		r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
					     ib_result_mc_address, 4096);
		CU_ASSERT_EQUAL(r, 0);
	}

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_command_submission_compute_cp_write_data(void)
{
	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_command_submission_compute_cp_const_fill(void)
{
	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_command_submission_compute_cp_copy_data(void)
{
	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_COMPUTE);
}

static void amdgpu_command_submission_compute(void)
{
	/* write data using the CP */
	amdgpu_command_submission_compute_cp_write_data();
	/* const fill using the CP */
	amdgpu_command_submission_compute_cp_const_fill();
	/* copy data using the CP */
	amdgpu_command_submission_compute_cp_copy_data();
	/* nop test */
	amdgpu_command_submission_compute_nop();
}

/*
 * The caller must create and release pm4_src, resources, ib_info and
 * ibs_request. Submits the command stream described in ibs_request and
 * waits until the IB has completed.
 */
void
amdgpu_test_exec_cs_helper_raw(amdgpu_device_handle device_handle,
			       amdgpu_context_handle context_handle,
			       unsigned ip_type, int instance, int pm4_dw,
			       uint32_t *pm4_src, int res_cnt,
			       amdgpu_bo_handle *resources,
			       struct amdgpu_cs_ib_info *ib_info,
			       struct amdgpu_cs_request *ibs_request,
			       bool secure)
{
	int r;
	uint32_t expired;
	uint32_t *ring_ptr;
	amdgpu_bo_handle ib_result_handle;
	void *ib_result_cpu;
	uint64_t ib_result_mc_address;
	struct amdgpu_cs_fence fence_status = {0};
	amdgpu_bo_handle *all_res = alloca(sizeof(resources[0]) * (res_cnt + 1));
	amdgpu_va_handle va_handle;

	/* prepare CS */
	CU_ASSERT_NOT_EQUAL(pm4_src, NULL);
	CU_ASSERT_NOT_EQUAL(resources, NULL);
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
	CU_ASSERT_TRUE(pm4_dw <= 1024);

	/* allocate IB */
	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &ib_result_handle, &ib_result_cpu,
				    &ib_result_mc_address, &va_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* copy PM4 packet from the caller into the ring */
	ring_ptr = ib_result_cpu;
	memcpy(ring_ptr, pm4_src, pm4_dw * sizeof(*pm4_src));

	ib_info->ib_mc_address = ib_result_mc_address;
	ib_info->size = pm4_dw;
	if (secure)
		ib_info->flags |= AMDGPU_IB_FLAGS_SECURE;

	ibs_request->ip_type = ip_type;
	ibs_request->ring = instance;
	ibs_request->number_of_ibs = 1;
	ibs_request->ibs = ib_info;
	ibs_request->fence_info.handle = NULL;

	memcpy(all_res, resources, sizeof(resources[0]) * res_cnt);
	all_res[res_cnt] = ib_result_handle;

	r = amdgpu_bo_list_create(device_handle, res_cnt+1, all_res,
				  NULL, &ibs_request->resources);
	CU_ASSERT_EQUAL(r, 0);

	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	/* submit CS */
	r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(ibs_request->resources);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = ip_type;
	fence_status.ip_instance = 0;
	fence_status.ring = ibs_request->ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request->seq_no;

	/* wait until the IB has completed */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
				     ib_result_mc_address, 4096);
	CU_ASSERT_EQUAL(r, 0);
}

static void
amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
			   unsigned ip_type, int instance, int pm4_dw,
			   uint32_t *pm4_src, int res_cnt,
			   amdgpu_bo_handle *resources,
			   struct amdgpu_cs_ib_info *ib_info,
			   struct amdgpu_cs_request *ibs_request)
{
	amdgpu_test_exec_cs_helper_raw(device_handle, context_handle,
				       ip_type, instance, pm4_dw, pm4_src,
				       res_cnt, resources, ib_info,
				       ibs_request, false);
}
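/*
 * Writes a 0xdeadbeaf pattern with SDMA WRITE-linear or CP WRITE_DATA on
 * every available ring, once per GTT mapping flavor. In the secure (TMZ)
 * case the pattern cannot be checked through the CPU mapping, so the result
 * is verified on the GPU with atomic CMPSWAP packets instead.
 */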
void
amdgpu_command_submission_write_linear_helper_with_secure(amdgpu_device_handle device,
							  unsigned ip_type,
							  bool secure)
{
	const int sdma_write_length = 128;
	const int pm4_dw = 256;
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo;
	amdgpu_bo_handle *resources;
	uint32_t *pm4;
	struct amdgpu_cs_ib_info *ib_info;
	struct amdgpu_cs_request *ibs_request;
	uint64_t bo_mc;
	volatile uint32_t *bo_cpu;
	uint32_t bo_cpu_origin;
	int i, j, r, loop, ring_id;
	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
	amdgpu_va_handle va_handle;
	struct drm_amdgpu_info_hw_ip hw_ip_info;

	pm4 = calloc(pm4_dw, sizeof(*pm4));
	CU_ASSERT_NOT_EQUAL(pm4, NULL);

	ib_info = calloc(1, sizeof(*ib_info));
	CU_ASSERT_NOT_EQUAL(ib_info, NULL);

	ibs_request = calloc(1, sizeof(*ibs_request));
	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);

	r = amdgpu_query_hw_ip_info(device, ip_type, 0, &hw_ip_info);
	CU_ASSERT_EQUAL(r, 0);

	for (i = 0; secure && (i < 2); i++)
		gtt_flags[i] |= AMDGPU_GEM_CREATE_ENCRYPTED;

	r = amdgpu_cs_ctx_create(device, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	/* prepare resource */
	resources = calloc(1, sizeof(amdgpu_bo_handle));
	CU_ASSERT_NOT_EQUAL(resources, NULL);

	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
		loop = 0;
		while (loop < 2) {
			/* allocate UC bo for sDMA use */
			r = amdgpu_bo_alloc_and_map(device,
						    sdma_write_length * sizeof(uint32_t),
						    4096, AMDGPU_GEM_DOMAIN_GTT,
						    gtt_flags[loop], &bo, (void**)&bo_cpu,
						    &bo_mc, &va_handle);
			CU_ASSERT_EQUAL(r, 0);

			/* clear bo */
			memset((void*)bo_cpu, 0, sdma_write_length * sizeof(uint32_t));

			resources[0] = bo;

			/* fill in PM4: test DMA write-linear */
			i = j = 0;
			if (ip_type == AMDGPU_HW_IP_DMA) {
				if (family_id == AMDGPU_FAMILY_SI)
					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
								  sdma_write_length);
				else
					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
							       SDMA_WRITE_SUB_OPCODE_LINEAR,
							       secure ? SDMA_ATOMIC_TMZ(1) : 0);
				pm4[i++] = 0xfffffffc & bo_mc;
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				if (family_id >= AMDGPU_FAMILY_AI)
					pm4[i++] = sdma_write_length - 1;
				else if (family_id != AMDGPU_FAMILY_SI)
					pm4[i++] = sdma_write_length;
				while (j++ < sdma_write_length)
					pm4[i++] = 0xdeadbeaf;
			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
				   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
				pm4[i++] = PACKET3(PACKET3_WRITE_DATA, 2 + sdma_write_length);
				pm4[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
				pm4[i++] = 0xfffffffc & bo_mc;
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				while (j++ < sdma_write_length)
					pm4[i++] = 0xdeadbeaf;
			}

			amdgpu_test_exec_cs_helper_raw(device, context_handle,
						       ip_type, ring_id, i, pm4,
						       1, resources, ib_info,
						       ibs_request, secure);

			/* verify the write-linear result matches the expected pattern */
			i = 0;
			if (!secure) {
				while (i < sdma_write_length) {
					CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
				}
			} else if (ip_type == AMDGPU_HW_IP_GFX) {
				memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
				pm4[i++] = PACKET3(PACKET3_ATOMIC_MEM, 7);
				/* atomic opcode for 32b w/ RTN and ATOMIC_SWAPCMP_RTN
				 * command, 1 - loop_until_compare_satisfied
				 * cache policy, 0 - lru
				 * engine_sel, 0 - micro_engine
				 */
				pm4[i++] = (TC_OP_ATOMIC_CMPSWAP_RTN_32 |
					    ATOMIC_MEM_COMMAND(1) |
					    ATOMIC_MEM_CACHEPOLICY(0) |
					    ATOMIC_MEM_ENGINESEL(0));
				pm4[i++] = 0xfffffffc & bo_mc;
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				pm4[i++] = 0x12345678;
				pm4[i++] = 0x0;
				pm4[i++] = 0xdeadbeaf;
				pm4[i++] = 0x0;
				pm4[i++] = 0x100;
				amdgpu_test_exec_cs_helper_raw(device, context_handle,
							       ip_type, ring_id, i, pm4,
							       1, resources, ib_info,
							       ibs_request, true);
			} else if (ip_type == AMDGPU_HW_IP_DMA) {
				/* save bo_cpu[0] so the change can be detected */
				bo_cpu_origin = bo_cpu[0];
				memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
				/* atomic opcode for 32b w/ RTN and ATOMIC_SWAPCMP_RTN
				 * loop, 1 - loop_until_compare_satisfied
				 * single_pass_atomic, 0 - lru
				 */
				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_ATOMIC,
						       0,
						       SDMA_ATOMIC_LOOP(1) |
						       SDMA_ATOMIC_TMZ(1) |
						       SDMA_ATOMIC_OPCODE(TC_OP_ATOMIC_CMPSWAP_RTN_32));
				pm4[i++] = 0xfffffffc & bo_mc;
				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
				pm4[i++] = 0x12345678;
				pm4[i++] = 0x0;
				pm4[i++] = 0xdeadbeaf;
				pm4[i++] = 0x0;
				pm4[i++] = 0x100;
				amdgpu_test_exec_cs_helper_raw(device, context_handle,
							       ip_type, ring_id, i, pm4,
							       1, resources, ib_info,
							       ibs_request, true);
				/* SDMA's atomic behavior differs from GFX: if the compare
				 * never succeeds, GFX loops until the ring times out
				 * (hanging the system), while SDMA loops until a timer
				 * expires and then raises an interrupt, so the test cannot
				 * rely on the interrupt mechanism. Instead we verify through
				 * the CMPSWAP result itself: when the destination matches
				 * the compare value, the source value is written into the
				 * destination buffer; otherwise the destination is left
				 * unchanged. So if bo_cpu[0] has been overwritten here, this
				 * pass succeeded.
				 */
1703 */
1704 CU_ASSERT_NOT_EQUAL(bo_cpu[0], bo_cpu_origin);
1705
1706 /* run the atomic again for the case of dest_data != cmp_data */
1707 i = 0;
1708 /* snapshot again; after the successful swap above, dest_data should now be 0x12345678 */
1709 bo_cpu_origin = bo_cpu[0];
1710 memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
1711 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_ATOMIC,
1712 0,
1713 SDMA_ATOMIC_LOOP(1) |
1714 SDMA_ATOMIC_TMZ(1) |
1715 SDMA_ATOMIC_OPCODE(TC_OP_ATOMIC_CMPSWAP_RTN_32));
1716 pm4[i++] = 0xfffffffc & bo_mc;
1717 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1718 pm4[i++] = 0x87654321;
1719 pm4[i++] = 0x0;
1720 pm4[i++] = 0xdeadbeaf;
1721 pm4[i++] = 0x0;
1722 pm4[i++] = 0x100;
1723 amdgpu_test_exec_cs_helper_raw(device, context_handle,
1724 ip_type, ring_id, i, pm4,
1725 1, resources, ib_info,
1726 ibs_request, true);
1727 /* bo_cpu[0] must be unchanged (still 0x12345678): the compare value 0xdeadbeaf no longer matches, so no swap happens */
1728 CU_ASSERT_EQUAL(bo_cpu[0], bo_cpu_origin);
1729 }
1730
1731 r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
1732 sdma_write_length * sizeof(uint32_t));
1733 CU_ASSERT_EQUAL(r, 0);
1734 loop++;
1735 }
1736 }
1737 /* clean resources */
1738 free(resources);
1739 free(ibs_request);
1740 free(ib_info);
1741 free(pm4);
1742
1743 /* end of test */
1744 r = amdgpu_cs_ctx_free(context_handle);
1745 CU_ASSERT_EQUAL(r, 0);
1746 }
1747
1748 static void amdgpu_command_submission_write_linear_helper(unsigned ip_type)
1749 {
1750 amdgpu_command_submission_write_linear_helper_with_secure(device_handle,
1751 ip_type,
1752 false);
1753 }
1754
1755 static void amdgpu_command_submission_sdma_write_linear(void)
1756 {
1757 amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_DMA);
1758 }
1759
1760 static void amdgpu_command_submission_const_fill_helper(unsigned ip_type)
1761 {
1762 const int sdma_write_length = 1024 * 1024;
1763 const int pm4_dw = 256;
1764 amdgpu_context_handle context_handle;
1765 amdgpu_bo_handle bo;
1766 amdgpu_bo_handle *resources;
1767 uint32_t *pm4;
1768 struct amdgpu_cs_ib_info *ib_info;
1769 struct amdgpu_cs_request *ibs_request;
1770 uint64_t bo_mc;
1771 volatile uint32_t *bo_cpu;
1772 int i, j, r, loop, ring_id;
1773 uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
1774 amdgpu_va_handle va_handle;
1775 struct drm_amdgpu_info_hw_ip hw_ip_info;
1776
1777 pm4 = calloc(pm4_dw, sizeof(*pm4));
1778 CU_ASSERT_NOT_EQUAL(pm4, NULL);
1779
1780 ib_info = calloc(1, sizeof(*ib_info));
1781 CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1782
1783 ibs_request = calloc(1, sizeof(*ibs_request));
1784 CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1785
1786 r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
1787 CU_ASSERT_EQUAL(r, 0);
1788
1789 r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1790 CU_ASSERT_EQUAL(r, 0);
1791
1792 /* prepare resource */
1793 resources = calloc(1, sizeof(amdgpu_bo_handle));
1794 CU_ASSERT_NOT_EQUAL(resources, NULL);
1795
1796 for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
1797 loop = 0;
1798 while(loop < 2) {
1799 /* allocate UC bo for sDMA use */
1800 r = amdgpu_bo_alloc_and_map(device_handle,
1801 sdma_write_length, 4096,
1802 AMDGPU_GEM_DOMAIN_GTT,
1803 gtt_flags[loop], &bo, (void**)&bo_cpu,
1804 &bo_mc, &va_handle);
1805 CU_ASSERT_EQUAL(r, 0);
1806
1807 /* clear bo */
1808 memset((void*)bo_cpu, 0, sdma_write_length);
1809
1810 resources[0] = bo;
1811
1812 /* fulfill PM4: test DMA const fill */
1813 i = j = 0;
1814 if (ip_type == AMDGPU_HW_IP_DMA) {
1815 if (family_id == AMDGPU_FAMILY_SI) {
1816 pm4[i++] =
SDMA_PACKET_SI(SDMA_OPCODE_CONSTANT_FILL_SI, 1817 0, 0, 0, 1818 sdma_write_length / 4); 1819 pm4[i++] = 0xfffffffc & bo_mc; 1820 pm4[i++] = 0xdeadbeaf; 1821 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 16; 1822 } else { 1823 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0, 1824 SDMA_CONSTANT_FILL_EXTRA_SIZE(2)); 1825 pm4[i++] = 0xffffffff & bo_mc; 1826 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 1827 pm4[i++] = 0xdeadbeaf; 1828 if (family_id >= AMDGPU_FAMILY_AI) 1829 pm4[i++] = sdma_write_length - 1; 1830 else 1831 pm4[i++] = sdma_write_length; 1832 } 1833 } else if ((ip_type == AMDGPU_HW_IP_GFX) || 1834 (ip_type == AMDGPU_HW_IP_COMPUTE)) { 1835 if (family_id == AMDGPU_FAMILY_SI) { 1836 pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4); 1837 pm4[i++] = 0xdeadbeaf; 1838 pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) | 1839 PACKET3_DMA_DATA_SI_DST_SEL(0) | 1840 PACKET3_DMA_DATA_SI_SRC_SEL(2) | 1841 PACKET3_DMA_DATA_SI_CP_SYNC; 1842 pm4[i++] = 0xffffffff & bo_mc; 1843 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 1844 pm4[i++] = sdma_write_length; 1845 } else { 1846 pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5); 1847 pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) | 1848 PACKET3_DMA_DATA_DST_SEL(0) | 1849 PACKET3_DMA_DATA_SRC_SEL(2) | 1850 PACKET3_DMA_DATA_CP_SYNC; 1851 pm4[i++] = 0xdeadbeaf; 1852 pm4[i++] = 0; 1853 pm4[i++] = 0xfffffffc & bo_mc; 1854 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 1855 pm4[i++] = sdma_write_length; 1856 } 1857 } 1858 1859 amdgpu_test_exec_cs_helper(context_handle, 1860 ip_type, ring_id, 1861 i, pm4, 1862 1, resources, 1863 ib_info, ibs_request); 1864 1865 /* verify if SDMA test result meets with expected */ 1866 i = 0; 1867 while(i < (sdma_write_length / 4)) { 1868 CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf); 1869 } 1870 1871 r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc, 1872 sdma_write_length); 1873 CU_ASSERT_EQUAL(r, 0); 1874 loop++; 1875 } 1876 } 1877 /* clean resources */ 1878 free(resources); 1879 free(ibs_request); 1880 free(ib_info); 1881 free(pm4); 1882 1883 /* end of test */ 1884 r = amdgpu_cs_ctx_free(context_handle); 1885 CU_ASSERT_EQUAL(r, 0); 1886} 1887 1888static void amdgpu_command_submission_sdma_const_fill(void) 1889{ 1890 amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_DMA); 1891} 1892 1893static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type) 1894{ 1895 const int sdma_write_length = 1024; 1896 const int pm4_dw = 256; 1897 amdgpu_context_handle context_handle; 1898 amdgpu_bo_handle bo1, bo2; 1899 amdgpu_bo_handle *resources; 1900 uint32_t *pm4; 1901 struct amdgpu_cs_ib_info *ib_info; 1902 struct amdgpu_cs_request *ibs_request; 1903 uint64_t bo1_mc, bo2_mc; 1904 volatile unsigned char *bo1_cpu, *bo2_cpu; 1905 int i, j, r, loop1, loop2, ring_id; 1906 uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC}; 1907 amdgpu_va_handle bo1_va_handle, bo2_va_handle; 1908 struct drm_amdgpu_info_hw_ip hw_ip_info; 1909 1910 pm4 = calloc(pm4_dw, sizeof(*pm4)); 1911 CU_ASSERT_NOT_EQUAL(pm4, NULL); 1912 1913 ib_info = calloc(1, sizeof(*ib_info)); 1914 CU_ASSERT_NOT_EQUAL(ib_info, NULL); 1915 1916 ibs_request = calloc(1, sizeof(*ibs_request)); 1917 CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 1918 1919 r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info); 1920 CU_ASSERT_EQUAL(r, 0); 1921 1922 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 1923 CU_ASSERT_EQUAL(r, 0); 1924 1925 /* prepare resource */ 1926 resources = calloc(2, sizeof(amdgpu_bo_handle)); 1927 CU_ASSERT_NOT_EQUAL(resources, NULL); 1928 1929 for (ring_id = 0; (1 << 
ring_id) & hw_ip_info.available_rings; ring_id++) {
1930 loop1 = 0;
1931 /* run all 2x2 GTT-flag combinations for bo1/bo2; loop2 must be
 * reset inside the outer loop so the second pass actually runs */
1932 while(loop1 < 2) {
loop2 = 0;
1933 while(loop2 < 2) {
1934 /* allocate UC bo1 for sDMA use */
1935 r = amdgpu_bo_alloc_and_map(device_handle,
1936 sdma_write_length, 4096,
1937 AMDGPU_GEM_DOMAIN_GTT,
1938 gtt_flags[loop1], &bo1,
1939 (void**)&bo1_cpu, &bo1_mc,
1940 &bo1_va_handle);
1941 CU_ASSERT_EQUAL(r, 0);
1942
1943 /* set bo1 */
1944 memset((void*)bo1_cpu, 0xaa, sdma_write_length);
1945
1946 /* allocate UC bo2 for sDMA use */
1947 r = amdgpu_bo_alloc_and_map(device_handle,
1948 sdma_write_length, 4096,
1949 AMDGPU_GEM_DOMAIN_GTT,
1950 gtt_flags[loop2], &bo2,
1951 (void**)&bo2_cpu, &bo2_mc,
1952 &bo2_va_handle);
1953 CU_ASSERT_EQUAL(r, 0);
1954
1955 /* clear bo2 */
1956 memset((void*)bo2_cpu, 0, sdma_write_length);
1957
1958 resources[0] = bo1;
1959 resources[1] = bo2;
1960
1961 /* fulfill PM4: test DMA copy linear */
1962 i = j = 0;
1963 if (ip_type == AMDGPU_HW_IP_DMA) {
1964 if (family_id == AMDGPU_FAMILY_SI) {
1965 pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI,
1966 0, 0, 0,
1967 sdma_write_length);
1968 pm4[i++] = 0xffffffff & bo2_mc;
1969 pm4[i++] = 0xffffffff & bo1_mc;
1970 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1971 pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1972 } else {
1973 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY,
1974 SDMA_COPY_SUB_OPCODE_LINEAR,
1975 0);
1976 if (family_id >= AMDGPU_FAMILY_AI)
1977 pm4[i++] = sdma_write_length - 1;
1978 else
1979 pm4[i++] = sdma_write_length;
1980 pm4[i++] = 0;
1981 pm4[i++] = 0xffffffff & bo1_mc;
1982 pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1983 pm4[i++] = 0xffffffff & bo2_mc;
1984 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1985 }
1986 } else if ((ip_type == AMDGPU_HW_IP_GFX) ||
1987 (ip_type == AMDGPU_HW_IP_COMPUTE)) {
1988 if (family_id == AMDGPU_FAMILY_SI) {
1989 pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
1990 pm4[i++] = 0xfffffffc & bo1_mc;
1991 pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
1992 PACKET3_DMA_DATA_SI_DST_SEL(0) |
1993 PACKET3_DMA_DATA_SI_SRC_SEL(0) |
1994 PACKET3_DMA_DATA_SI_CP_SYNC |
1995 (0xffff00000000 & bo1_mc) >> 32;
1996 pm4[i++] = 0xfffffffc & bo2_mc;
1997 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1998 pm4[i++] = sdma_write_length;
1999 } else {
2000 pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
2001 pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
2002 PACKET3_DMA_DATA_DST_SEL(0) |
2003 PACKET3_DMA_DATA_SRC_SEL(0) |
2004 PACKET3_DMA_DATA_CP_SYNC;
2005 pm4[i++] = 0xfffffffc & bo1_mc;
2006 pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
2007 pm4[i++] = 0xfffffffc & bo2_mc;
2008 pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
2009 pm4[i++] = sdma_write_length;
2010 }
2011 }
2012
2013 amdgpu_test_exec_cs_helper(context_handle,
2014 ip_type, ring_id,
2015 i, pm4,
2016 2, resources,
2017 ib_info, ibs_request);
2018
2019 /* verify if SDMA test result meets with expected */
2020 i = 0;
2021 while(i < sdma_write_length) {
2022 CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
2023 }
2024 r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
2025 sdma_write_length);
2026 CU_ASSERT_EQUAL(r, 0);
2027 r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
2028 sdma_write_length);
2029 CU_ASSERT_EQUAL(r, 0);
2030 loop2++;
2031 }
2032 loop1++;
2033 }
2034 }
2035 /* clean resources */
2036 free(resources);
2037 free(ibs_request);
2038 free(ib_info);
2039 free(pm4);
2040
2041 /* end of test */
2042 r = amdgpu_cs_ctx_free(context_handle);
2043 CU_ASSERT_EQUAL(r, 0);
2044 }
2045
2046 static void
amdgpu_command_submission_sdma_copy_linear(void) 2047{ 2048 amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_DMA); 2049} 2050 2051static void amdgpu_command_submission_sdma(void) 2052{ 2053 amdgpu_command_submission_sdma_write_linear(); 2054 amdgpu_command_submission_sdma_const_fill(); 2055 amdgpu_command_submission_sdma_copy_linear(); 2056} 2057 2058static void amdgpu_command_submission_multi_fence_wait_all(bool wait_all) 2059{ 2060 amdgpu_context_handle context_handle; 2061 amdgpu_bo_handle ib_result_handle, ib_result_ce_handle; 2062 void *ib_result_cpu, *ib_result_ce_cpu; 2063 uint64_t ib_result_mc_address, ib_result_ce_mc_address; 2064 struct amdgpu_cs_request ibs_request[2] = {0}; 2065 struct amdgpu_cs_ib_info ib_info[2]; 2066 struct amdgpu_cs_fence fence_status[2] = {0}; 2067 uint32_t *ptr; 2068 uint32_t expired; 2069 amdgpu_bo_list_handle bo_list; 2070 amdgpu_va_handle va_handle, va_handle_ce; 2071 int r; 2072 int i = 0, ib_cs_num = 2; 2073 2074 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 2075 CU_ASSERT_EQUAL(r, 0); 2076 2077 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 2078 AMDGPU_GEM_DOMAIN_GTT, 0, 2079 &ib_result_handle, &ib_result_cpu, 2080 &ib_result_mc_address, &va_handle); 2081 CU_ASSERT_EQUAL(r, 0); 2082 2083 r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096, 2084 AMDGPU_GEM_DOMAIN_GTT, 0, 2085 &ib_result_ce_handle, &ib_result_ce_cpu, 2086 &ib_result_ce_mc_address, &va_handle_ce); 2087 CU_ASSERT_EQUAL(r, 0); 2088 2089 r = amdgpu_get_bo_list(device_handle, ib_result_handle, 2090 ib_result_ce_handle, &bo_list); 2091 CU_ASSERT_EQUAL(r, 0); 2092 2093 memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info)); 2094 2095 /* IT_SET_CE_DE_COUNTERS */ 2096 ptr = ib_result_ce_cpu; 2097 if (family_id != AMDGPU_FAMILY_SI) { 2098 ptr[i++] = 0xc0008900; 2099 ptr[i++] = 0; 2100 } 2101 ptr[i++] = 0xc0008400; 2102 ptr[i++] = 1; 2103 ib_info[0].ib_mc_address = ib_result_ce_mc_address; 2104 ib_info[0].size = i; 2105 ib_info[0].flags = AMDGPU_IB_FLAG_CE; 2106 2107 /* IT_WAIT_ON_CE_COUNTER */ 2108 ptr = ib_result_cpu; 2109 ptr[0] = 0xc0008600; 2110 ptr[1] = 0x00000001; 2111 ib_info[1].ib_mc_address = ib_result_mc_address; 2112 ib_info[1].size = 2; 2113 2114 for (i = 0; i < ib_cs_num; i++) { 2115 ibs_request[i].ip_type = AMDGPU_HW_IP_GFX; 2116 ibs_request[i].number_of_ibs = 2; 2117 ibs_request[i].ibs = ib_info; 2118 ibs_request[i].resources = bo_list; 2119 ibs_request[i].fence_info.handle = NULL; 2120 } 2121 2122 r = amdgpu_cs_submit(context_handle, 0,ibs_request, ib_cs_num); 2123 2124 CU_ASSERT_EQUAL(r, 0); 2125 2126 for (i = 0; i < ib_cs_num; i++) { 2127 fence_status[i].context = context_handle; 2128 fence_status[i].ip_type = AMDGPU_HW_IP_GFX; 2129 fence_status[i].fence = ibs_request[i].seq_no; 2130 } 2131 2132 r = amdgpu_cs_wait_fences(fence_status, ib_cs_num, wait_all, 2133 AMDGPU_TIMEOUT_INFINITE, 2134 &expired, NULL); 2135 CU_ASSERT_EQUAL(r, 0); 2136 2137 r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 2138 ib_result_mc_address, 4096); 2139 CU_ASSERT_EQUAL(r, 0); 2140 2141 r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce, 2142 ib_result_ce_mc_address, 4096); 2143 CU_ASSERT_EQUAL(r, 0); 2144 2145 r = amdgpu_bo_list_destroy(bo_list); 2146 CU_ASSERT_EQUAL(r, 0); 2147 2148 r = amdgpu_cs_ctx_free(context_handle); 2149 CU_ASSERT_EQUAL(r, 0); 2150} 2151 2152static void amdgpu_command_submission_multi_fence(void) 2153{ 2154 amdgpu_command_submission_multi_fence_wait_all(true); 2155 amdgpu_command_submission_multi_fence_wait_all(false); 
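/* Both amdgpu_cs_wait_fences modes are exercised above: wait_all=true
 * blocks until every fence has signaled, while wait_all=false returns
 * as soon as any one fence signals. */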
2156} 2157 2158static void amdgpu_userptr_test(void) 2159{ 2160 int i, r, j; 2161 uint32_t *pm4 = NULL; 2162 uint64_t bo_mc; 2163 void *ptr = NULL; 2164 int pm4_dw = 256; 2165 int sdma_write_length = 4; 2166 amdgpu_bo_handle handle; 2167 amdgpu_context_handle context_handle; 2168 struct amdgpu_cs_ib_info *ib_info; 2169 struct amdgpu_cs_request *ibs_request; 2170 amdgpu_bo_handle buf_handle; 2171 amdgpu_va_handle va_handle; 2172 2173 pm4 = calloc(pm4_dw, sizeof(*pm4)); 2174 CU_ASSERT_NOT_EQUAL(pm4, NULL); 2175 2176 ib_info = calloc(1, sizeof(*ib_info)); 2177 CU_ASSERT_NOT_EQUAL(ib_info, NULL); 2178 2179 ibs_request = calloc(1, sizeof(*ibs_request)); 2180 CU_ASSERT_NOT_EQUAL(ibs_request, NULL); 2181 2182 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 2183 CU_ASSERT_EQUAL(r, 0); 2184 2185 posix_memalign(&ptr, sysconf(_SC_PAGE_SIZE), BUFFER_SIZE); 2186 CU_ASSERT_NOT_EQUAL(ptr, NULL); 2187 memset(ptr, 0, BUFFER_SIZE); 2188 2189 r = amdgpu_create_bo_from_user_mem(device_handle, 2190 ptr, BUFFER_SIZE, &buf_handle); 2191 CU_ASSERT_EQUAL(r, 0); 2192 2193 r = amdgpu_va_range_alloc(device_handle, 2194 amdgpu_gpu_va_range_general, 2195 BUFFER_SIZE, 1, 0, &bo_mc, 2196 &va_handle, 0); 2197 CU_ASSERT_EQUAL(r, 0); 2198 2199 r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_MAP); 2200 CU_ASSERT_EQUAL(r, 0); 2201 2202 handle = buf_handle; 2203 2204 j = i = 0; 2205 2206 if (family_id == AMDGPU_FAMILY_SI) 2207 pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0, 2208 sdma_write_length); 2209 else 2210 pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE, 2211 SDMA_WRITE_SUB_OPCODE_LINEAR, 0); 2212 pm4[i++] = 0xffffffff & bo_mc; 2213 pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32; 2214 if (family_id >= AMDGPU_FAMILY_AI) 2215 pm4[i++] = sdma_write_length - 1; 2216 else if (family_id != AMDGPU_FAMILY_SI) 2217 pm4[i++] = sdma_write_length; 2218 2219 while (j++ < sdma_write_length) 2220 pm4[i++] = 0xdeadbeaf; 2221 2222 if (!fork()) { 2223 pm4[0] = 0x0; 2224 exit(0); 2225 } 2226 2227 amdgpu_test_exec_cs_helper(context_handle, 2228 AMDGPU_HW_IP_DMA, 0, 2229 i, pm4, 2230 1, &handle, 2231 ib_info, ibs_request); 2232 i = 0; 2233 while (i < sdma_write_length) { 2234 CU_ASSERT_EQUAL(((int*)ptr)[i++], 0xdeadbeaf); 2235 } 2236 free(ibs_request); 2237 free(ib_info); 2238 free(pm4); 2239 2240 r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_UNMAP); 2241 CU_ASSERT_EQUAL(r, 0); 2242 r = amdgpu_va_range_free(va_handle); 2243 CU_ASSERT_EQUAL(r, 0); 2244 r = amdgpu_bo_free(buf_handle); 2245 CU_ASSERT_EQUAL(r, 0); 2246 free(ptr); 2247 2248 r = amdgpu_cs_ctx_free(context_handle); 2249 CU_ASSERT_EQUAL(r, 0); 2250 2251 wait(NULL); 2252} 2253 2254static void amdgpu_sync_dependency_test(void) 2255{ 2256 amdgpu_context_handle context_handle[2]; 2257 amdgpu_bo_handle ib_result_handle; 2258 void *ib_result_cpu; 2259 uint64_t ib_result_mc_address; 2260 struct amdgpu_cs_request ibs_request; 2261 struct amdgpu_cs_ib_info ib_info; 2262 struct amdgpu_cs_fence fence_status; 2263 uint32_t expired; 2264 int i, j, r; 2265 amdgpu_bo_list_handle bo_list; 2266 amdgpu_va_handle va_handle; 2267 static uint32_t *ptr; 2268 uint64_t seq_no; 2269 2270 r = amdgpu_cs_ctx_create(device_handle, &context_handle[0]); 2271 CU_ASSERT_EQUAL(r, 0); 2272 r = amdgpu_cs_ctx_create(device_handle, &context_handle[1]); 2273 CU_ASSERT_EQUAL(r, 0); 2274 2275 r = amdgpu_bo_alloc_and_map(device_handle, 8192, 4096, 2276 AMDGPU_GEM_DOMAIN_GTT, 0, 2277 &ib_result_handle, &ib_result_cpu, 2278 &ib_result_mc_address, &va_handle); 2279 
CU_ASSERT_EQUAL(r, 0); 2280 2281 r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL, 2282 &bo_list); 2283 CU_ASSERT_EQUAL(r, 0); 2284 2285 ptr = ib_result_cpu; 2286 i = 0; 2287 2288 memcpy(ptr + CODE_OFFSET , shader_bin, sizeof(shader_bin)); 2289 2290 /* Dispatch minimal init config and verify it's executed */ 2291 ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1); 2292 ptr[i++] = 0x80000000; 2293 ptr[i++] = 0x80000000; 2294 2295 ptr[i++] = PACKET3(PKT3_CLEAR_STATE, 0); 2296 ptr[i++] = 0x80000000; 2297 2298 2299 /* Program compute regs */ 2300 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); 2301 ptr[i++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START; 2302 ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 8; 2303 ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 40; 2304 2305 2306 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); 2307 ptr[i++] = mmCOMPUTE_PGM_RSRC1 - PACKET3_SET_SH_REG_START; 2308 /* 2309 * 002c0040 COMPUTE_PGM_RSRC1 <- VGPRS = 0 2310 SGPRS = 1 2311 PRIORITY = 0 2312 FLOAT_MODE = 192 (0xc0) 2313 PRIV = 0 2314 DX10_CLAMP = 1 2315 DEBUG_MODE = 0 2316 IEEE_MODE = 0 2317 BULKY = 0 2318 CDBG_USER = 0 2319 * 2320 */ 2321 ptr[i++] = 0x002c0040; 2322 2323 2324 /* 2325 * 00000010 COMPUTE_PGM_RSRC2 <- SCRATCH_EN = 0 2326 USER_SGPR = 8 2327 TRAP_PRESENT = 0 2328 TGID_X_EN = 0 2329 TGID_Y_EN = 0 2330 TGID_Z_EN = 0 2331 TG_SIZE_EN = 0 2332 TIDIG_COMP_CNT = 0 2333 EXCP_EN_MSB = 0 2334 LDS_SIZE = 0 2335 EXCP_EN = 0 2336 * 2337 */ 2338 ptr[i++] = 0x00000010; 2339 2340 2341/* 2342 * 00000100 COMPUTE_TMPRING_SIZE <- WAVES = 256 (0x100) 2343 WAVESIZE = 0 2344 * 2345 */ 2346 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 2347 ptr[i++] = mmCOMPUTE_TMPRING_SIZE - PACKET3_SET_SH_REG_START; 2348 ptr[i++] = 0x00000100; 2349 2350 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); 2351 ptr[i++] = mmCOMPUTE_USER_DATA_0 - PACKET3_SET_SH_REG_START; 2352 ptr[i++] = 0xffffffff & (ib_result_mc_address + DATA_OFFSET * 4); 2353 ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32; 2354 2355 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 2356 ptr[i++] = mmCOMPUTE_RESOURCE_LIMITS - PACKET3_SET_SH_REG_START; 2357 ptr[i++] = 0; 2358 2359 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3); 2360 ptr[i++] = mmCOMPUTE_NUM_THREAD_X - PACKET3_SET_SH_REG_START; 2361 ptr[i++] = 1; 2362 ptr[i++] = 1; 2363 ptr[i++] = 1; 2364 2365 2366 /* Dispatch */ 2367 ptr[i++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); 2368 ptr[i++] = 1; 2369 ptr[i++] = 1; 2370 ptr[i++] = 1; 2371 ptr[i++] = 0x00000045; /* DISPATCH DIRECT field */ 2372 2373 2374 while (i & 7) 2375 ptr[i++] = 0xffff1000; /* type3 nop packet */ 2376 2377 memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info)); 2378 ib_info.ib_mc_address = ib_result_mc_address; 2379 ib_info.size = i; 2380 2381 memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request)); 2382 ibs_request.ip_type = AMDGPU_HW_IP_GFX; 2383 ibs_request.ring = 0; 2384 ibs_request.number_of_ibs = 1; 2385 ibs_request.ibs = &ib_info; 2386 ibs_request.resources = bo_list; 2387 ibs_request.fence_info.handle = NULL; 2388 2389 r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request, 1); 2390 CU_ASSERT_EQUAL(r, 0); 2391 seq_no = ibs_request.seq_no; 2392 2393 2394 2395 /* Prepare second command with dependency on the first */ 2396 j = i; 2397 ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3); 2398 ptr[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; 2399 ptr[i++] = 0xfffffffc & (ib_result_mc_address + DATA_OFFSET * 4); 2400 ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32; 2401 ptr[i++] = 99; 2402 2403 while (i & 7) 2404 
ptr[i++] = 0xffff1000; /* type3 nop packet */ 2405 2406 memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info)); 2407 ib_info.ib_mc_address = ib_result_mc_address + j * 4; 2408 ib_info.size = i - j; 2409 2410 memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request)); 2411 ibs_request.ip_type = AMDGPU_HW_IP_GFX; 2412 ibs_request.ring = 0; 2413 ibs_request.number_of_ibs = 1; 2414 ibs_request.ibs = &ib_info; 2415 ibs_request.resources = bo_list; 2416 ibs_request.fence_info.handle = NULL; 2417 2418 ibs_request.number_of_dependencies = 1; 2419 2420 ibs_request.dependencies = calloc(1, sizeof(*ibs_request.dependencies)); 2421 ibs_request.dependencies[0].context = context_handle[1]; 2422 ibs_request.dependencies[0].ip_instance = 0; 2423 ibs_request.dependencies[0].ring = 0; 2424 ibs_request.dependencies[0].fence = seq_no; 2425 2426 2427 r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request, 1); 2428 CU_ASSERT_EQUAL(r, 0); 2429 2430 2431 memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence)); 2432 fence_status.context = context_handle[0]; 2433 fence_status.ip_type = AMDGPU_HW_IP_GFX; 2434 fence_status.ip_instance = 0; 2435 fence_status.ring = 0; 2436 fence_status.fence = ibs_request.seq_no; 2437 2438 r = amdgpu_cs_query_fence_status(&fence_status, 2439 AMDGPU_TIMEOUT_INFINITE,0, &expired); 2440 CU_ASSERT_EQUAL(r, 0); 2441 2442 /* Expect the second command to wait for shader to complete */ 2443 CU_ASSERT_EQUAL(ptr[DATA_OFFSET], 99); 2444 2445 r = amdgpu_bo_list_destroy(bo_list); 2446 CU_ASSERT_EQUAL(r, 0); 2447 2448 r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle, 2449 ib_result_mc_address, 4096); 2450 CU_ASSERT_EQUAL(r, 0); 2451 2452 r = amdgpu_cs_ctx_free(context_handle[0]); 2453 CU_ASSERT_EQUAL(r, 0); 2454 r = amdgpu_cs_ctx_free(context_handle[1]); 2455 CU_ASSERT_EQUAL(r, 0); 2456 2457 free(ibs_request.dependencies); 2458} 2459 2460static int amdgpu_dispatch_load_cs_shader_hang_slow(uint32_t *ptr, int family) 2461{ 2462 struct amdgpu_test_shader *shader; 2463 int i, loop = 0x10000; 2464 2465 switch (family) { 2466 case AMDGPU_FAMILY_AI: 2467 shader = &memcpy_cs_hang_slow_ai; 2468 break; 2469 case AMDGPU_FAMILY_RV: 2470 shader = &memcpy_cs_hang_slow_rv; 2471 break; 2472 case AMDGPU_FAMILY_NV: 2473 shader = &memcpy_cs_hang_slow_nv; 2474 break; 2475 default: 2476 return -1; 2477 break; 2478 } 2479 2480 memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t)); 2481 2482 for (i = 0; i < loop; i++) 2483 memcpy(ptr + shader->header_length + shader->body_length * i, 2484 shader->shader + shader->header_length, 2485 shader->body_length * sizeof(uint32_t)); 2486 2487 memcpy(ptr + shader->header_length + shader->body_length * loop, 2488 shader->shader + shader->header_length + shader->body_length, 2489 shader->foot_length * sizeof(uint32_t)); 2490 2491 return 0; 2492} 2493 2494static int amdgpu_dispatch_load_cs_shader(uint8_t *ptr, 2495 int cs_type, 2496 uint32_t version) 2497{ 2498 uint32_t shader_size; 2499 const uint32_t *shader; 2500 2501 switch (cs_type) { 2502 case CS_BUFFERCLEAR: 2503 if (version == 9) { 2504 shader = bufferclear_cs_shader_gfx9; 2505 shader_size = sizeof(bufferclear_cs_shader_gfx9); 2506 } else if (version == 10) { 2507 shader = bufferclear_cs_shader_gfx10; 2508 shader_size = sizeof(bufferclear_cs_shader_gfx10); 2509 } 2510 break; 2511 case CS_BUFFERCOPY: 2512 if (version == 9) { 2513 shader = buffercopy_cs_shader_gfx9; 2514 shader_size = sizeof(buffercopy_cs_shader_gfx9); 2515 } else if (version == 10) { 2516 shader = buffercopy_cs_shader_gfx10; 2517 
shader_size = sizeof(buffercopy_cs_shader_gfx10); 2518 } 2519 break; 2520 case CS_HANG: 2521 shader = memcpy_ps_hang; 2522 shader_size = sizeof(memcpy_ps_hang); 2523 break; 2524 default: 2525 return -1; 2526 break; 2527 } 2528 2529 memcpy(ptr, shader, shader_size); 2530 return 0; 2531} 2532 2533static int amdgpu_dispatch_init(uint32_t *ptr, uint32_t ip_type, uint32_t version) 2534{ 2535 int i = 0; 2536 2537 /* Write context control and load shadowing register if necessary */ 2538 if (ip_type == AMDGPU_HW_IP_GFX) { 2539 ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1); 2540 ptr[i++] = 0x80000000; 2541 ptr[i++] = 0x80000000; 2542 } 2543 2544 /* Issue commands to set default compute state. */ 2545 /* clear mmCOMPUTE_START_Z - mmCOMPUTE_START_X */ 2546 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 3); 2547 ptr[i++] = 0x204; 2548 i += 3; 2549 2550 /* clear mmCOMPUTE_TMPRING_SIZE */ 2551 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); 2552 ptr[i++] = 0x218; 2553 ptr[i++] = 0; 2554 2555 /* Set new sh registers in GFX10 to 0 */ 2556 if (version == 10) { 2557 /* mmCOMPUTE_SHADER_CHKSUM */ 2558 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); 2559 ptr[i++] = 0x22a; 2560 ptr[i++] = 0; 2561 /* mmCOMPUTE_REQ_CTRL */ 2562 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 6); 2563 ptr[i++] = 0x222; 2564 i += 6; 2565 /* mmCP_COHER_START_DELAY */ 2566 ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); 2567 ptr[i++] = 0x7b; 2568 ptr[i++] = 0x20; 2569 } 2570 return i; 2571} 2572 2573static int amdgpu_dispatch_write_cumask(uint32_t *ptr, uint32_t version) 2574{ 2575 int i = 0; 2576 2577 /* Issue commands to set cu mask used in current dispatch */ 2578 if (version == 9) { 2579 /* set mmCOMPUTE_STATIC_THREAD_MGMT_SE1 - mmCOMPUTE_STATIC_THREAD_MGMT_SE0 */ 2580 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2); 2581 ptr[i++] = 0x216; 2582 ptr[i++] = 0xffffffff; 2583 ptr[i++] = 0xffffffff; 2584 /* set mmCOMPUTE_STATIC_THREAD_MGMT_SE3 - mmCOMPUTE_STATIC_THREAD_MGMT_SE2 */ 2585 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2); 2586 ptr[i++] = 0x219; 2587 ptr[i++] = 0xffffffff; 2588 ptr[i++] = 0xffffffff; 2589 } else if (version == 10) { 2590 /* set mmCOMPUTE_STATIC_THREAD_MGMT_SE1 - mmCOMPUTE_STATIC_THREAD_MGMT_SE0 */ 2591 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG_INDEX, 2); 2592 ptr[i++] = 0x30000216; 2593 ptr[i++] = 0xffffffff; 2594 ptr[i++] = 0xffffffff; 2595 /* set mmCOMPUTE_STATIC_THREAD_MGMT_SE3 - mmCOMPUTE_STATIC_THREAD_MGMT_SE2 */ 2596 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG_INDEX, 2); 2597 ptr[i++] = 0x30000219; 2598 ptr[i++] = 0xffffffff; 2599 ptr[i++] = 0xffffffff; 2600 } 2601 2602 return i; 2603} 2604 2605static int amdgpu_dispatch_write2hw(uint32_t *ptr, uint64_t shader_addr, uint32_t version) 2606{ 2607 int i, j; 2608 2609 i = 0; 2610 2611 /* Writes shader state to HW */ 2612 /* set mmCOMPUTE_PGM_HI - mmCOMPUTE_PGM_LO */ 2613 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2); 2614 ptr[i++] = 0x20c; 2615 ptr[i++] = (shader_addr >> 8); 2616 ptr[i++] = (shader_addr >> 40); 2617 /* write sh regs*/ 2618 for (j = 0; j < bufferclear_cs_shader_registers_num_gfx9; j++) { 2619 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); 2620 /* - Gfx9ShRegBase */ 2621 ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][0] - 0x2c00; 2622 ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][1]; 2623 } 2624 2625 if (version == 10) { 2626 /* mmCOMPUTE_PGM_RSRC3 */ 2627 ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); 2628 ptr[i++] = 0x228; 2629 ptr[i++] = 0; 2630 } 2631 2632 return i; 2633} 2634 2635static void 
amdgpu_memset_dispatch_test(amdgpu_device_handle device_handle, 2636 uint32_t ip_type, 2637 uint32_t ring, 2638 uint32_t version) 2639{ 2640 amdgpu_context_handle context_handle; 2641 amdgpu_bo_handle bo_dst, bo_shader, bo_cmd, resources[3]; 2642 volatile unsigned char *ptr_dst; 2643 void *ptr_shader; 2644 uint32_t *ptr_cmd; 2645 uint64_t mc_address_dst, mc_address_shader, mc_address_cmd; 2646 amdgpu_va_handle va_dst, va_shader, va_cmd; 2647 int i, r; 2648 int bo_dst_size = 16384; 2649 int bo_shader_size = 4096; 2650 int bo_cmd_size = 4096; 2651 struct amdgpu_cs_request ibs_request = {0}; 2652 struct amdgpu_cs_ib_info ib_info= {0}; 2653 amdgpu_bo_list_handle bo_list; 2654 struct amdgpu_cs_fence fence_status = {0}; 2655 uint32_t expired; 2656 2657 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 2658 CU_ASSERT_EQUAL(r, 0); 2659 2660 r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096, 2661 AMDGPU_GEM_DOMAIN_GTT, 0, 2662 &bo_cmd, (void **)&ptr_cmd, 2663 &mc_address_cmd, &va_cmd); 2664 CU_ASSERT_EQUAL(r, 0); 2665 memset(ptr_cmd, 0, bo_cmd_size); 2666 2667 r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 2668 AMDGPU_GEM_DOMAIN_VRAM, 0, 2669 &bo_shader, &ptr_shader, 2670 &mc_address_shader, &va_shader); 2671 CU_ASSERT_EQUAL(r, 0); 2672 memset(ptr_shader, 0, bo_shader_size); 2673 2674 r = amdgpu_dispatch_load_cs_shader(ptr_shader, CS_BUFFERCLEAR, version); 2675 CU_ASSERT_EQUAL(r, 0); 2676 2677 r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, 2678 AMDGPU_GEM_DOMAIN_VRAM, 0, 2679 &bo_dst, (void **)&ptr_dst, 2680 &mc_address_dst, &va_dst); 2681 CU_ASSERT_EQUAL(r, 0); 2682 2683 i = 0; 2684 i += amdgpu_dispatch_init(ptr_cmd + i, ip_type, version); 2685 2686 /* Issue commands to set cu mask used in current dispatch */ 2687 i += amdgpu_dispatch_write_cumask(ptr_cmd + i, version); 2688 2689 /* Writes shader state to HW */ 2690 i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader, version); 2691 2692 /* Write constant data */ 2693 /* Writes the UAV constant data to the SGPRs. 
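 * The four DWs below look like a standard buffer resource descriptor:
 * dst base address lo; base address hi, with what appears to be a
 * 16-byte record stride in the upper bits (0x100000); num_records
 * (0x400, i.e. 1024 records x 16 bytes = the 16 KiB bo_dst); and a
 * format/type word (0x74fac on gfx9, 0x1104bfac on gfx10).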
*/ 2694 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); 2695 ptr_cmd[i++] = 0x240; 2696 ptr_cmd[i++] = mc_address_dst; 2697 ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000; 2698 ptr_cmd[i++] = 0x400; 2699 if (version == 9) 2700 ptr_cmd[i++] = 0x74fac; 2701 else if (version == 10) 2702 ptr_cmd[i++] = 0x1104bfac; 2703 2704 /* Sets a range of pixel shader constants */ 2705 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); 2706 ptr_cmd[i++] = 0x244; 2707 ptr_cmd[i++] = 0x22222222; 2708 ptr_cmd[i++] = 0x22222222; 2709 ptr_cmd[i++] = 0x22222222; 2710 ptr_cmd[i++] = 0x22222222; 2711 2712 /* clear mmCOMPUTE_RESOURCE_LIMITS */ 2713 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); 2714 ptr_cmd[i++] = 0x215; 2715 ptr_cmd[i++] = 0; 2716 2717 /* dispatch direct command */ 2718 ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3); 2719 ptr_cmd[i++] = 0x10; 2720 ptr_cmd[i++] = 1; 2721 ptr_cmd[i++] = 1; 2722 ptr_cmd[i++] = 1; 2723 2724 while (i & 7) 2725 ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ 2726 2727 resources[0] = bo_dst; 2728 resources[1] = bo_shader; 2729 resources[2] = bo_cmd; 2730 r = amdgpu_bo_list_create(device_handle, 3, resources, NULL, &bo_list); 2731 CU_ASSERT_EQUAL(r, 0); 2732 2733 ib_info.ib_mc_address = mc_address_cmd; 2734 ib_info.size = i; 2735 ibs_request.ip_type = ip_type; 2736 ibs_request.ring = ring; 2737 ibs_request.resources = bo_list; 2738 ibs_request.number_of_ibs = 1; 2739 ibs_request.ibs = &ib_info; 2740 ibs_request.fence_info.handle = NULL; 2741 2742 /* submit CS */ 2743 r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 2744 CU_ASSERT_EQUAL(r, 0); 2745 2746 r = amdgpu_bo_list_destroy(bo_list); 2747 CU_ASSERT_EQUAL(r, 0); 2748 2749 fence_status.ip_type = ip_type; 2750 fence_status.ip_instance = 0; 2751 fence_status.ring = ring; 2752 fence_status.context = context_handle; 2753 fence_status.fence = ibs_request.seq_no; 2754 2755 /* wait for IB accomplished */ 2756 r = amdgpu_cs_query_fence_status(&fence_status, 2757 AMDGPU_TIMEOUT_INFINITE, 2758 0, &expired); 2759 CU_ASSERT_EQUAL(r, 0); 2760 CU_ASSERT_EQUAL(expired, true); 2761 2762 /* verify if memset test result meets with expected */ 2763 i = 0; 2764 while(i < bo_dst_size) { 2765 CU_ASSERT_EQUAL(ptr_dst[i++], 0x22); 2766 } 2767 2768 r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size); 2769 CU_ASSERT_EQUAL(r, 0); 2770 2771 r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size); 2772 CU_ASSERT_EQUAL(r, 0); 2773 2774 r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size); 2775 CU_ASSERT_EQUAL(r, 0); 2776 2777 r = amdgpu_cs_ctx_free(context_handle); 2778 CU_ASSERT_EQUAL(r, 0); 2779} 2780 2781static void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle, 2782 uint32_t ip_type, 2783 uint32_t ring, 2784 uint32_t version, 2785 int hang) 2786{ 2787 amdgpu_context_handle context_handle; 2788 amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4]; 2789 volatile unsigned char *ptr_dst; 2790 void *ptr_shader; 2791 unsigned char *ptr_src; 2792 uint32_t *ptr_cmd; 2793 uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd; 2794 amdgpu_va_handle va_src, va_dst, va_shader, va_cmd; 2795 int i, r; 2796 int bo_dst_size = 16384; 2797 int bo_shader_size = 4096; 2798 int bo_cmd_size = 4096; 2799 struct amdgpu_cs_request ibs_request = {0}; 2800 struct amdgpu_cs_ib_info ib_info= {0}; 2801 uint32_t expired, hang_state, hangs; 2802 enum cs_type cs_type; 2803 amdgpu_bo_list_handle bo_list; 2804 struct 
amdgpu_cs_fence fence_status = {0}; 2805 2806 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 2807 CU_ASSERT_EQUAL(r, 0); 2808 2809 r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096, 2810 AMDGPU_GEM_DOMAIN_GTT, 0, 2811 &bo_cmd, (void **)&ptr_cmd, 2812 &mc_address_cmd, &va_cmd); 2813 CU_ASSERT_EQUAL(r, 0); 2814 memset(ptr_cmd, 0, bo_cmd_size); 2815 2816 r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 2817 AMDGPU_GEM_DOMAIN_VRAM, 0, 2818 &bo_shader, &ptr_shader, 2819 &mc_address_shader, &va_shader); 2820 CU_ASSERT_EQUAL(r, 0); 2821 memset(ptr_shader, 0, bo_shader_size); 2822 2823 cs_type = hang ? CS_HANG : CS_BUFFERCOPY; 2824 r = amdgpu_dispatch_load_cs_shader(ptr_shader, cs_type, version); 2825 CU_ASSERT_EQUAL(r, 0); 2826 2827 r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, 2828 AMDGPU_GEM_DOMAIN_VRAM, 0, 2829 &bo_src, (void **)&ptr_src, 2830 &mc_address_src, &va_src); 2831 CU_ASSERT_EQUAL(r, 0); 2832 2833 r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, 2834 AMDGPU_GEM_DOMAIN_VRAM, 0, 2835 &bo_dst, (void **)&ptr_dst, 2836 &mc_address_dst, &va_dst); 2837 CU_ASSERT_EQUAL(r, 0); 2838 2839 memset(ptr_src, 0x55, bo_dst_size); 2840 2841 i = 0; 2842 i += amdgpu_dispatch_init(ptr_cmd + i, ip_type, version); 2843 2844 /* Issue commands to set cu mask used in current dispatch */ 2845 i += amdgpu_dispatch_write_cumask(ptr_cmd + i, version); 2846 2847 /* Writes shader state to HW */ 2848 i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader, version); 2849 2850 /* Write constant data */ 2851 /* Writes the texture resource constants data to the SGPRs */ 2852 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); 2853 ptr_cmd[i++] = 0x240; 2854 ptr_cmd[i++] = mc_address_src; 2855 ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000; 2856 ptr_cmd[i++] = 0x400; 2857 if (version == 9) 2858 ptr_cmd[i++] = 0x74fac; 2859 else if (version == 10) 2860 ptr_cmd[i++] = 0x1104bfac; 2861 2862 /* Writes the UAV constant data to the SGPRs. 
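 * Same four-DW descriptor shape as the source resource written just
 * above, but pointing at bo_dst, so the copy shader reads from bo_src
 * and writes to bo_dst.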
*/ 2863 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); 2864 ptr_cmd[i++] = 0x244; 2865 ptr_cmd[i++] = mc_address_dst; 2866 ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000; 2867 ptr_cmd[i++] = 0x400; 2868 if (version == 9) 2869 ptr_cmd[i++] = 0x74fac; 2870 else if (version == 10) 2871 ptr_cmd[i++] = 0x1104bfac; 2872 2873 /* clear mmCOMPUTE_RESOURCE_LIMITS */ 2874 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); 2875 ptr_cmd[i++] = 0x215; 2876 ptr_cmd[i++] = 0; 2877 2878 /* dispatch direct command */ 2879 ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3); 2880 ptr_cmd[i++] = 0x10; 2881 ptr_cmd[i++] = 1; 2882 ptr_cmd[i++] = 1; 2883 ptr_cmd[i++] = 1; 2884 2885 while (i & 7) 2886 ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ 2887 2888 resources[0] = bo_shader; 2889 resources[1] = bo_src; 2890 resources[2] = bo_dst; 2891 resources[3] = bo_cmd; 2892 r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list); 2893 CU_ASSERT_EQUAL(r, 0); 2894 2895 ib_info.ib_mc_address = mc_address_cmd; 2896 ib_info.size = i; 2897 ibs_request.ip_type = ip_type; 2898 ibs_request.ring = ring; 2899 ibs_request.resources = bo_list; 2900 ibs_request.number_of_ibs = 1; 2901 ibs_request.ibs = &ib_info; 2902 ibs_request.fence_info.handle = NULL; 2903 r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 2904 CU_ASSERT_EQUAL(r, 0); 2905 2906 fence_status.ip_type = ip_type; 2907 fence_status.ip_instance = 0; 2908 fence_status.ring = ring; 2909 fence_status.context = context_handle; 2910 fence_status.fence = ibs_request.seq_no; 2911 2912 /* wait for IB accomplished */ 2913 r = amdgpu_cs_query_fence_status(&fence_status, 2914 AMDGPU_TIMEOUT_INFINITE, 2915 0, &expired); 2916 2917 if (!hang) { 2918 CU_ASSERT_EQUAL(r, 0); 2919 CU_ASSERT_EQUAL(expired, true); 2920 2921 /* verify if memcpy test result meets with expected */ 2922 i = 0; 2923 while(i < bo_dst_size) { 2924 CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]); 2925 i++; 2926 } 2927 } else { 2928 r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs); 2929 CU_ASSERT_EQUAL(r, 0); 2930 CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET); 2931 } 2932 2933 r = amdgpu_bo_list_destroy(bo_list); 2934 CU_ASSERT_EQUAL(r, 0); 2935 2936 r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size); 2937 CU_ASSERT_EQUAL(r, 0); 2938 r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size); 2939 CU_ASSERT_EQUAL(r, 0); 2940 2941 r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size); 2942 CU_ASSERT_EQUAL(r, 0); 2943 2944 r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size); 2945 CU_ASSERT_EQUAL(r, 0); 2946 2947 r = amdgpu_cs_ctx_free(context_handle); 2948 CU_ASSERT_EQUAL(r, 0); 2949} 2950 2951static void amdgpu_compute_dispatch_test(void) 2952{ 2953 int r; 2954 struct drm_amdgpu_info_hw_ip info; 2955 uint32_t ring_id, version; 2956 2957 r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info); 2958 CU_ASSERT_EQUAL(r, 0); 2959 if (!info.available_rings) 2960 printf("SKIP ... as there's no compute ring\n"); 2961 2962 version = info.hw_ip_version_major; 2963 if (version != 9 && version != 10) { 2964 printf("SKIP ... 
unsupported gfx version %d\n", version); 2965 return; 2966 } 2967 2968 for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { 2969 amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id, version); 2970 amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id, version, 0); 2971 } 2972} 2973 2974static void amdgpu_gfx_dispatch_test(void) 2975{ 2976 int r; 2977 struct drm_amdgpu_info_hw_ip info; 2978 uint32_t ring_id, version; 2979 2980 r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info); 2981 CU_ASSERT_EQUAL(r, 0); 2982 if (!info.available_rings) 2983 printf("SKIP ... as there's no graphics ring\n"); 2984 2985 version = info.hw_ip_version_major; 2986 if (version != 9 && version != 10) { 2987 printf("SKIP ... unsupported gfx version %d\n", version); 2988 return; 2989 } 2990 2991 for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { 2992 amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id, version); 2993 amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id, version, 0); 2994 } 2995} 2996 2997void amdgpu_dispatch_hang_helper(amdgpu_device_handle device_handle, uint32_t ip_type) 2998{ 2999 int r; 3000 struct drm_amdgpu_info_hw_ip info; 3001 uint32_t ring_id, version; 3002 3003 r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info); 3004 CU_ASSERT_EQUAL(r, 0); 3005 if (!info.available_rings) 3006 printf("SKIP ... as there's no ring for ip %d\n", ip_type); 3007 3008 version = info.hw_ip_version_major; 3009 if (version != 9 && version != 10) { 3010 printf("SKIP ... unsupported gfx version %d\n", version); 3011 return; 3012 } 3013 3014 for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { 3015 amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, version, 0); 3016 amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, version, 1); 3017 amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, version, 0); 3018 } 3019} 3020 3021static void amdgpu_memcpy_dispatch_hang_slow_test(amdgpu_device_handle device_handle, 3022 uint32_t ip_type, uint32_t ring, int version) 3023{ 3024 amdgpu_context_handle context_handle; 3025 amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4]; 3026 volatile unsigned char *ptr_dst; 3027 void *ptr_shader; 3028 unsigned char *ptr_src; 3029 uint32_t *ptr_cmd; 3030 uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd; 3031 amdgpu_va_handle va_src, va_dst, va_shader, va_cmd; 3032 int i, r; 3033 int bo_dst_size = 0x4000000; 3034 int bo_shader_size = 0x400000; 3035 int bo_cmd_size = 4096; 3036 struct amdgpu_cs_request ibs_request = {0}; 3037 struct amdgpu_cs_ib_info ib_info= {0}; 3038 uint32_t hang_state, hangs, expired; 3039 struct amdgpu_gpu_info gpu_info = {0}; 3040 amdgpu_bo_list_handle bo_list; 3041 struct amdgpu_cs_fence fence_status = {0}; 3042 3043 r = amdgpu_query_gpu_info(device_handle, &gpu_info); 3044 CU_ASSERT_EQUAL(r, 0); 3045 3046 r = amdgpu_cs_ctx_create(device_handle, &context_handle); 3047 CU_ASSERT_EQUAL(r, 0); 3048 3049 r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096, 3050 AMDGPU_GEM_DOMAIN_GTT, 0, 3051 &bo_cmd, (void **)&ptr_cmd, 3052 &mc_address_cmd, &va_cmd); 3053 CU_ASSERT_EQUAL(r, 0); 3054 memset(ptr_cmd, 0, bo_cmd_size); 3055 3056 r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096, 3057 AMDGPU_GEM_DOMAIN_VRAM, 0, 3058 &bo_shader, &ptr_shader, 3059 &mc_address_shader, &va_shader); 3060 CU_ASSERT_EQUAL(r, 0); 3061 memset(ptr_shader, 0, 
bo_shader_size); 3062 3063 r = amdgpu_dispatch_load_cs_shader_hang_slow(ptr_shader, gpu_info.family_id); 3064 CU_ASSERT_EQUAL(r, 0); 3065 3066 r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, 3067 AMDGPU_GEM_DOMAIN_VRAM, 0, 3068 &bo_src, (void **)&ptr_src, 3069 &mc_address_src, &va_src); 3070 CU_ASSERT_EQUAL(r, 0); 3071 3072 r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096, 3073 AMDGPU_GEM_DOMAIN_VRAM, 0, 3074 &bo_dst, (void **)&ptr_dst, 3075 &mc_address_dst, &va_dst); 3076 CU_ASSERT_EQUAL(r, 0); 3077 3078 memset(ptr_src, 0x55, bo_dst_size); 3079 3080 i = 0; 3081 i += amdgpu_dispatch_init(ptr_cmd + i, ip_type, version); 3082 3083 /* Issue commands to set cu mask used in current dispatch */ 3084 i += amdgpu_dispatch_write_cumask(ptr_cmd + i, version); 3085 3086 /* Writes shader state to HW */ 3087 i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader, version); 3088 3089 /* Write constant data */ 3090 /* Writes the texture resource constants data to the SGPRs */ 3091 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); 3092 ptr_cmd[i++] = 0x240; 3093 ptr_cmd[i++] = mc_address_src; 3094 ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000; 3095 ptr_cmd[i++] = 0x400000; 3096 if (version == 9) 3097 ptr_cmd[i++] = 0x74fac; 3098 else if (version == 10) 3099 ptr_cmd[i++] = 0x1104bfac; 3100 3101 /* Writes the UAV constant data to the SGPRs. */ 3102 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4); 3103 ptr_cmd[i++] = 0x244; 3104 ptr_cmd[i++] = mc_address_dst; 3105 ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000; 3106 ptr_cmd[i++] = 0x400000; 3107 if (version == 9) 3108 ptr_cmd[i++] = 0x74fac; 3109 else if (version == 10) 3110 ptr_cmd[i++] = 0x1104bfac; 3111 3112 /* clear mmCOMPUTE_RESOURCE_LIMITS */ 3113 ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1); 3114 ptr_cmd[i++] = 0x215; 3115 ptr_cmd[i++] = 0; 3116 3117 /* dispatch direct command */ 3118 ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3); 3119 ptr_cmd[i++] = 0x10000; 3120 ptr_cmd[i++] = 1; 3121 ptr_cmd[i++] = 1; 3122 ptr_cmd[i++] = 1; 3123 3124 while (i & 7) 3125 ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */ 3126 3127 resources[0] = bo_shader; 3128 resources[1] = bo_src; 3129 resources[2] = bo_dst; 3130 resources[3] = bo_cmd; 3131 r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list); 3132 CU_ASSERT_EQUAL(r, 0); 3133 3134 ib_info.ib_mc_address = mc_address_cmd; 3135 ib_info.size = i; 3136 ibs_request.ip_type = ip_type; 3137 ibs_request.ring = ring; 3138 ibs_request.resources = bo_list; 3139 ibs_request.number_of_ibs = 1; 3140 ibs_request.ibs = &ib_info; 3141 ibs_request.fence_info.handle = NULL; 3142 r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1); 3143 CU_ASSERT_EQUAL(r, 0); 3144 3145 fence_status.ip_type = ip_type; 3146 fence_status.ip_instance = 0; 3147 fence_status.ring = ring; 3148 fence_status.context = context_handle; 3149 fence_status.fence = ibs_request.seq_no; 3150 3151 /* wait for IB accomplished */ 3152 r = amdgpu_cs_query_fence_status(&fence_status, 3153 AMDGPU_TIMEOUT_INFINITE, 3154 0, &expired); 3155 3156 r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs); 3157 CU_ASSERT_EQUAL(r, 0); 3158 CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET); 3159 3160 r = amdgpu_bo_list_destroy(bo_list); 3161 CU_ASSERT_EQUAL(r, 0); 3162 3163 r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size); 3164 CU_ASSERT_EQUAL(r, 0); 3165 r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size); 3166 CU_ASSERT_EQUAL(r, 0); 3167 
3168 r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size); 3169 CU_ASSERT_EQUAL(r, 0); 3170 3171 r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size); 3172 CU_ASSERT_EQUAL(r, 0); 3173 3174 r = amdgpu_cs_ctx_free(context_handle); 3175 CU_ASSERT_EQUAL(r, 0); 3176} 3177 3178void amdgpu_dispatch_hang_slow_helper(amdgpu_device_handle device_handle, uint32_t ip_type) 3179{ 3180 int r; 3181 struct drm_amdgpu_info_hw_ip info; 3182 uint32_t ring_id, version; 3183 3184 r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info); 3185 CU_ASSERT_EQUAL(r, 0); 3186 if (!info.available_rings) 3187 printf("SKIP ... as there's no ring for ip %d\n", ip_type); 3188 3189 version = info.hw_ip_version_major; 3190 if (version != 9 && version != 10) { 3191 printf("SKIP ... unsupported gfx version %d\n", version); 3192 return; 3193 } 3194 3195 for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) { 3196 amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, version, 0); 3197 amdgpu_memcpy_dispatch_hang_slow_test(device_handle, ip_type, ring_id, version); 3198 amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, version, 0); 3199 } 3200} 3201 3202static int amdgpu_draw_load_ps_shader_hang_slow(uint32_t *ptr, int family) 3203{ 3204 struct amdgpu_test_shader *shader; 3205 int i, loop = 0x40000; 3206 3207 switch (family) { 3208 case AMDGPU_FAMILY_AI: 3209 case AMDGPU_FAMILY_RV: 3210 shader = &memcpy_ps_hang_slow_ai; 3211 break; 3212 default: 3213 return -1; 3214 break; 3215 } 3216 3217 memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t)); 3218 3219 for (i = 0; i < loop; i++) 3220 memcpy(ptr + shader->header_length + shader->body_length * i, 3221 shader->shader + shader->header_length, 3222 shader->body_length * sizeof(uint32_t)); 3223 3224 memcpy(ptr + shader->header_length + shader->body_length * loop, 3225 shader->shader + shader->header_length + shader->body_length, 3226 shader->foot_length * sizeof(uint32_t)); 3227 3228 return 0; 3229} 3230 3231static int amdgpu_draw_load_ps_shader(uint8_t *ptr, int ps_type, uint32_t version) 3232{ 3233 int i; 3234 uint32_t shader_offset= 256; 3235 uint32_t mem_offset, patch_code_offset; 3236 uint32_t shader_size, patchinfo_code_size; 3237 const uint32_t *shader; 3238 const uint32_t *patchinfo_code; 3239 const uint32_t *patchcode_offset; 3240 3241 switch (ps_type) { 3242 case PS_CONST: 3243 if (version == 9) { 3244 shader = ps_const_shader_gfx9; 3245 shader_size = sizeof(ps_const_shader_gfx9); 3246 patchinfo_code = (const uint32_t *)ps_const_shader_patchinfo_code_gfx9; 3247 patchinfo_code_size = ps_const_shader_patchinfo_code_size_gfx9; 3248 patchcode_offset = ps_const_shader_patchinfo_offset_gfx9; 3249 } else if (version == 10){ 3250 shader = ps_const_shader_gfx10; 3251 shader_size = sizeof(ps_const_shader_gfx10); 3252 patchinfo_code = (const uint32_t *)ps_const_shader_patchinfo_code_gfx10; 3253 patchinfo_code_size = ps_const_shader_patchinfo_code_size_gfx10; 3254 patchcode_offset = ps_const_shader_patchinfo_offset_gfx10; 3255 } 3256 break; 3257 case PS_TEX: 3258 if (version == 9) { 3259 shader = ps_tex_shader_gfx9; 3260 shader_size = sizeof(ps_tex_shader_gfx9); 3261 patchinfo_code = (const uint32_t *)ps_tex_shader_patchinfo_code_gfx9; 3262 patchinfo_code_size = ps_tex_shader_patchinfo_code_size_gfx9; 3263 patchcode_offset = ps_tex_shader_patchinfo_offset_gfx9; 3264 } else if (version == 10) { 3265 shader = ps_tex_shader_gfx10; 3266 shader_size = sizeof(ps_tex_shader_gfx10); 3267 
patchinfo_code = (const uint32_t *)ps_tex_shader_patchinfo_code_gfx10; 3268 patchinfo_code_size = ps_tex_shader_patchinfo_code_size_gfx10; 3269 patchcode_offset = ps_tex_shader_patchinfo_offset_gfx10; 3270 } 3271 break; 3272 case PS_HANG: 3273 shader = memcpy_ps_hang; 3274 shader_size = sizeof(memcpy_ps_hang); 3275 3276 memcpy(ptr, shader, shader_size); 3277 return 0; 3278 default: 3279 return -1; 3280 break; 3281 } 3282 3283 /* write main shader program */ 3284 for (i = 0 ; i < 10; i++) { 3285 mem_offset = i * shader_offset; 3286 memcpy(ptr + mem_offset, shader, shader_size); 3287 } 3288 3289 /* overwrite patch codes */ 3290 for (i = 0 ; i < 10; i++) { 3291 mem_offset = i * shader_offset + patchcode_offset[0] * sizeof(uint32_t); 3292 patch_code_offset = i * patchinfo_code_size; 3293 memcpy(ptr + mem_offset, 3294 patchinfo_code + patch_code_offset, 3295 patchinfo_code_size * sizeof(uint32_t)); 3296 } 3297 3298 return 0; 3299} 3300 3301/* load RectPosTexFast_VS */ 3302static int amdgpu_draw_load_vs_shader(uint8_t *ptr, uint32_t version) 3303{ 3304 const uint32_t *shader; 3305 uint32_t shader_size; 3306 3307 if (version == 9) { 3308 shader = vs_RectPosTexFast_shader_gfx9; 3309 shader_size = sizeof(vs_RectPosTexFast_shader_gfx9); 3310 } else if (version == 10) { 3311 shader = vs_RectPosTexFast_shader_gfx10; 3312 shader_size = sizeof(vs_RectPosTexFast_shader_gfx10); 3313 } 3314 3315 memcpy(ptr, shader, shader_size); 3316 3317 return 0; 3318} 3319 3320static int amdgpu_draw_init(uint32_t *ptr, uint32_t version) 3321{ 3322 int i = 0; 3323 const uint32_t *preamblecache_ptr; 3324 uint32_t preamblecache_size; 3325 3326 /* Write context control and load shadowing register if necessary */ 3327 ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1); 3328 ptr[i++] = 0x80000000; 3329 ptr[i++] = 0x80000000; 3330 3331 if (version == 9) { 3332 preamblecache_ptr = preamblecache_gfx9; 3333 preamblecache_size = sizeof(preamblecache_gfx9); 3334 } else if (version == 10) { 3335 preamblecache_ptr = preamblecache_gfx10; 3336 preamblecache_size = sizeof(preamblecache_gfx10); 3337 } 3338 3339 memcpy(ptr + i, preamblecache_ptr, preamblecache_size); 3340 return i + preamblecache_size/sizeof(uint32_t); 3341} 3342 3343static int amdgpu_draw_setup_and_write_drawblt_surf_info(uint32_t *ptr, 3344 uint64_t dst_addr, 3345 uint32_t version, 3346 int hang_slow) 3347{ 3348 int i = 0; 3349 3350 /* setup color buffer */ 3351 if (version == 9) { 3352 /* offset reg 3353 0xA318 CB_COLOR0_BASE 3354 0xA319 CB_COLOR0_BASE_EXT 3355 0xA31A CB_COLOR0_ATTRIB2 3356 0xA31B CB_COLOR0_VIEW 3357 0xA31C CB_COLOR0_INFO 3358 0xA31D CB_COLOR0_ATTRIB 3359 0xA31E CB_COLOR0_DCC_CONTROL 3360 0xA31F CB_COLOR0_CMASK 3361 0xA320 CB_COLOR0_CMASK_BASE_EXT 3362 0xA321 CB_COLOR0_FMASK 3363 0xA322 CB_COLOR0_FMASK_BASE_EXT 3364 0xA323 CB_COLOR0_CLEAR_WORD0 3365 0xA324 CB_COLOR0_CLEAR_WORD1 3366 0xA325 CB_COLOR0_DCC_BASE 3367 0xA326 CB_COLOR0_DCC_BASE_EXT */ 3368 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 15); 3369 ptr[i++] = 0x318; 3370 ptr[i++] = dst_addr >> 8; 3371 ptr[i++] = dst_addr >> 40; 3372 ptr[i++] = hang_slow ? 0x3ffc7ff : 0x7c01f; 3373 ptr[i++] = 0; 3374 ptr[i++] = 0x50438; 3375 ptr[i++] = 0x10140000; 3376 i += 9; 3377 3378 /* mmCB_MRT0_EPITCH */ 3379 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 3380 ptr[i++] = 0x1e8; 3381 ptr[i++] = hang_slow ? 
0xfff : 0x1f; 3382 } else if (version == 10) { 3383 /* 0xA318 CB_COLOR0_BASE 3384 0xA319 CB_COLOR0_PITCH 3385 0xA31A CB_COLOR0_SLICE 3386 0xA31B CB_COLOR0_VIEW 3387 0xA31C CB_COLOR0_INFO 3388 0xA31D CB_COLOR0_ATTRIB 3389 0xA31E CB_COLOR0_DCC_CONTROL 3390 0xA31F CB_COLOR0_CMASK 3391 0xA320 CB_COLOR0_CMASK_SLICE 3392 0xA321 CB_COLOR0_FMASK 3393 0xA322 CB_COLOR0_FMASK_SLICE 3394 0xA323 CB_COLOR0_CLEAR_WORD0 3395 0xA324 CB_COLOR0_CLEAR_WORD1 3396 0xA325 CB_COLOR0_DCC_BASE */ 3397 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 14); 3398 ptr[i++] = 0x318; 3399 ptr[i++] = dst_addr >> 8; 3400 i += 3; 3401 ptr[i++] = 0x50438; 3402 i += 9; 3403 3404 /* 0xA390 CB_COLOR0_BASE_EXT */ 3405 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 3406 ptr[i++] = 0x390; 3407 ptr[i++] = dst_addr >> 40; 3408 3409 /* 0xA398 CB_COLOR0_CMASK_BASE_EXT */ 3410 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 3411 ptr[i++] = 0x398; 3412 ptr[i++] = 0; 3413 3414 /* 0xA3A0 CB_COLOR0_FMASK_BASE_EXT */ 3415 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 3416 ptr[i++] = 0x3a0; 3417 ptr[i++] = 0; 3418 3419 /* 0xA3A8 CB_COLOR0_DCC_BASE_EXT */ 3420 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 3421 ptr[i++] = 0x3a8; 3422 ptr[i++] = 0; 3423 3424 /* 0xA3B0 CB_COLOR0_ATTRIB2 */ 3425 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 3426 ptr[i++] = 0x3b0; 3427 ptr[i++] = hang_slow ? 0x3ffc7ff : 0x7c01f; 3428 3429 /* 0xA3B8 CB_COLOR0_ATTRIB3 */ 3430 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 3431 ptr[i++] = 0x3b8; 3432 ptr[i++] = 0x9014000; 3433 } 3434 3435 /* 0xA32B CB_COLOR1_BASE */ 3436 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 3437 ptr[i++] = 0x32b; 3438 ptr[i++] = 0; 3439 3440 /* 0xA33A CB_COLOR1_BASE */ 3441 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 3442 ptr[i++] = 0x33a; 3443 ptr[i++] = 0; 3444 3445 /* SPI_SHADER_COL_FORMAT */ 3446 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 3447 ptr[i++] = 0x1c5; 3448 ptr[i++] = 9; 3449 3450 /* Setup depth buffer */ 3451 if (version == 9) { 3452 /* mmDB_Z_INFO */ 3453 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2); 3454 ptr[i++] = 0xe; 3455 i += 2; 3456 } else if (version == 10) { 3457 /* mmDB_Z_INFO */ 3458 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2); 3459 ptr[i++] = 0x10; 3460 i += 2; 3461 } 3462 3463 return i; 3464} 3465 3466static int amdgpu_draw_setup_and_write_drawblt_state(uint32_t *ptr, 3467 uint32_t version, 3468 int hang_slow) 3469{ 3470 int i = 0; 3471 const uint32_t *cached_cmd_ptr; 3472 uint32_t cached_cmd_size; 3473 3474 /* mmPA_SC_TILE_STEERING_OVERRIDE */ 3475 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 3476 ptr[i++] = 0xd7; 3477 ptr[i++] = 0; 3478 3479 ptr[i++] = 0xffff1000; 3480 ptr[i++] = 0xc0021000; 3481 3482 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 3483 ptr[i++] = 0xd7; 3484 if (version == 9) 3485 ptr[i++] = 1; 3486 else if (version == 10) 3487 ptr[i++] = 0; 3488 3489 /* mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */ 3490 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 16); 3491 ptr[i++] = 0x2fe; 3492 i += 16; 3493 3494 /* mmPA_SC_CENTROID_PRIORITY_0 */ 3495 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2); 3496 ptr[i++] = 0x2f5; 3497 i += 2; 3498 3499 if (version == 9) { 3500 cached_cmd_ptr = cached_cmd_gfx9; 3501 cached_cmd_size = sizeof(cached_cmd_gfx9); 3502 } else if (version == 10) { 3503 cached_cmd_ptr = cached_cmd_gfx10; 3504 cached_cmd_size = sizeof(cached_cmd_gfx10); 3505 } 3506 3507 memcpy(ptr + i, cached_cmd_ptr, cached_cmd_size); 3508 if (hang_slow) 3509 *(ptr + i + 12) = 0x8000800; 3510 i += cached_cmd_size/sizeof(uint32_t); 3511 3512 if 
(version == 10) { 3513 /* mmCB_RMI_GL2_CACHE_CONTROL */ 3514 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 3515 ptr[i++] = 0x104; 3516 ptr[i++] = 0x40aa0055; 3517 /* mmDB_RMI_L2_CACHE_CONTROL */ 3518 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 3519 ptr[i++] = 0x1f; 3520 ptr[i++] = 0x2a0055; 3521 } 3522 3523 return i; 3524} 3525 3526static int amdgpu_draw_vs_RectPosTexFast_write2hw(uint32_t *ptr, 3527 int ps_type, 3528 uint64_t shader_addr, 3529 uint32_t version, 3530 int hang_slow) 3531{ 3532 int i = 0; 3533 3534 /* mmPA_CL_VS_OUT_CNTL */ 3535 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 3536 ptr[i++] = 0x207; 3537 ptr[i++] = 0; 3538 3539 if (version == 9) { 3540 /* mmSPI_SHADER_PGM_RSRC3_VS */ 3541 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 3542 ptr[i++] = 0x46; 3543 ptr[i++] = 0xffff; 3544 } else if (version == 10) { 3545 /* mmSPI_SHADER_PGM_RSRC3_VS */ 3546 ptr[i++] = PACKET3(PKT3_SET_SH_REG_INDEX, 1); 3547 ptr[i++] = 0x30000046; 3548 ptr[i++] = 0xffff; 3549 /* mmSPI_SHADER_PGM_RSRC4_VS */ 3550 ptr[i++] = PACKET3(PKT3_SET_SH_REG_INDEX, 1); 3551 ptr[i++] = 0x30000041; 3552 ptr[i++] = 0xffff; 3553 } 3554 3555 /* mmSPI_SHADER_PGM_LO_VS...mmSPI_SHADER_PGM_HI_VS */ 3556 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2); 3557 ptr[i++] = 0x48; 3558 ptr[i++] = shader_addr >> 8; 3559 ptr[i++] = shader_addr >> 40; 3560 3561 /* mmSPI_SHADER_PGM_RSRC1_VS */ 3562 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 3563 ptr[i++] = 0x4a; 3564 if (version == 9) 3565 ptr[i++] = 0xc0081; 3566 else if (version == 10) 3567 ptr[i++] = 0xc0041; 3568 /* mmSPI_SHADER_PGM_RSRC2_VS */ 3569 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1); 3570 ptr[i++] = 0x4b; 3571 ptr[i++] = 0x18; 3572 3573 /* mmSPI_VS_OUT_CONFIG */ 3574 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 3575 ptr[i++] = 0x1b1; 3576 ptr[i++] = 2; 3577 3578 /* mmSPI_SHADER_POS_FORMAT */ 3579 ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1); 3580 ptr[i++] = 0x1c3; 3581 ptr[i++] = 4; 3582 3583 ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4); 3584 ptr[i++] = 0x4c; 3585 i += 2; 3586 ptr[i++] = hang_slow ? 0x45000000 : 0x42000000; 3587 ptr[i++] = hang_slow ? 
static int amdgpu_draw_ps_write2hw(uint32_t *ptr,
				   int ps_type,
				   uint64_t shader_addr,
				   uint32_t version)
{
	int i, j;
	const uint32_t *sh_registers;
	const uint32_t *context_registers;
	uint32_t num_sh_reg, num_context_reg;

	if (ps_type == PS_CONST) {
		if (version == 9) {
			sh_registers = (const uint32_t *)ps_const_sh_registers_gfx9;
			num_sh_reg = ps_num_sh_registers_gfx9;
		} else if (version == 10) {
			sh_registers = (const uint32_t *)ps_const_sh_registers_gfx10;
			num_sh_reg = ps_num_sh_registers_gfx10;
		}
		context_registers = (const uint32_t *)ps_const_context_reg_gfx9;
		num_context_reg = ps_num_context_registers_gfx9;
	} else if (ps_type == PS_TEX) {
		sh_registers = (const uint32_t *)ps_tex_sh_registers_gfx9;
		context_registers = (const uint32_t *)ps_tex_context_reg_gfx9;
		num_sh_reg = ps_num_sh_registers_gfx9;
		num_context_reg = ps_num_context_registers_gfx9;
	}

	i = 0;

	if (version == 9) {
		/* 0x2c07   SPI_SHADER_PGM_RSRC3_PS
		   0x2c08   SPI_SHADER_PGM_LO_PS
		   0x2c09   SPI_SHADER_PGM_HI_PS */
		/* the multiplier 9 comes from SPI_SHADER_COL_FORMAT */
		shader_addr += 256 * 9;
		ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
		ptr[i++] = 0x7;
		ptr[i++] = 0xffff;
		ptr[i++] = shader_addr >> 8;
		ptr[i++] = shader_addr >> 40;
	} else if (version == 10) {
		shader_addr += 256 * 9;
		/* 0x2c08   SPI_SHADER_PGM_LO_PS
		   0x2c09   SPI_SHADER_PGM_HI_PS */
		ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
		ptr[i++] = 0x8;
		ptr[i++] = shader_addr >> 8;
		ptr[i++] = shader_addr >> 40;

		/* mmSPI_SHADER_PGM_RSRC3_PS */
		ptr[i++] = PACKET3(PKT3_SET_SH_REG_INDEX, 1);
		ptr[i++] = 0x30000007;
		ptr[i++] = 0xffff;
		/* mmSPI_SHADER_PGM_RSRC4_PS */
		ptr[i++] = PACKET3(PKT3_SET_SH_REG_INDEX, 1);
		ptr[i++] = 0x30000001;
		ptr[i++] = 0xffff;
	}

	for (j = 0; j < num_sh_reg; j++) {
		ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
		ptr[i++] = sh_registers[j * 2] - 0x2c00;
		ptr[i++] = sh_registers[j * 2 + 1];
	}

	for (j = 0; j < num_context_reg; j++) {
		if (context_registers[j * 2] != 0xA1C5) {
			ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
			ptr[i++] = context_registers[j * 2] - 0xa000;
			ptr[i++] = context_registers[j * 2 + 1];
		}

		if (context_registers[j * 2] == 0xA1B4) {
			ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
			ptr[i++] = 0x1b3;
			ptr[i++] = 2;
		}
	}

	return i;
}

static int amdgpu_draw_draw(uint32_t *ptr, uint32_t version)
{
	int i = 0;

	if (version == 9) {
		/* mmIA_MULTI_VGT_PARAM */
		ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
		ptr[i++] = 0x40000258;
		ptr[i++] = 0xd00ff;
		/* mmVGT_PRIMITIVE_TYPE */
		ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
		ptr[i++] = 0x10000242;
		ptr[i++] = 0x11;
	} else if (version == 10) {
		/* mmGE_CNTL */
		ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
		ptr[i++] = 0x25b;
		ptr[i++] = 0xff;
		/* mmVGT_PRIMITIVE_TYPE */
		ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
		ptr[i++] = 0x242;
		ptr[i++] = 0x11;
	}

	ptr[i++] = PACKET3(PACKET3_DRAW_INDEX_AUTO, 1);
	ptr[i++] = 3;
	ptr[i++] = 2;

	return i;
}
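
/*
 * Fill a 16 KiB VRAM buffer by drawing with a constant-color pixel shader:
 * build a single IB that initializes the GFX state, points CB_COLOR0_BASE
 * at the destination, binds the PS_CONST shaders, loads 0x33333333 into
 * the shader constants and issues the draw; then submit it and verify that
 * every byte of the destination reads back as 0x33.
 */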
void amdgpu_memset_draw(amdgpu_device_handle device_handle,
			amdgpu_bo_handle bo_shader_ps,
			amdgpu_bo_handle bo_shader_vs,
			uint64_t mc_address_shader_ps,
			uint64_t mc_address_shader_vs,
			uint32_t ring_id, uint32_t version)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo_dst, bo_cmd, resources[4];
	volatile unsigned char *ptr_dst;
	uint32_t *ptr_cmd;
	uint64_t mc_address_dst, mc_address_cmd;
	amdgpu_va_handle va_dst, va_cmd;
	int i, r;
	int bo_dst_size = 16384;
	int bo_cmd_size = 4096;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info = {0};
	struct amdgpu_cs_fence fence_status = {0};
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &bo_cmd, (void **)&ptr_cmd,
				    &mc_address_cmd, &va_cmd);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_cmd, 0, bo_cmd_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_dst, (void **)&ptr_dst,
				    &mc_address_dst, &va_dst);
	CU_ASSERT_EQUAL(r, 0);

	i = 0;
	i += amdgpu_draw_init(ptr_cmd + i, version);

	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, version, 0);

	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, version, 0);

	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_vs,
						    version, 0);

	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_ps, version);

	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0xc;
	ptr_cmd[i++] = 0x33333333;
	ptr_cmd[i++] = 0x33333333;
	ptr_cmd[i++] = 0x33333333;
	ptr_cmd[i++] = 0x33333333;

	i += amdgpu_draw_draw(ptr_cmd + i, version);

	while (i & 7)
		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */

	resources[0] = bo_dst;
	resources[1] = bo_shader_ps;
	resources[2] = bo_shader_vs;
	resources[3] = bo_cmd;
	r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ib_info.ib_mc_address = mc_address_cmd;
	ib_info.size = i;
	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.ring = ring_id;
	ibs_request.resources = bo_list;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.fence_info.handle = NULL;

	/* submit CS */
	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.ring = ring_id;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request.seq_no;

	/* wait for the IB to complete */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(expired, true);

	/* verify the memset result matches the expected pattern */
	i = 0;
	while (i < bo_dst_size) {
		CU_ASSERT_EQUAL(ptr_dst[i++], 0x33);
	}

	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
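
/*
 * Set-up/tear-down wrapper for amdgpu_memset_draw(): allocates VRAM buffers
 * for the pixel and vertex shader binaries, loads the PS_CONST variants for
 * the given gfx version, runs the draw, and frees the shader BOs.
 */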
static void amdgpu_memset_draw_test(amdgpu_device_handle device_handle,
				    uint32_t ring, int version)
{
	amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
	void *ptr_shader_ps;
	void *ptr_shader_vs;
	uint64_t mc_address_shader_ps, mc_address_shader_vs;
	amdgpu_va_handle va_shader_ps, va_shader_vs;
	int r;
	int bo_shader_size = 4096;

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_shader_ps, &ptr_shader_ps,
				    &mc_address_shader_ps, &va_shader_ps);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader_ps, 0, bo_shader_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_shader_vs, &ptr_shader_vs,
				    &mc_address_shader_vs, &va_shader_vs);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader_vs, 0, bo_shader_size);

	r = amdgpu_draw_load_ps_shader(ptr_shader_ps, PS_CONST, version);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_draw_load_vs_shader(ptr_shader_vs, version);
	CU_ASSERT_EQUAL(r, 0);

	amdgpu_memset_draw(device_handle, bo_shader_ps, bo_shader_vs,
			   mc_address_shader_ps, mc_address_shader_vs,
			   ring, version);

	r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size);
	CU_ASSERT_EQUAL(r, 0);
}

static void amdgpu_memcpy_draw(amdgpu_device_handle device_handle,
			       amdgpu_bo_handle bo_shader_ps,
			       amdgpu_bo_handle bo_shader_vs,
			       uint64_t mc_address_shader_ps,
			       uint64_t mc_address_shader_vs,
			       uint32_t ring, int version, int hang)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5];
	volatile unsigned char *ptr_dst;
	unsigned char *ptr_src;
	uint32_t *ptr_cmd;
	uint64_t mc_address_dst, mc_address_src, mc_address_cmd;
	amdgpu_va_handle va_dst, va_src, va_cmd;
	int i, r;
	int bo_size = 16384;
	int bo_cmd_size = 4096;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info = {0};
	uint32_t hang_state, hangs;
	uint32_t expired;
	amdgpu_bo_list_handle bo_list;
	struct amdgpu_cs_fence fence_status = {0};

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &bo_cmd, (void **)&ptr_cmd,
				    &mc_address_cmd, &va_cmd);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_cmd, 0, bo_cmd_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_src, (void **)&ptr_src,
				    &mc_address_src, &va_src);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_dst, (void **)&ptr_dst,
				    &mc_address_dst, &va_dst);
	CU_ASSERT_EQUAL(r, 0);

	memset(ptr_src, 0x55, bo_size);

	i = 0;
	i += amdgpu_draw_init(ptr_cmd + i, version);

	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, version, 0);

	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, version, 0);

	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_vs,
						    version, 0);

	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps, version);

	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8);
	if (version == 9) {
		ptr_cmd[i++] = 0xc;
		ptr_cmd[i++] = mc_address_src >> 8;
		ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000;
		ptr_cmd[i++] = 0x7c01f;
		ptr_cmd[i++] = 0x90500fac;
		ptr_cmd[i++] = 0x3e000;
		i += 3;
	} else if (version == 10) {
		ptr_cmd[i++] = 0xc;
		ptr_cmd[i++] = mc_address_src >> 8;
		ptr_cmd[i++] = mc_address_src >> 40 | 0xc4b00000;
		ptr_cmd[i++] = 0x8007c007;
		ptr_cmd[i++] = 0x90500fac;
		i += 2;
		ptr_cmd[i++] = 0x400;
		i++;
	}

	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x14;
	ptr_cmd[i++] = 0x92;
	i += 3;

	ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr_cmd[i++] = 0x191;
	ptr_cmd[i++] = 0;

	i += amdgpu_draw_draw(ptr_cmd + i, version);

	while (i & 7)
		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */

	resources[0] = bo_dst;
	resources[1] = bo_src;
	resources[2] = bo_shader_ps;
	resources[3] = bo_shader_vs;
	resources[4] = bo_cmd;
	r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ib_info.ib_mc_address = mc_address_cmd;
	ib_info.size = i;
	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.ring = ring;
	ibs_request.resources = bo_list;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.ring = ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request.seq_no;

	/* wait for the IB to complete */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);
	if (!hang) {
		CU_ASSERT_EQUAL(r, 0);
		CU_ASSERT_EQUAL(expired, true);

		/* verify the memcpy result matches the source pattern */
		i = 0;
		while (i < bo_size) {
			CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]);
			i++;
		}
	} else {
		r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
		CU_ASSERT_EQUAL(r, 0);
		CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
	}

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
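
/*
 * Wrapper for amdgpu_memcpy_draw(): loads the PS_TEX pixel shader, or the
 * PS_HANG one when a hang is requested, plus the vertex shader, then runs
 * the textured draw on the given ring.
 */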
void amdgpu_memcpy_draw_test(amdgpu_device_handle device_handle, uint32_t ring,
			     int version, int hang)
{
	amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
	void *ptr_shader_ps;
	void *ptr_shader_vs;
	uint64_t mc_address_shader_ps, mc_address_shader_vs;
	amdgpu_va_handle va_shader_ps, va_shader_vs;
	int bo_shader_size = 4096;
	enum ps_type ps_type = hang ? PS_HANG : PS_TEX;
	int r;

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_shader_ps, &ptr_shader_ps,
				    &mc_address_shader_ps, &va_shader_ps);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader_ps, 0, bo_shader_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_shader_vs, &ptr_shader_vs,
				    &mc_address_shader_vs, &va_shader_vs);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader_vs, 0, bo_shader_size);

	r = amdgpu_draw_load_ps_shader(ptr_shader_ps, ps_type, version);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_draw_load_vs_shader(ptr_shader_vs, version);
	CU_ASSERT_EQUAL(r, 0);

	amdgpu_memcpy_draw(device_handle, bo_shader_ps, bo_shader_vs,
			   mc_address_shader_ps, mc_address_shader_vs,
			   ring, version, hang);

	r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size);
	CU_ASSERT_EQUAL(r, 0);
}
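
/*
 * CUnit entry point for the draw tests: queries the GFX IP block, skips on
 * hardware other than gfx9/gfx10, and runs the memset and memcpy draw tests
 * on every available graphics ring.
 */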
static void amdgpu_draw_test(void)
{
	int r;
	struct drm_amdgpu_info_hw_ip info;
	uint32_t ring_id, version;

	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
	CU_ASSERT_EQUAL(r, 0);
	if (!info.available_rings) {
		printf("SKIP ... as there's no graphics ring\n");
		return;
	}

	version = info.hw_ip_version_major;
	if (version != 9 && version != 10) {
		printf("SKIP ... unsupported gfx version %u\n", version);
		return;
	}

	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
		amdgpu_memset_draw_test(device_handle, ring_id, version);
		amdgpu_memcpy_draw_test(device_handle, ring_id, version, 0);
	}
}
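
/*
 * Slow-hang variant of the memcpy draw: uses 64 MiB (0x4000000) source and
 * destination buffers and a deliberately slow pixel shader, so the IB should
 * run long enough to trip the kernel's job timeout; afterwards the context
 * is expected to report AMDGPU_CTX_UNKNOWN_RESET.
 */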
void amdgpu_memcpy_draw_hang_slow_test(amdgpu_device_handle device_handle, uint32_t ring, int version)
{
	amdgpu_context_handle context_handle;
	amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
	amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5];
	void *ptr_shader_ps;
	void *ptr_shader_vs;
	volatile unsigned char *ptr_dst;
	unsigned char *ptr_src;
	uint32_t *ptr_cmd;
	uint64_t mc_address_dst, mc_address_src, mc_address_cmd;
	uint64_t mc_address_shader_ps, mc_address_shader_vs;
	amdgpu_va_handle va_shader_ps, va_shader_vs;
	amdgpu_va_handle va_dst, va_src, va_cmd;
	struct amdgpu_gpu_info gpu_info = {0};
	int i, r;
	int bo_size = 0x4000000;
	int bo_shader_ps_size = 0x400000;
	int bo_shader_vs_size = 4096;
	int bo_cmd_size = 4096;
	struct amdgpu_cs_request ibs_request = {0};
	struct amdgpu_cs_ib_info ib_info = {0};
	uint32_t hang_state, hangs, expired;
	amdgpu_bo_list_handle bo_list;
	struct amdgpu_cs_fence fence_status = {0};

	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
				    AMDGPU_GEM_DOMAIN_GTT, 0,
				    &bo_cmd, (void **)&ptr_cmd,
				    &mc_address_cmd, &va_cmd);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_cmd, 0, bo_cmd_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_ps_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_shader_ps, &ptr_shader_ps,
				    &mc_address_shader_ps, &va_shader_ps);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader_ps, 0, bo_shader_ps_size);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_vs_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_shader_vs, &ptr_shader_vs,
				    &mc_address_shader_vs, &va_shader_vs);
	CU_ASSERT_EQUAL(r, 0);
	memset(ptr_shader_vs, 0, bo_shader_vs_size);

	r = amdgpu_draw_load_ps_shader_hang_slow(ptr_shader_ps, gpu_info.family_id);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_draw_load_vs_shader(ptr_shader_vs, version);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_src, (void **)&ptr_src,
				    &mc_address_src, &va_src);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
				    AMDGPU_GEM_DOMAIN_VRAM, 0,
				    &bo_dst, (void **)&ptr_dst,
				    &mc_address_dst, &va_dst);
	CU_ASSERT_EQUAL(r, 0);

	memset(ptr_src, 0x55, bo_size);

	i = 0;
	i += amdgpu_draw_init(ptr_cmd + i, version);

	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, version, 1);

	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, version, 1);

	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX,
						    mc_address_shader_vs, version, 1);

	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps, version);

	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8);

	if (version == 9) {
		ptr_cmd[i++] = 0xc;
		ptr_cmd[i++] = mc_address_src >> 8;
		ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000;
		ptr_cmd[i++] = 0x1ffcfff;
		ptr_cmd[i++] = 0x90500fac;
		ptr_cmd[i++] = 0x1ffe000;
		i += 3;
	} else if (version == 10) {
		ptr_cmd[i++] = 0xc;
		ptr_cmd[i++] = mc_address_src >> 8;
		ptr_cmd[i++] = mc_address_src >> 40 | 0xc4b00000;
		ptr_cmd[i++] = 0x81ffc1ff;
		ptr_cmd[i++] = 0x90500fac;
		i += 4;
	}

	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
	ptr_cmd[i++] = 0x14;
	ptr_cmd[i++] = 0x92;
	i += 3;

	ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	ptr_cmd[i++] = 0x191;
	ptr_cmd[i++] = 0;

	i += amdgpu_draw_draw(ptr_cmd + i, version);

	while (i & 7)
		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */

	resources[0] = bo_dst;
	resources[1] = bo_src;
	resources[2] = bo_shader_ps;
	resources[3] = bo_shader_vs;
	resources[4] = bo_cmd;
	r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list);
	CU_ASSERT_EQUAL(r, 0);

	ib_info.ib_mc_address = mc_address_cmd;
	ib_info.size = i;
	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
	ibs_request.ring = ring;
	ibs_request.resources = bo_list;
	ibs_request.number_of_ibs = 1;
	ibs_request.ibs = &ib_info;
	ibs_request.fence_info.handle = NULL;
	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
	CU_ASSERT_EQUAL(r, 0);

	fence_status.ip_type = AMDGPU_HW_IP_GFX;
	fence_status.ip_instance = 0;
	fence_status.ring = ring;
	fence_status.context = context_handle;
	fence_status.fence = ibs_request.seq_no;

	/* wait for the IB; it is expected to hang, so the wait result is
	 * deliberately not checked */
	r = amdgpu_cs_query_fence_status(&fence_status,
					 AMDGPU_TIMEOUT_INFINITE,
					 0, &expired);

	r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);

	r = amdgpu_bo_list_destroy(bo_list);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_ps_size);
	CU_ASSERT_EQUAL(r, 0);
	r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_vs_size);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}
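
/*
 * Kick off a GPU reset by reading the amdgpu_gpu_recover debugfs entry for
 * this device's DRM minor (reading the file initiates the recovery), verify
 * the context observes the reset, then re-run the dispatch tests to confirm
 * the rings came back.
 */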
static void amdgpu_gpu_reset_test(void)
{
	int r;
	char debugfs_path[256], tmp[10];
	int fd;
	struct stat sbuf;
	amdgpu_context_handle context_handle;
	uint32_t hang_state, hangs;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = fstat(drm_amdgpu[0], &sbuf);
	CU_ASSERT_EQUAL(r, 0);

	sprintf(debugfs_path, "/sys/kernel/debug/dri/%d/amdgpu_gpu_recover", minor(sbuf.st_rdev));
	fd = open(debugfs_path, O_RDONLY);
	CU_ASSERT(fd >= 0);

	r = read(fd, tmp, sizeof(tmp)/sizeof(char));
	CU_ASSERT(r > 0);

	r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);

	close(fd);
	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);

	amdgpu_compute_dispatch_test();
	amdgpu_gfx_dispatch_test();
}

static void amdgpu_stable_pstate_test(void)
{
	int r;
	amdgpu_context_handle context_handle;
	uint32_t current_pstate = 0, new_pstate = 0;

	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_stable_pstate(context_handle,
					AMDGPU_CTX_OP_GET_STABLE_PSTATE,
					0, &current_pstate);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(current_pstate, AMDGPU_CTX_STABLE_PSTATE_NONE);

	r = amdgpu_cs_ctx_stable_pstate(context_handle,
					AMDGPU_CTX_OP_SET_STABLE_PSTATE,
					AMDGPU_CTX_STABLE_PSTATE_PEAK, NULL);
	CU_ASSERT_EQUAL(r, 0);

	r = amdgpu_cs_ctx_stable_pstate(context_handle,
					AMDGPU_CTX_OP_GET_STABLE_PSTATE,
					0, &new_pstate);
	CU_ASSERT_EQUAL(r, 0);
	CU_ASSERT_EQUAL(new_pstate, AMDGPU_CTX_STABLE_PSTATE_PEAK);

	r = amdgpu_cs_ctx_free(context_handle);
	CU_ASSERT_EQUAL(r, 0);
}