1/*
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22*/
23
24#include <stdio.h>
25#include <stdlib.h>
26#include <unistd.h>
27#include <sys/types.h>
28#ifdef MAJOR_IN_SYSMACROS
29#include <sys/sysmacros.h>
30#endif
31#include <sys/stat.h>
32#include <fcntl.h>
33#if HAVE_ALLOCA_H
34# include <alloca.h>
35#endif
36#include <sys/wait.h>
37
38#include "CUnit/Basic.h"
39
40#include "amdgpu_test.h"
41#include "amdgpu_drm.h"
42#include "amdgpu_internal.h"
43#include "util_math.h"
44
45static  amdgpu_device_handle device_handle;
46static  uint32_t  major_version;
47static  uint32_t  minor_version;
48static  uint32_t  family_id;
49static  uint32_t  chip_id;
50static  uint32_t  chip_rev;
51
52static void amdgpu_query_info_test(void);
53static void amdgpu_command_submission_gfx(void);
54static void amdgpu_command_submission_compute(void);
55static void amdgpu_command_submission_multi_fence(void);
56static void amdgpu_command_submission_sdma(void);
57static void amdgpu_userptr_test(void);
58static void amdgpu_semaphore_test(void);
59static void amdgpu_sync_dependency_test(void);
60static void amdgpu_bo_eviction_test(void);
61static void amdgpu_compute_dispatch_test(void);
62static void amdgpu_gfx_dispatch_test(void);
63static void amdgpu_draw_test(void);
64static void amdgpu_gpu_reset_test(void);
65static void amdgpu_stable_pstate_test(void);
66
67static void amdgpu_command_submission_write_linear_helper(unsigned ip_type);
68static void amdgpu_command_submission_const_fill_helper(unsigned ip_type);
69static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type);
70static void amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
71				       unsigned ip_type,
72				       int instance, int pm4_dw, uint32_t *pm4_src,
73				       int res_cnt, amdgpu_bo_handle *resources,
74				       struct amdgpu_cs_ib_info *ib_info,
75				       struct amdgpu_cs_request *ibs_request);
76
77CU_TestInfo basic_tests[] = {
78	{ "Query Info Test",  amdgpu_query_info_test },
79	{ "Userptr Test",  amdgpu_userptr_test },
80	{ "bo eviction Test",  amdgpu_bo_eviction_test },
81	{ "Command submission Test (GFX)",  amdgpu_command_submission_gfx },
82	{ "Command submission Test (Compute)", amdgpu_command_submission_compute },
83	{ "Command submission Test (Multi-Fence)", amdgpu_command_submission_multi_fence },
84	{ "Command submission Test (SDMA)", amdgpu_command_submission_sdma },
85	{ "SW semaphore Test",  amdgpu_semaphore_test },
86	{ "Sync dependency Test",  amdgpu_sync_dependency_test },
87	{ "Dispatch Test (Compute)",  amdgpu_compute_dispatch_test },
88	{ "Dispatch Test (GFX)",  amdgpu_gfx_dispatch_test },
89	{ "Draw Test",  amdgpu_draw_test },
90	{ "GPU reset Test", amdgpu_gpu_reset_test },
91	{ "Stable pstate Test", amdgpu_stable_pstate_test },
92	CU_TEST_INFO_NULL,
93};
94#define BUFFER_SIZE (MAX2(8 * 1024, getpagesize()))
95#define SDMA_PKT_HEADER_op_offset 0
96#define SDMA_PKT_HEADER_op_mask   0x000000FF
97#define SDMA_PKT_HEADER_op_shift  0
98#define SDMA_PKT_HEADER_OP(x) (((x) & SDMA_PKT_HEADER_op_mask) << SDMA_PKT_HEADER_op_shift)
99#define SDMA_OPCODE_CONSTANT_FILL  11
100#       define SDMA_CONSTANT_FILL_EXTRA_SIZE(x)           ((x) << 14)
101	/* 0 = byte fill
102	 * 2 = DW fill
103	 */
104#define SDMA_PACKET(op, sub_op, e)	((((e) & 0xFFFF) << 16) |	\
105					(((sub_op) & 0xFF) << 8) |	\
106					(((op) & 0xFF) << 0))
107#define	SDMA_OPCODE_WRITE				  2
108#       define SDMA_WRITE_SUB_OPCODE_LINEAR               0
109#       define SDMA_WRTIE_SUB_OPCODE_TILED                1
110
111#define	SDMA_OPCODE_COPY				  1
112#       define SDMA_COPY_SUB_OPCODE_LINEAR                0
113
114#define	SDMA_OPCODE_ATOMIC				  10
115#		define SDMA_ATOMIC_LOOP(x)               ((x) << 0)
116        /* 0 - single_pass_atomic.
117         * 1 - loop_until_compare_satisfied.
118         */
119#		define SDMA_ATOMIC_TMZ(x)                ((x) << 2)
120		/* 0 - non-TMZ.
121		 * 1 - TMZ.
122	     */
123#		define SDMA_ATOMIC_OPCODE(x)             ((x) << 9)
124		/* TC_OP_ATOMIC_CMPSWAP_RTN_32 0x00000008
125		 * same as Packet 3
126		 */
127
128#define GFX_COMPUTE_NOP  0xffff1000
129#define SDMA_NOP  0x0
130
131/* PM4 */
132#define	PACKET_TYPE0	0
133#define	PACKET_TYPE1	1
134#define	PACKET_TYPE2	2
135#define	PACKET_TYPE3	3
136
137#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
138#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
139#define CP_PACKET0_GET_REG(h) ((h) & 0xFFFF)
140#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
141#define PACKET0(reg, n)	((PACKET_TYPE0 << 30) |				\
142			 ((reg) & 0xFFFF) |			\
143			 ((n) & 0x3FFF) << 16)
144#define CP_PACKET2			0x80000000
145#define		PACKET2_PAD_SHIFT		0
146#define		PACKET2_PAD_MASK		(0x3fffffff << 0)
147
148#define PACKET2(v)	(CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))
149
150#define PACKET3(op, n)	((PACKET_TYPE3 << 30) |				\
151			 (((op) & 0xFF) << 8) |				\
152			 ((n) & 0x3FFF) << 16)
153#define PACKET3_COMPUTE(op, n) PACKET3(op, n) | (1 << 1)
154
155/* Packet 3 types */
156#define	PACKET3_NOP					0x10
157
158#define	PACKET3_WRITE_DATA				0x37
159#define		WRITE_DATA_DST_SEL(x)                   ((x) << 8)
160		/* 0 - register
161		 * 1 - memory (sync - via GRBM)
162		 * 2 - gl2
163		 * 3 - gds
164		 * 4 - reserved
165		 * 5 - memory (async - direct)
166		 */
167#define		WR_ONE_ADDR                             (1 << 16)
168#define		WR_CONFIRM                              (1 << 20)
169#define		WRITE_DATA_CACHE_POLICY(x)              ((x) << 25)
170		/* 0 - LRU
171		 * 1 - Stream
172		 */
173#define		WRITE_DATA_ENGINE_SEL(x)                ((x) << 30)
174		/* 0 - me
175		 * 1 - pfp
176		 * 2 - ce
177		 */
178
179#define	PACKET3_ATOMIC_MEM				0x1E
180#define     TC_OP_ATOMIC_CMPSWAP_RTN_32          0x00000008
181#define     ATOMIC_MEM_COMMAND(x)               ((x) << 8)
182            /* 0 - single_pass_atomic.
183             * 1 - loop_until_compare_satisfied.
184             */
185#define     ATOMIC_MEM_CACHEPOLICAY(x)          ((x) << 25)
186            /* 0 - lru.
187             * 1 - stream.
188             */
189#define     ATOMIC_MEM_ENGINESEL(x)             ((x) << 30)
190            /* 0 - micro_engine.
191			 */
192
193#define	PACKET3_DMA_DATA				0x50
194/* 1. header
195 * 2. CONTROL
196 * 3. SRC_ADDR_LO or DATA [31:0]
197 * 4. SRC_ADDR_HI [31:0]
198 * 5. DST_ADDR_LO [31:0]
199 * 6. DST_ADDR_HI [7:0]
200 * 7. COMMAND [30:21] | BYTE_COUNT [20:0]
201 */
202/* CONTROL */
203#              define PACKET3_DMA_DATA_ENGINE(x)     ((x) << 0)
204		/* 0 - ME
205		 * 1 - PFP
206		 */
207#              define PACKET3_DMA_DATA_SRC_CACHE_POLICY(x) ((x) << 13)
208		/* 0 - LRU
209		 * 1 - Stream
210		 * 2 - Bypass
211		 */
212#              define PACKET3_DMA_DATA_SRC_VOLATILE (1 << 15)
213#              define PACKET3_DMA_DATA_DST_SEL(x)  ((x) << 20)
214		/* 0 - DST_ADDR using DAS
215		 * 1 - GDS
216		 * 3 - DST_ADDR using L2
217		 */
218#              define PACKET3_DMA_DATA_DST_CACHE_POLICY(x) ((x) << 25)
219		/* 0 - LRU
220		 * 1 - Stream
221		 * 2 - Bypass
222		 */
223#              define PACKET3_DMA_DATA_DST_VOLATILE (1 << 27)
224#              define PACKET3_DMA_DATA_SRC_SEL(x)  ((x) << 29)
225		/* 0 - SRC_ADDR using SAS
226		 * 1 - GDS
227		 * 2 - DATA
228		 * 3 - SRC_ADDR using L2
229		 */
230#              define PACKET3_DMA_DATA_CP_SYNC     (1 << 31)
231/* COMMAND */
232#              define PACKET3_DMA_DATA_DIS_WC      (1 << 21)
233#              define PACKET3_DMA_DATA_CMD_SRC_SWAP(x) ((x) << 22)
234		/* 0 - none
235		 * 1 - 8 in 16
236		 * 2 - 8 in 32
237		 * 3 - 8 in 64
238		 */
239#              define PACKET3_DMA_DATA_CMD_DST_SWAP(x) ((x) << 24)
240		/* 0 - none
241		 * 1 - 8 in 16
242		 * 2 - 8 in 32
243		 * 3 - 8 in 64
244		 */
245#              define PACKET3_DMA_DATA_CMD_SAS     (1 << 26)
246		/* 0 - memory
247		 * 1 - register
248		 */
249#              define PACKET3_DMA_DATA_CMD_DAS     (1 << 27)
250		/* 0 - memory
251		 * 1 - register
252		 */
253#              define PACKET3_DMA_DATA_CMD_SAIC    (1 << 28)
254#              define PACKET3_DMA_DATA_CMD_DAIC    (1 << 29)
255#              define PACKET3_DMA_DATA_CMD_RAW_WAIT  (1 << 30)
256
257#define SDMA_PACKET_SI(op, b, t, s, cnt)	((((op) & 0xF) << 28) |	\
258						(((b) & 0x1) << 26) |		\
259						(((t) & 0x1) << 23) |		\
260						(((s) & 0x1) << 22) |		\
261						(((cnt) & 0xFFFFF) << 0))
262#define	SDMA_OPCODE_COPY_SI	3
263#define SDMA_OPCODE_CONSTANT_FILL_SI	13
264#define SDMA_NOP_SI  0xf
265#define GFX_COMPUTE_NOP_SI 0x80000000
266#define	PACKET3_DMA_DATA_SI	0x41
267#              define PACKET3_DMA_DATA_SI_ENGINE(x)     ((x) << 27)
268		/* 0 - ME
269		 * 1 - PFP
270		 */
271#              define PACKET3_DMA_DATA_SI_DST_SEL(x)  ((x) << 20)
272		/* 0 - DST_ADDR using DAS
273		 * 1 - GDS
274		 * 3 - DST_ADDR using L2
275		 */
276#              define PACKET3_DMA_DATA_SI_SRC_SEL(x)  ((x) << 29)
277		/* 0 - SRC_ADDR using SAS
278		 * 1 - GDS
279		 * 2 - DATA
280		 * 3 - SRC_ADDR using L2
281		 */
282#              define PACKET3_DMA_DATA_SI_CP_SYNC     (1 << 31)
283
284
285#define PKT3_CONTEXT_CONTROL                   0x28
286#define     CONTEXT_CONTROL_LOAD_ENABLE(x)     (((unsigned)(x) & 0x1) << 31)
287#define     CONTEXT_CONTROL_LOAD_CE_RAM(x)     (((unsigned)(x) & 0x1) << 28)
288#define     CONTEXT_CONTROL_SHADOW_ENABLE(x)   (((unsigned)(x) & 0x1) << 31)
289
290#define PKT3_CLEAR_STATE                       0x12
291
292#define PKT3_SET_SH_REG                        0x76
293#define		PACKET3_SET_SH_REG_START			0x00002c00
294
295#define PKT3_SET_SH_REG_INDEX			0x9B
296
297#define	PACKET3_DISPATCH_DIRECT				0x15
298#define PACKET3_EVENT_WRITE				0x46
299#define PACKET3_ACQUIRE_MEM				0x58
300#define PACKET3_SET_CONTEXT_REG				0x69
301#define PACKET3_SET_UCONFIG_REG				0x79
302#define PACKET3_DRAW_INDEX_AUTO				0x2D
303/* gfx 8 */
304#define mmCOMPUTE_PGM_LO                                                        0x2e0c
305#define mmCOMPUTE_PGM_RSRC1                                                     0x2e12
306#define mmCOMPUTE_TMPRING_SIZE                                                  0x2e18
307#define mmCOMPUTE_USER_DATA_0                                                   0x2e40
308#define mmCOMPUTE_USER_DATA_1                                                   0x2e41
309#define mmCOMPUTE_RESOURCE_LIMITS                                               0x2e15
310#define mmCOMPUTE_NUM_THREAD_X                                                  0x2e07
311
312
313
314#define SWAP_32(num) (((num & 0xff000000) >> 24) | \
315		      ((num & 0x0000ff00) << 8) | \
316		      ((num & 0x00ff0000) >> 8) | \
317		      ((num & 0x000000ff) << 24))
318
319
320/* Shader code
321 * void main()
322{
323
324	float x = some_input;
325		for (unsigned i = 0; i < 1000000; i++)
326  	x = sin(x);
327
328	u[0] = 42u;
329}
330*/
331
332static  uint32_t shader_bin[] = {
333	SWAP_32(0x800082be), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0x040085bf),
334	SWAP_32(0x02810281), SWAP_32(0x02ff08bf), SWAP_32(0x7f969800), SWAP_32(0xfcff84bf),
335	SWAP_32(0xff0083be), SWAP_32(0x00f00000), SWAP_32(0xc10082be), SWAP_32(0xaa02007e),
336	SWAP_32(0x000070e0), SWAP_32(0x00000080), SWAP_32(0x000081bf)
337};
338
339#define CODE_OFFSET 512
340#define DATA_OFFSET 1024
341
342enum cs_type {
343	CS_BUFFERCLEAR,
344	CS_BUFFERCOPY,
345	CS_HANG,
346	CS_HANG_SLOW
347};
348
349static const uint32_t bufferclear_cs_shader_gfx9[] = {
350    0x260000ff, 0x000003ff, 0xd1fd0000, 0x04010c08,
351    0x7e020280, 0x7e040204, 0x7e060205, 0x7e080206,
352    0x7e0a0207, 0xe01c2000, 0x80000200, 0xbf8c0000,
353    0xbf810000
354};
355
356static const uint32_t bufferclear_cs_shader_gfx10[] = {
357	0xD7460004, 0x04010C08, 0x7E000204, 0x7E020205,
358	0x7E040206, 0x7E060207, 0xE01C2000, 0x80000004,
359	0xBF810000
360};
361
362static const uint32_t bufferclear_cs_shader_registers_gfx9[][2] = {
363	{0x2e12, 0x000C0041},	//{ mmCOMPUTE_PGM_RSRC1,	  0x000C0041 },
364	{0x2e13, 0x00000090},	//{ mmCOMPUTE_PGM_RSRC2,	  0x00000090 },
365	{0x2e07, 0x00000040},	//{ mmCOMPUTE_NUM_THREAD_X, 0x00000040 },
366	{0x2e08, 0x00000001},	//{ mmCOMPUTE_NUM_THREAD_Y, 0x00000001 },
367	{0x2e09, 0x00000001},	//{ mmCOMPUTE_NUM_THREAD_Z, 0x00000001 }
368};
369
370static const uint32_t bufferclear_cs_shader_registers_num_gfx9 = 5;
371
372static const uint32_t buffercopy_cs_shader_gfx9[] = {
373    0x260000ff, 0x000003ff, 0xd1fd0000, 0x04010c08,
374    0x7e020280, 0xe00c2000, 0x80000200, 0xbf8c0f70,
375    0xe01c2000, 0x80010200, 0xbf810000
376};
377
378static const uint32_t buffercopy_cs_shader_gfx10[] = {
379	0xD7460001, 0x04010C08, 0xE00C2000, 0x80000201,
380	0xBF8C3F70, 0xE01C2000, 0x80010201, 0xBF810000
381};
382
383static const uint32_t preamblecache_gfx9[] = {
384	0xc0026900, 0x81, 0x80000000, 0x40004000, 0xc0026900, 0x8c, 0xaa99aaaa, 0x0,
385	0xc0026900, 0x90, 0x80000000, 0x40004000, 0xc0026900, 0x94, 0x80000000, 0x40004000,
386	0xc0026900, 0xb4,  0x0, 0x3f800000, 0xc0016900, 0x103, 0x0,
387	0xc0016900, 0x208, 0x0, 0xc0016900, 0x290, 0x0,
388	0xc0016900, 0x2a1, 0x0, 0xc0026900, 0x2ad, 0x0, 0x0,
389	0xc0016900, 0x2d5, 0x10000, 0xc0016900,  0x2dc, 0x0,
390	0xc0066900, 0x2de, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc0026900, 0x2e5, 0x0, 0x0,
391	0xc0056900, 0x2f9, 0x5, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
392	0xc0036900, 0x311, 0x3, 0, 0x100000, 0xc0026900, 0x316, 0x1e, 0x20,
393	0xc0016900, 0x349, 0x0, 0xc0016900, 0x358, 0x0, 0xc0016900, 0x367, 0x0,
394	0xc0016900, 0x376, 0x0, 0xc0016900, 0x385, 0x0, 0xc0016900, 0x19, 0x0,
395	0xc0056900, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x0,
396	0xc0076900, 0x1e1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
397	0xc0026900, 0x204, 0x90000, 0x4, 0xc0046900, 0x20c, 0x0, 0x0, 0x0, 0x0,
398	0xc0016900, 0x2b2, 0x0, 0xc0026900, 0x30e, 0xffffffff, 0xffffffff,
399	0xc0016900, 0x314, 0x0, 0xc0016900, 0x2a6, 0, 0xc0016900, 0x210, 0,
400	0xc0002f00, 0x1, 0xc0016900, 0x1, 0x1,
401	0xc0016900, 0x18, 0x2, 0xc0016900, 0x206, 0x300, 0xc0017900, 0x20000243, 0x0,
402	0xc0017900, 0x248, 0xffffffff, 0xc0017900, 0x249, 0x0, 0xc0017900, 0x24a, 0x0,
403	0xc0017900, 0x24b, 0x0
404};
405
406static const uint32_t preamblecache_gfx10[] = {
407	0xc0026900, 0x81, 0x80000000, 0x40004000, 0xc0026900, 0x8c, 0xaa99aaaa, 0x0,
408	0xc0026900, 0x90, 0x80000000, 0x40004000, 0xc0026900, 0x94, 0x80000000, 0x40004000,
409	0xc0026900, 0xb4, 0x0, 0x3f800000, 0xc0016900, 0x103, 0x0,
410	0xc0016900, 0x208, 0x0, 0xc0016900, 0x290, 0x0,
411	0xc0016900, 0x2a1, 0x0, 0xc0026900, 0x2ad, 0x0, 0x0,
412	0xc0016900, 0x2d5, 0x10000, 0xc0016900, 0x2dc, 0x0,
413	0xc0066900, 0x2de, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc0026900, 0x2e5, 0x0, 0x0,
414	0xc0056900, 0x2f9, 0x5, 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
415	0xc0046900, 0x310, 0, 0x3, 0, 0x100000, 0xc0026900, 0x316, 0xe, 0x20,
416	0xc0016900, 0x349, 0x0, 0xc0016900, 0x358, 0x0, 0xc0016900, 0x367, 0x0,
417	0xc0016900, 0x376, 0x0, 0xc0016900, 0x385, 0x0, 0xc0016900, 0x6, 0x0,
418	0xc0056900, 0xe8, 0x0, 0x0, 0x0, 0x0, 0x0,
419	0xc0076900, 0x1e1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
420	0xc0026900, 0x204, 0x90000, 0x4, 0xc0046900, 0x20c, 0x0, 0x0, 0x0, 0x0,
421	0xc0016900, 0x2b2, 0x0, 0xc0026900, 0x30e, 0xffffffff, 0xffffffff,
422	0xc0016900, 0x314, 0x0, 0xc0016900, 0x10a, 0, 0xc0016900, 0x2a6, 0, 0xc0016900, 0x210, 0,
423	0xc0016900, 0x2db, 0, 0xc0016900, 0x1d4, 0, 0xc0002f00, 0x1, 0xc0016900, 0x1, 0x1, 0xc0016900, 0xe, 0x2,
424	0xc0016900, 0x206, 0x300, 0xc0016900, 0x212, 0x200, 0xc0017900, 0x7b, 0x20, 0xc0017a00, 0x20000243, 0x0,
425	0xc0017900, 0x249, 0, 0xc0017900, 0x24a, 0, 0xc0017900, 0x24b, 0, 0xc0017900, 0x259, 0xffffffff,
426	0xc0017900, 0x25f, 0, 0xc0017900, 0x260, 0, 0xc0017900, 0x262, 0,
427	0xc0017600, 0x45, 0x0, 0xc0017600, 0x6, 0x0,
428	0xc0067600, 0x70, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
429	0xc0067600, 0x30, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0
430};
431
432enum ps_type {
433	PS_CONST,
434	PS_TEX,
435	PS_HANG,
436	PS_HANG_SLOW
437};
438
439static const uint32_t ps_const_shader_gfx9[] = {
440    0x7E000200, 0x7E020201, 0x7E040202, 0x7E060203,
441    0xD2960000, 0x00020300, 0xD2960001, 0x00020702,
442    0xC4001C0F, 0x00000100, 0xBF810000
443};
444
445static const uint32_t ps_const_shader_patchinfo_code_size_gfx9 = 6;
446
447static const uint32_t ps_const_shader_patchinfo_code_gfx9[][10][6] = {
448    {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 },
449     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000000 },
450     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000100 },
451     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000300 },
452     { 0xD2960000, 0x00020300, 0xD2960001, 0x00020702, 0xC4001C0F, 0x00000100 },
453     { 0xD2950000, 0x00020300, 0xD2950001, 0x00020702, 0xC4001C0F, 0x00000100 },
454     { 0xD2940000, 0x00020300, 0xD2940001, 0x00020702, 0xC4001C0F, 0x00000100 },
455     { 0xD2970000, 0x00020300, 0xD2970001, 0x00020702, 0xC4001C0F, 0x00000100 },
456     { 0xD2980000, 0x00020300, 0xD2980001, 0x00020702, 0xC4001C0F, 0x00000100 },
457     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x03020100 }
458    }
459};
460
461static const uint32_t ps_const_shader_patchinfo_offset_gfx9[] = {
462    0x00000004
463};
464
465static const uint32_t ps_num_sh_registers_gfx9 = 2;
466
467static const uint32_t ps_const_sh_registers_gfx9[][2] = {
468    {0x2C0A, 0x000C0040},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0040 },
469    {0x2C0B, 0x00000008}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000008 }
470};
471
472static const uint32_t ps_num_context_registers_gfx9 = 7;
473
474static const uint32_t ps_const_context_reg_gfx9[][2] = {
475    {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR,       0x00000002 },
476    {0xA1B6, 0x00000000}, //{ mmSPI_PS_IN_CONTROL,       0x00000000 },
477    {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK,          0x0000000F },
478    {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL,       0x00000010 },
479    {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT,     0x00000000 },
480    {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL,          0x00000000 /* Always 0 for now */},
481    {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT,   0x00000004 }
482};
483
484static const uint32_t ps_const_shader_gfx10[] = {
485    0x7E000200, 0x7E020201, 0x7E040202, 0x7E060203,
486    0x5E000300, 0x5E020702, 0xBF800000, 0xBF800000,
487    0xF8001C0F, 0x00000100, 0xBF810000
488};
489
490static const uint32_t ps_const_shader_patchinfo_code_size_gfx10 = 6;
491
492static const uint32_t ps_const_shader_patchinfo_code_gfx10[][10][6] = {
493    {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001890, 0x00000000 },
494     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001801, 0x00000000 },
495     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001803, 0x00000100 },
496     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001803, 0x00000300 },
497     { 0x5E000300, 0x5E020702, 0xBF800000, 0xBF800000, 0xF8001C0F, 0x00000100 },
498     { 0xD7690000, 0x00020300, 0xD7690001, 0x00020702, 0xF8001C0F, 0x00000100 },
499     { 0xD7680000, 0x00020300, 0xD7680001, 0x00020702, 0xF8001C0F, 0x00000100 },
500     { 0xD76A0000, 0x00020300, 0xD76A0001, 0x00020702, 0xF8001C0F, 0x00000100 },
501     { 0xD76B0000, 0x00020300, 0xD76B0001, 0x00020702, 0xF8001C0F, 0x00000100 },
502     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF800180F, 0x03020100 }
503    }
504};
505
506static const uint32_t ps_const_shader_patchinfo_offset_gfx10[] = {
507    0x00000004
508};
509
510static const uint32_t ps_num_sh_registers_gfx10 = 2;
511
512static const uint32_t ps_const_sh_registers_gfx10[][2] = {
513    {0x2C0A, 0x000C0000},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0000 },
514    {0x2C0B, 0x00000008}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000008 }
515};
516
517static const uint32_t ps_tex_shader_gfx9[] = {
518    0xBEFC000C, 0xBE8E017E, 0xBEFE077E, 0xD4180000,
519    0xD4190001, 0xD41C0100, 0xD41D0101, 0xF0800F00,
520    0x00400206, 0xBEFE010E, 0xBF8C0F70, 0xD2960000,
521    0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F,
522    0x00000100, 0xBF810000
523};
524
525static const uint32_t ps_tex_shader_patchinfo_offset_gfx9[] = {
526    0x0000000B
527};
528
529static const uint32_t ps_tex_shader_patchinfo_code_size_gfx9 = 6;
530
531static const uint32_t ps_tex_shader_patchinfo_code_gfx9[][10][6] = {
532    {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001890, 0x00000000 },
533     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001801, 0x00000002 },
534     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000302 },
535     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC4001803, 0x00000502 },
536     { 0xD2960000, 0x00020702, 0xD2960001, 0x00020B04, 0xC4001C0F, 0x00000100 },
537     { 0xD2950000, 0x00020702, 0xD2950001, 0x00020B04, 0xC4001C0F, 0x00000100 },
538     { 0xD2940000, 0x00020702, 0xD2940001, 0x00020B04, 0xC4001C0F, 0x00000100 },
539     { 0xD2970000, 0x00020702, 0xD2970001, 0x00020B04, 0xC4001C0F, 0x00000100 },
540     { 0xD2980000, 0x00020702, 0xD2980001, 0x00020B04, 0xC4001C0F, 0x00000100 },
541     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xC400180F, 0x05040302 }
542    }
543};
544
545static const uint32_t ps_tex_sh_registers_gfx9[][2] = {
546    {0x2C0A, 0x000C0081},//{ mmSPI_SHADER_PGM_RSRC1_PS, 0x000C0081 },
547    {0x2C0B, 0x00000018}, //{ mmSPI_SHADER_PGM_RSRC2_PS, 0x00000018 }
548};
549
550static const uint32_t ps_tex_context_reg_gfx9[][2] = {
551    {0xA1B4, 0x00000002}, //{ mmSPI_PS_INPUT_ADDR,       0x00000002 },
552    {0xA1B6, 0x00000001}, //{ mmSPI_PS_IN_CONTROL,       0x00000001 },
553    {0xA08F, 0x0000000F}, //{ mmCB_SHADER_MASK,          0x0000000F },
554    {0xA203, 0x00000010}, //{ mmDB_SHADER_CONTROL,       0x00000010 },
555    {0xA1C4, 0x00000000}, //{ mmSPI_SHADER_Z_FORMAT,     0x00000000 },
556    {0xA1B8, 0x00000000}, //{ mmSPI_BARYC_CNTL,          0x00000000 /* Always 0 for now */},
557    {0xA1C5, 0x00000004}, //{ mmSPI_SHADER_COL_FORMAT,   0x00000004  }
558};
559
560static const uint32_t ps_tex_shader_gfx10[] = {
561    0xBEFC030C, 0xBE8E047E, 0xBEFE0A7E, 0xC8080000,
562    0xC80C0100, 0xC8090001, 0xC80D0101, 0xF0800F0A,
563    0x00400402, 0x00000003, 0xBEFE040E, 0xBF8C0F70,
564    0x5E000B04, 0x5E020F06, 0xBF800000, 0xBF800000,
565    0xF8001C0F, 0x00000100, 0xBF810000
566};
567
568static const uint32_t ps_tex_shader_patchinfo_offset_gfx10[] = {
569    0x0000000C
570};
571
572static const uint32_t ps_tex_shader_patchinfo_code_size_gfx10 = 6;
573
574static const uint32_t ps_tex_shader_patchinfo_code_gfx10[][10][6] = {
575    {{ 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001890, 0x00000000 },
576     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001801, 0x00000004 },
577     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001803, 0x00000504 },
578     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF8001803, 0x00000704 },
579     { 0x5E000B04, 0x5E020F06, 0xBF800000, 0xBF800000, 0xF8001C0F, 0x00000100 },
580     { 0xD7690000, 0x00020B04, 0xD7690001, 0x00020F06, 0xF8001C0F, 0x00000100 },
581     { 0xD7680000, 0x00020B04, 0xD7680001, 0x00020F06, 0xF8001C0F, 0x00000100 },
582     { 0xD76A0000, 0x00020B04, 0xD76A0001, 0x00020F06, 0xF8001C0F, 0x00000100 },
583     { 0xD76B0000, 0x00020B04, 0xD76B0001, 0x00020F06, 0xF8001C0F, 0x00000100 },
584     { 0xBF800000, 0xBF800000, 0xBF800000, 0xBF800000, 0xF800180F, 0x07060504 }
585    }
586};
587
588static const uint32_t vs_RectPosTexFast_shader_gfx9[] = {
589    0x7E000B00, 0x020000F3, 0xD042000A, 0x00010100,
590    0x7E020202, 0x7E040200, 0x020000F3, 0x7E060206,
591    0x7E080204, 0xD1000001, 0x002A0302, 0x7C840080,
592    0x7E000200, 0x7E040203, 0x7E0A0201, 0xD1000003,
593    0x002A0704, 0x7E0C0207, 0x7E0E0205, 0x00000101,
594    0x00020505, 0x7E040208, 0x7E0A02F2, 0x00060903,
595    0x00080D07, 0x7E0C0209, 0xC40008CF, 0x05020100,
596    0xC400020F, 0x05060403, 0xBF810000
597};
598
599static const uint32_t vs_RectPosTexFast_shader_gfx10[] = {
600    0x7E000B00, 0x060000F3, 0x7E020202, 0x7E040206,
601    0x7C040080, 0x060000F3, 0xD5010001, 0x01AA0200,
602    0x7E060203, 0xD5010002, 0x01AA0404, 0x7E080207,
603    0x7C040080, 0xD5010000, 0x01A80101, 0xD5010001,
604    0x01AA0601, 0x7E060208, 0x7E0A02F2, 0xD5010002,
605    0x01A80902, 0xD5010004, 0x01AA0805, 0x7E0C0209,
606    0xF80008CF, 0x05030100, 0xF800020F, 0x05060402,
607    0xBF810000
608};
609
610static const uint32_t cached_cmd_gfx9[] = {
611	0xc0016900, 0x0, 0x0, 0xc0026900, 0x3, 0x2a, 0x0,
612	0xc0046900, 0xa, 0x0, 0x0, 0x0, 0x200020,
613	0xc0016900, 0x83, 0xffff, 0xc0026900, 0x8e, 0xf, 0xf,
614	0xc0056900, 0x105, 0x0, 0x0,  0x0, 0x0, 0x12,
615	0xc0026900, 0x10b, 0x0, 0x0, 0xc0016900, 0x1e0, 0x0,
616	0xc0036900, 0x200, 0x0, 0x10000, 0xcc0011,
617	0xc0026900, 0x292, 0x20, 0x60201b8,
618	0xc0026900, 0x2b0, 0x0, 0x0, 0xc0016900, 0x2f8, 0x0
619};
620
621static const uint32_t cached_cmd_gfx10[] = {
622	0xc0016900, 0x0, 0x0, 0xc0026900, 0x3, 0x2a, 0x0,
623	0xc0046900, 0xa, 0x0, 0x0, 0x0, 0x200020,
624	0xc0016900, 0x83, 0xffff, 0xc0026900, 0x8e, 0xf, 0xf,
625	0xc0056900, 0x105, 0x0, 0x0, 0x0, 0x0, 0x18,
626	0xc0026900, 0x10b, 0x0, 0x0, 0xc0016900, 0x1e0, 0x0,
627	0xc0036900, 0x200, 0x0, 0x10000, 0xcc0011,
628	0xc0026900, 0x292, 0x20, 0x6020000,
629	0xc0026900, 0x2b0, 0x0, 0x0, 0xc0016900, 0x2f8, 0x0
630};
631
632unsigned int memcpy_ps_hang[] = {
633        0xFFFFFFFF, 0xBEFE0A7E, 0xBEFC0304, 0xC0C20100,
634        0xC0800300, 0xC8080000, 0xC80C0100, 0xC8090001,
635        0xC80D0101, 0xBF8C007F, 0xF0800F00, 0x00010002,
636        0xBEFE040C, 0xBF8C0F70, 0xBF800000, 0xBF800000,
637        0xF800180F, 0x03020100, 0xBF810000
638};
639
640struct amdgpu_test_shader {
641	uint32_t *shader;
642	uint32_t header_length;
643	uint32_t body_length;
644	uint32_t foot_length;
645};
646
647unsigned int memcpy_cs_hang_slow_ai_codes[] = {
648    0xd1fd0000, 0x04010c08, 0xe00c2000, 0x80000100,
649    0xbf8c0f70, 0xe01c2000, 0x80010100, 0xbf810000
650};
651
652struct amdgpu_test_shader memcpy_cs_hang_slow_ai = {
653        memcpy_cs_hang_slow_ai_codes,
654        4,
655        3,
656        1
657};
658
659unsigned int memcpy_cs_hang_slow_rv_codes[] = {
660    0x8e00860c, 0x32000000, 0xe00c2000, 0x80010100,
661    0xbf8c0f70, 0xe01c2000, 0x80020100, 0xbf810000
662};
663
664struct amdgpu_test_shader memcpy_cs_hang_slow_rv = {
665        memcpy_cs_hang_slow_rv_codes,
666        4,
667        3,
668        1
669};
670
671unsigned int memcpy_cs_hang_slow_nv_codes[] = {
672    0xd7460000, 0x04010c08, 0xe00c2000, 0x80000100,
673    0xbf8c0f70, 0xe01ca000, 0x80010100, 0xbf810000
674};
675
676struct amdgpu_test_shader memcpy_cs_hang_slow_nv = {
677        memcpy_cs_hang_slow_nv_codes,
678        4,
679        3,
680        1
681};
682
683unsigned int memcpy_ps_hang_slow_ai_codes[] = {
684        0xbefc000c, 0xbe8e017e, 0xbefe077e, 0xd4080000,
685        0xd4090001, 0xd40c0100, 0xd40d0101, 0xf0800f00,
686        0x00400002, 0xbefe010e, 0xbf8c0f70, 0xbf800000,
687        0xbf800000, 0xbf800000, 0xbf800000, 0xc400180f,
688        0x03020100, 0xbf810000
689};
690
691struct amdgpu_test_shader memcpy_ps_hang_slow_ai = {
692        memcpy_ps_hang_slow_ai_codes,
693        7,
694        2,
695        9
696};
697
698int amdgpu_bo_alloc_and_map_raw(amdgpu_device_handle dev, unsigned size,
699			unsigned alignment, unsigned heap, uint64_t alloc_flags,
700			uint64_t mapping_flags, amdgpu_bo_handle *bo, void **cpu,
701			uint64_t *mc_address,
702			amdgpu_va_handle *va_handle)
703{
704	struct amdgpu_bo_alloc_request request = {};
705	amdgpu_bo_handle buf_handle;
706	amdgpu_va_handle handle;
707	uint64_t vmc_addr;
708	int r;
709
710	request.alloc_size = size;
711	request.phys_alignment = alignment;
712	request.preferred_heap = heap;
713	request.flags = alloc_flags;
714
715	r = amdgpu_bo_alloc(dev, &request, &buf_handle);
716	if (r)
717		return r;
718
719	r = amdgpu_va_range_alloc(dev,
720				  amdgpu_gpu_va_range_general,
721				  size, alignment, 0, &vmc_addr,
722				  &handle, 0);
723	if (r)
724		goto error_va_alloc;
725
726	r = amdgpu_bo_va_op_raw(dev, buf_handle, 0,  ALIGN(size, getpagesize()), vmc_addr,
727				   AMDGPU_VM_PAGE_READABLE |
728				   AMDGPU_VM_PAGE_WRITEABLE |
729				   AMDGPU_VM_PAGE_EXECUTABLE |
730				   mapping_flags,
731				   AMDGPU_VA_OP_MAP);
732	if (r)
733		goto error_va_map;
734
735	r = amdgpu_bo_cpu_map(buf_handle, cpu);
736	if (r)
737		goto error_cpu_map;
738
739	*bo = buf_handle;
740	*mc_address = vmc_addr;
741	*va_handle = handle;
742
743	return 0;
744
745 error_cpu_map:
746	amdgpu_bo_cpu_unmap(buf_handle);
747
748 error_va_map:
749	amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP);
750
751 error_va_alloc:
752	amdgpu_bo_free(buf_handle);
753	return r;
754}
755
756
757
758CU_BOOL suite_basic_tests_enable(void)
759{
760
761	if (amdgpu_device_initialize(drm_amdgpu[0], &major_version,
762					     &minor_version, &device_handle))
763		return CU_FALSE;
764
765
766	family_id = device_handle->info.family_id;
767	chip_id = device_handle->info.chip_external_rev;
768	chip_rev = device_handle->info.chip_rev;
769
770	if (amdgpu_device_deinitialize(device_handle))
771		return CU_FALSE;
772
773	/* disable gfx engine basic test cases for some asics have no CPG */
774	if (asic_is_gfx_pipe_removed(family_id, chip_id, chip_rev)) {
775		if (amdgpu_set_test_active("Basic Tests",
776					"Command submission Test (GFX)",
777					CU_FALSE))
778			fprintf(stderr, "test deactivation failed - %s\n",
779				CU_get_error_msg());
780
781		if (amdgpu_set_test_active("Basic Tests",
782					"Command submission Test (Multi-Fence)",
783					CU_FALSE))
784			fprintf(stderr, "test deactivation failed - %s\n",
785				CU_get_error_msg());
786
787		if (amdgpu_set_test_active("Basic Tests",
788					"Sync dependency Test",
789					CU_FALSE))
790			fprintf(stderr, "test deactivation failed - %s\n",
791				CU_get_error_msg());
792	}
793
794	return CU_TRUE;
795}
796
797int suite_basic_tests_init(void)
798{
799	struct amdgpu_gpu_info gpu_info = {0};
800	int r;
801
802	r = amdgpu_device_initialize(drm_amdgpu[0], &major_version,
803				   &minor_version, &device_handle);
804
805	if (r) {
806		if ((r == -EACCES) && (errno == EACCES))
807			printf("\n\nError:%s. "
808				"Hint:Try to run this test program as root.",
809				strerror(errno));
810		return CUE_SINIT_FAILED;
811	}
812
813	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
814	if (r)
815		return CUE_SINIT_FAILED;
816
817	family_id = gpu_info.family_id;
818
819	return CUE_SUCCESS;
820}
821
822int suite_basic_tests_clean(void)
823{
824	int r = amdgpu_device_deinitialize(device_handle);
825
826	if (r == 0)
827		return CUE_SUCCESS;
828	else
829		return CUE_SCLEAN_FAILED;
830}
831
832static void amdgpu_query_info_test(void)
833{
834	struct amdgpu_gpu_info gpu_info = {0};
835	uint32_t version, feature;
836	int r;
837
838	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
839	CU_ASSERT_EQUAL(r, 0);
840
841	r = amdgpu_query_firmware_version(device_handle, AMDGPU_INFO_FW_VCE, 0,
842					  0, &version, &feature);
843	CU_ASSERT_EQUAL(r, 0);
844}
845
846static void amdgpu_command_submission_gfx_separate_ibs(void)
847{
848	amdgpu_context_handle context_handle;
849	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
850	void *ib_result_cpu, *ib_result_ce_cpu;
851	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
852	struct amdgpu_cs_request ibs_request = {0};
853	struct amdgpu_cs_ib_info ib_info[2];
854	struct amdgpu_cs_fence fence_status = {0};
855	uint32_t *ptr;
856	uint32_t expired;
857	amdgpu_bo_list_handle bo_list;
858	amdgpu_va_handle va_handle, va_handle_ce;
859	int r, i = 0;
860
861	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
862	CU_ASSERT_EQUAL(r, 0);
863
864	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
865				    AMDGPU_GEM_DOMAIN_GTT, 0,
866				    &ib_result_handle, &ib_result_cpu,
867				    &ib_result_mc_address, &va_handle);
868	CU_ASSERT_EQUAL(r, 0);
869
870	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
871				    AMDGPU_GEM_DOMAIN_GTT, 0,
872				    &ib_result_ce_handle, &ib_result_ce_cpu,
873				    &ib_result_ce_mc_address, &va_handle_ce);
874	CU_ASSERT_EQUAL(r, 0);
875
876	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
877			       ib_result_ce_handle, &bo_list);
878	CU_ASSERT_EQUAL(r, 0);
879
880	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));
881
882	/* IT_SET_CE_DE_COUNTERS */
883	ptr = ib_result_ce_cpu;
884	if (family_id != AMDGPU_FAMILY_SI) {
885		ptr[i++] = 0xc0008900;
886		ptr[i++] = 0;
887	}
888	ptr[i++] = 0xc0008400;
889	ptr[i++] = 1;
890	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
891	ib_info[0].size = i;
892	ib_info[0].flags = AMDGPU_IB_FLAG_CE;
893
894	/* IT_WAIT_ON_CE_COUNTER */
895	ptr = ib_result_cpu;
896	ptr[0] = 0xc0008600;
897	ptr[1] = 0x00000001;
898	ib_info[1].ib_mc_address = ib_result_mc_address;
899	ib_info[1].size = 2;
900
901	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
902	ibs_request.number_of_ibs = 2;
903	ibs_request.ibs = ib_info;
904	ibs_request.resources = bo_list;
905	ibs_request.fence_info.handle = NULL;
906
907	r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1);
908
909	CU_ASSERT_EQUAL(r, 0);
910
911	fence_status.context = context_handle;
912	fence_status.ip_type = AMDGPU_HW_IP_GFX;
913	fence_status.ip_instance = 0;
914	fence_status.fence = ibs_request.seq_no;
915
916	r = amdgpu_cs_query_fence_status(&fence_status,
917					 AMDGPU_TIMEOUT_INFINITE,
918					 0, &expired);
919	CU_ASSERT_EQUAL(r, 0);
920
921	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
922				     ib_result_mc_address, 4096);
923	CU_ASSERT_EQUAL(r, 0);
924
925	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
926				     ib_result_ce_mc_address, 4096);
927	CU_ASSERT_EQUAL(r, 0);
928
929	r = amdgpu_bo_list_destroy(bo_list);
930	CU_ASSERT_EQUAL(r, 0);
931
932	r = amdgpu_cs_ctx_free(context_handle);
933	CU_ASSERT_EQUAL(r, 0);
934
935}
936
937static void amdgpu_command_submission_gfx_shared_ib(void)
938{
939	amdgpu_context_handle context_handle;
940	amdgpu_bo_handle ib_result_handle;
941	void *ib_result_cpu;
942	uint64_t ib_result_mc_address;
943	struct amdgpu_cs_request ibs_request = {0};
944	struct amdgpu_cs_ib_info ib_info[2];
945	struct amdgpu_cs_fence fence_status = {0};
946	uint32_t *ptr;
947	uint32_t expired;
948	amdgpu_bo_list_handle bo_list;
949	amdgpu_va_handle va_handle;
950	int r, i = 0;
951
952	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
953	CU_ASSERT_EQUAL(r, 0);
954
955	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
956				    AMDGPU_GEM_DOMAIN_GTT, 0,
957				    &ib_result_handle, &ib_result_cpu,
958				    &ib_result_mc_address, &va_handle);
959	CU_ASSERT_EQUAL(r, 0);
960
961	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
962			       &bo_list);
963	CU_ASSERT_EQUAL(r, 0);
964
965	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));
966
967	/* IT_SET_CE_DE_COUNTERS */
968	ptr = ib_result_cpu;
969	if (family_id != AMDGPU_FAMILY_SI) {
970		ptr[i++] = 0xc0008900;
971		ptr[i++] = 0;
972	}
973	ptr[i++] = 0xc0008400;
974	ptr[i++] = 1;
975	ib_info[0].ib_mc_address = ib_result_mc_address;
976	ib_info[0].size = i;
977	ib_info[0].flags = AMDGPU_IB_FLAG_CE;
978
979	ptr = (uint32_t *)ib_result_cpu + 4;
980	ptr[0] = 0xc0008600;
981	ptr[1] = 0x00000001;
982	ib_info[1].ib_mc_address = ib_result_mc_address + 16;
983	ib_info[1].size = 2;
984
985	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
986	ibs_request.number_of_ibs = 2;
987	ibs_request.ibs = ib_info;
988	ibs_request.resources = bo_list;
989	ibs_request.fence_info.handle = NULL;
990
991	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
992
993	CU_ASSERT_EQUAL(r, 0);
994
995	fence_status.context = context_handle;
996	fence_status.ip_type = AMDGPU_HW_IP_GFX;
997	fence_status.ip_instance = 0;
998	fence_status.fence = ibs_request.seq_no;
999
1000	r = amdgpu_cs_query_fence_status(&fence_status,
1001					 AMDGPU_TIMEOUT_INFINITE,
1002					 0, &expired);
1003	CU_ASSERT_EQUAL(r, 0);
1004
1005	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
1006				     ib_result_mc_address, 4096);
1007	CU_ASSERT_EQUAL(r, 0);
1008
1009	r = amdgpu_bo_list_destroy(bo_list);
1010	CU_ASSERT_EQUAL(r, 0);
1011
1012	r = amdgpu_cs_ctx_free(context_handle);
1013	CU_ASSERT_EQUAL(r, 0);
1014}
1015
1016static void amdgpu_command_submission_gfx_cp_write_data(void)
1017{
1018	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_GFX);
1019}
1020
1021static void amdgpu_command_submission_gfx_cp_const_fill(void)
1022{
1023	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_GFX);
1024}
1025
1026static void amdgpu_command_submission_gfx_cp_copy_data(void)
1027{
1028	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_GFX);
1029}
1030
1031static void amdgpu_bo_eviction_test(void)
1032{
1033	const int sdma_write_length = 1024;
1034	const int pm4_dw = 256;
1035	amdgpu_context_handle context_handle;
1036	amdgpu_bo_handle bo1, bo2, vram_max[2], gtt_max[2];
1037	amdgpu_bo_handle *resources;
1038	uint32_t *pm4;
1039	struct amdgpu_cs_ib_info *ib_info;
1040	struct amdgpu_cs_request *ibs_request;
1041	uint64_t bo1_mc, bo2_mc;
1042	volatile unsigned char *bo1_cpu, *bo2_cpu;
1043	int i, j, r, loop1, loop2;
1044	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
1045	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
1046	struct amdgpu_heap_info vram_info, gtt_info;
1047
1048	pm4 = calloc(pm4_dw, sizeof(*pm4));
1049	CU_ASSERT_NOT_EQUAL(pm4, NULL);
1050
1051	ib_info = calloc(1, sizeof(*ib_info));
1052	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1053
1054	ibs_request = calloc(1, sizeof(*ibs_request));
1055	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1056
1057	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1058	CU_ASSERT_EQUAL(r, 0);
1059
1060	/* prepare resource */
1061	resources = calloc(4, sizeof(amdgpu_bo_handle));
1062	CU_ASSERT_NOT_EQUAL(resources, NULL);
1063
1064	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_VRAM,
1065				   0, &vram_info);
1066	CU_ASSERT_EQUAL(r, 0);
1067
1068	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
1069				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[0]);
1070	CU_ASSERT_EQUAL(r, 0);
1071	r = amdgpu_bo_alloc_wrap(device_handle, vram_info.max_allocation, 4096,
1072				 AMDGPU_GEM_DOMAIN_VRAM, 0, &vram_max[1]);
1073	CU_ASSERT_EQUAL(r, 0);
1074
1075	r = amdgpu_query_heap_info(device_handle, AMDGPU_GEM_DOMAIN_GTT,
1076				   0, &gtt_info);
1077	CU_ASSERT_EQUAL(r, 0);
1078
1079	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
1080				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[0]);
1081	CU_ASSERT_EQUAL(r, 0);
1082	r = amdgpu_bo_alloc_wrap(device_handle, gtt_info.max_allocation, 4096,
1083				 AMDGPU_GEM_DOMAIN_GTT, 0, &gtt_max[1]);
1084	CU_ASSERT_EQUAL(r, 0);
1085
1086
1087
1088	loop1 = loop2 = 0;
1089	/* run 9 circle to test all mapping combination */
1090	while(loop1 < 2) {
1091		while(loop2 < 2) {
1092			/* allocate UC bo1for sDMA use */
1093			r = amdgpu_bo_alloc_and_map(device_handle,
1094						    sdma_write_length, 4096,
1095						    AMDGPU_GEM_DOMAIN_GTT,
1096						    gtt_flags[loop1], &bo1,
1097						    (void**)&bo1_cpu, &bo1_mc,
1098						    &bo1_va_handle);
1099			CU_ASSERT_EQUAL(r, 0);
1100
1101			/* set bo1 */
1102			memset((void*)bo1_cpu, 0xaa, sdma_write_length);
1103
1104			/* allocate UC bo2 for sDMA use */
1105			r = amdgpu_bo_alloc_and_map(device_handle,
1106						    sdma_write_length, 4096,
1107						    AMDGPU_GEM_DOMAIN_GTT,
1108						    gtt_flags[loop2], &bo2,
1109						    (void**)&bo2_cpu, &bo2_mc,
1110						    &bo2_va_handle);
1111			CU_ASSERT_EQUAL(r, 0);
1112
1113			/* clear bo2 */
1114			memset((void*)bo2_cpu, 0, sdma_write_length);
1115
1116			resources[0] = bo1;
1117			resources[1] = bo2;
1118			resources[2] = vram_max[loop2];
1119			resources[3] = gtt_max[loop2];
1120
1121			/* fulfill PM4: test DMA copy linear */
1122			i = j = 0;
1123			if (family_id == AMDGPU_FAMILY_SI) {
1124				pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI, 0, 0, 0,
1125							  sdma_write_length);
1126				pm4[i++] = 0xffffffff & bo2_mc;
1127				pm4[i++] = 0xffffffff & bo1_mc;
1128				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1129				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1130			} else {
1131				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0);
1132				if (family_id >= AMDGPU_FAMILY_AI)
1133					pm4[i++] = sdma_write_length - 1;
1134				else
1135					pm4[i++] = sdma_write_length;
1136				pm4[i++] = 0;
1137				pm4[i++] = 0xffffffff & bo1_mc;
1138				pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1139				pm4[i++] = 0xffffffff & bo2_mc;
1140				pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1141			}
1142
1143			amdgpu_test_exec_cs_helper(context_handle,
1144						   AMDGPU_HW_IP_DMA, 0,
1145						   i, pm4,
1146						   4, resources,
1147						   ib_info, ibs_request);
1148
1149			/* verify if SDMA test result meets with expected */
1150			i = 0;
1151			while(i < sdma_write_length) {
1152				CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
1153			}
1154			r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
1155						     sdma_write_length);
1156			CU_ASSERT_EQUAL(r, 0);
1157			r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
1158						     sdma_write_length);
1159			CU_ASSERT_EQUAL(r, 0);
1160			loop2++;
1161		}
1162		loop2 = 0;
1163		loop1++;
1164	}
1165	amdgpu_bo_free(vram_max[0]);
1166	amdgpu_bo_free(vram_max[1]);
1167	amdgpu_bo_free(gtt_max[0]);
1168	amdgpu_bo_free(gtt_max[1]);
1169	/* clean resources */
1170	free(resources);
1171	free(ibs_request);
1172	free(ib_info);
1173	free(pm4);
1174
1175	/* end of test */
1176	r = amdgpu_cs_ctx_free(context_handle);
1177	CU_ASSERT_EQUAL(r, 0);
1178}
1179
1180
1181static void amdgpu_command_submission_gfx(void)
1182{
1183	/* write data using the CP */
1184	amdgpu_command_submission_gfx_cp_write_data();
1185	/* const fill using the CP */
1186	amdgpu_command_submission_gfx_cp_const_fill();
1187	/* copy data using the CP */
1188	amdgpu_command_submission_gfx_cp_copy_data();
1189	/* separate IB buffers for multi-IB submission */
1190	amdgpu_command_submission_gfx_separate_ibs();
1191	/* shared IB buffer for multi-IB submission */
1192	amdgpu_command_submission_gfx_shared_ib();
1193}
1194
1195static void amdgpu_semaphore_test(void)
1196{
1197	amdgpu_context_handle context_handle[2];
1198	amdgpu_semaphore_handle sem;
1199	amdgpu_bo_handle ib_result_handle[2];
1200	void *ib_result_cpu[2];
1201	uint64_t ib_result_mc_address[2];
1202	struct amdgpu_cs_request ibs_request[2] = {0};
1203	struct amdgpu_cs_ib_info ib_info[2] = {0};
1204	struct amdgpu_cs_fence fence_status = {0};
1205	uint32_t *ptr;
1206	uint32_t expired;
1207	uint32_t sdma_nop, gfx_nop;
1208	amdgpu_bo_list_handle bo_list[2];
1209	amdgpu_va_handle va_handle[2];
1210	int r, i;
1211	struct amdgpu_gpu_info gpu_info = {0};
1212	unsigned gc_ip_type;
1213
1214	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
1215	CU_ASSERT_EQUAL(r, 0);
1216
1217	gc_ip_type = (asic_is_gfx_pipe_removed(family_id, chip_id, chip_rev)) ?
1218			AMDGPU_HW_IP_COMPUTE : AMDGPU_HW_IP_GFX;
1219
1220	if (family_id == AMDGPU_FAMILY_SI) {
1221		sdma_nop = SDMA_PACKET_SI(SDMA_NOP_SI, 0, 0, 0, 0);
1222		gfx_nop = GFX_COMPUTE_NOP_SI;
1223	} else {
1224		sdma_nop = SDMA_PKT_HEADER_OP(SDMA_NOP);
1225		gfx_nop = GFX_COMPUTE_NOP;
1226	}
1227
1228	r = amdgpu_cs_create_semaphore(&sem);
1229	CU_ASSERT_EQUAL(r, 0);
1230	for (i = 0; i < 2; i++) {
1231		r = amdgpu_cs_ctx_create(device_handle, &context_handle[i]);
1232		CU_ASSERT_EQUAL(r, 0);
1233
1234		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
1235					    AMDGPU_GEM_DOMAIN_GTT, 0,
1236					    &ib_result_handle[i], &ib_result_cpu[i],
1237					    &ib_result_mc_address[i], &va_handle[i]);
1238		CU_ASSERT_EQUAL(r, 0);
1239
1240		r = amdgpu_get_bo_list(device_handle, ib_result_handle[i],
1241				       NULL, &bo_list[i]);
1242		CU_ASSERT_EQUAL(r, 0);
1243	}
1244
1245	/* 1. same context different engine */
1246	ptr = ib_result_cpu[0];
1247	ptr[0] = sdma_nop;
1248	ib_info[0].ib_mc_address = ib_result_mc_address[0];
1249	ib_info[0].size = 1;
1250
1251	ibs_request[0].ip_type = AMDGPU_HW_IP_DMA;
1252	ibs_request[0].number_of_ibs = 1;
1253	ibs_request[0].ibs = &ib_info[0];
1254	ibs_request[0].resources = bo_list[0];
1255	ibs_request[0].fence_info.handle = NULL;
1256	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[0], 1);
1257	CU_ASSERT_EQUAL(r, 0);
1258	r = amdgpu_cs_signal_semaphore(context_handle[0], AMDGPU_HW_IP_DMA, 0, 0, sem);
1259	CU_ASSERT_EQUAL(r, 0);
1260
1261	r = amdgpu_cs_wait_semaphore(context_handle[0], gc_ip_type, 0, 0, sem);
1262	CU_ASSERT_EQUAL(r, 0);
1263	ptr = ib_result_cpu[1];
1264	ptr[0] = gfx_nop;
1265	ib_info[1].ib_mc_address = ib_result_mc_address[1];
1266	ib_info[1].size = 1;
1267
1268	ibs_request[1].ip_type = gc_ip_type;
1269	ibs_request[1].number_of_ibs = 1;
1270	ibs_request[1].ibs = &ib_info[1];
1271	ibs_request[1].resources = bo_list[1];
1272	ibs_request[1].fence_info.handle = NULL;
1273
1274	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[1], 1);
1275	CU_ASSERT_EQUAL(r, 0);
1276
1277	fence_status.context = context_handle[0];
1278	fence_status.ip_type = gc_ip_type;
1279	fence_status.ip_instance = 0;
1280	fence_status.fence = ibs_request[1].seq_no;
1281	r = amdgpu_cs_query_fence_status(&fence_status,
1282					 500000000, 0, &expired);
1283	CU_ASSERT_EQUAL(r, 0);
1284	CU_ASSERT_EQUAL(expired, true);
1285
1286	/* 2. same engine different context */
1287	ptr = ib_result_cpu[0];
1288	ptr[0] = gfx_nop;
1289	ib_info[0].ib_mc_address = ib_result_mc_address[0];
1290	ib_info[0].size = 1;
1291
1292	ibs_request[0].ip_type = gc_ip_type;
1293	ibs_request[0].number_of_ibs = 1;
1294	ibs_request[0].ibs = &ib_info[0];
1295	ibs_request[0].resources = bo_list[0];
1296	ibs_request[0].fence_info.handle = NULL;
1297	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request[0], 1);
1298	CU_ASSERT_EQUAL(r, 0);
1299	r = amdgpu_cs_signal_semaphore(context_handle[0], gc_ip_type, 0, 0, sem);
1300	CU_ASSERT_EQUAL(r, 0);
1301
1302	r = amdgpu_cs_wait_semaphore(context_handle[1], gc_ip_type, 0, 0, sem);
1303	CU_ASSERT_EQUAL(r, 0);
1304	ptr = ib_result_cpu[1];
1305	ptr[0] = gfx_nop;
1306	ib_info[1].ib_mc_address = ib_result_mc_address[1];
1307	ib_info[1].size = 1;
1308
1309	ibs_request[1].ip_type = gc_ip_type;
1310	ibs_request[1].number_of_ibs = 1;
1311	ibs_request[1].ibs = &ib_info[1];
1312	ibs_request[1].resources = bo_list[1];
1313	ibs_request[1].fence_info.handle = NULL;
1314	r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request[1], 1);
1315
1316	CU_ASSERT_EQUAL(r, 0);
1317
1318	fence_status.context = context_handle[1];
1319	fence_status.ip_type = gc_ip_type;
1320	fence_status.ip_instance = 0;
1321	fence_status.fence = ibs_request[1].seq_no;
1322	r = amdgpu_cs_query_fence_status(&fence_status,
1323					 500000000, 0, &expired);
1324	CU_ASSERT_EQUAL(r, 0);
1325	CU_ASSERT_EQUAL(expired, true);
1326
1327	for (i = 0; i < 2; i++) {
1328		r = amdgpu_bo_unmap_and_free(ib_result_handle[i], va_handle[i],
1329					     ib_result_mc_address[i], 4096);
1330		CU_ASSERT_EQUAL(r, 0);
1331
1332		r = amdgpu_bo_list_destroy(bo_list[i]);
1333		CU_ASSERT_EQUAL(r, 0);
1334
1335		r = amdgpu_cs_ctx_free(context_handle[i]);
1336		CU_ASSERT_EQUAL(r, 0);
1337	}
1338
1339	r = amdgpu_cs_destroy_semaphore(sem);
1340	CU_ASSERT_EQUAL(r, 0);
1341}
1342
1343static void amdgpu_command_submission_compute_nop(void)
1344{
1345	amdgpu_context_handle context_handle;
1346	amdgpu_bo_handle ib_result_handle;
1347	void *ib_result_cpu;
1348	uint64_t ib_result_mc_address;
1349	struct amdgpu_cs_request ibs_request;
1350	struct amdgpu_cs_ib_info ib_info;
1351	struct amdgpu_cs_fence fence_status;
1352	uint32_t *ptr;
1353	uint32_t expired;
1354	int r, instance;
1355	amdgpu_bo_list_handle bo_list;
1356	amdgpu_va_handle va_handle;
1357	struct drm_amdgpu_info_hw_ip info;
1358
1359	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
1360	CU_ASSERT_EQUAL(r, 0);
1361
1362	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1363	CU_ASSERT_EQUAL(r, 0);
1364
1365	for (instance = 0; (1 << instance) & info.available_rings; instance++) {
1366		r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
1367					    AMDGPU_GEM_DOMAIN_GTT, 0,
1368					    &ib_result_handle, &ib_result_cpu,
1369					    &ib_result_mc_address, &va_handle);
1370		CU_ASSERT_EQUAL(r, 0);
1371
1372		r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
1373				       &bo_list);
1374		CU_ASSERT_EQUAL(r, 0);
1375
1376		ptr = ib_result_cpu;
1377		memset(ptr, 0, 16);
1378		ptr[0]=PACKET3(PACKET3_NOP, 14);
1379
1380		memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
1381		ib_info.ib_mc_address = ib_result_mc_address;
1382		ib_info.size = 16;
1383
1384		memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
1385		ibs_request.ip_type = AMDGPU_HW_IP_COMPUTE;
1386		ibs_request.ring = instance;
1387		ibs_request.number_of_ibs = 1;
1388		ibs_request.ibs = &ib_info;
1389		ibs_request.resources = bo_list;
1390		ibs_request.fence_info.handle = NULL;
1391
1392		memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
1393		r = amdgpu_cs_submit(context_handle, 0,&ibs_request, 1);
1394		CU_ASSERT_EQUAL(r, 0);
1395
1396		fence_status.context = context_handle;
1397		fence_status.ip_type = AMDGPU_HW_IP_COMPUTE;
1398		fence_status.ip_instance = 0;
1399		fence_status.ring = instance;
1400		fence_status.fence = ibs_request.seq_no;
1401
1402		r = amdgpu_cs_query_fence_status(&fence_status,
1403						 AMDGPU_TIMEOUT_INFINITE,
1404						 0, &expired);
1405		CU_ASSERT_EQUAL(r, 0);
1406
1407		r = amdgpu_bo_list_destroy(bo_list);
1408		CU_ASSERT_EQUAL(r, 0);
1409
1410		r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
1411					     ib_result_mc_address, 4096);
1412		CU_ASSERT_EQUAL(r, 0);
1413	}
1414
1415	r = amdgpu_cs_ctx_free(context_handle);
1416	CU_ASSERT_EQUAL(r, 0);
1417}
1418
1419static void amdgpu_command_submission_compute_cp_write_data(void)
1420{
1421	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_COMPUTE);
1422}
1423
1424static void amdgpu_command_submission_compute_cp_const_fill(void)
1425{
1426	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_COMPUTE);
1427}
1428
1429static void amdgpu_command_submission_compute_cp_copy_data(void)
1430{
1431	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_COMPUTE);
1432}
1433
1434static void amdgpu_command_submission_compute(void)
1435{
1436	/* write data using the CP */
1437	amdgpu_command_submission_compute_cp_write_data();
1438	/* const fill using the CP */
1439	amdgpu_command_submission_compute_cp_const_fill();
1440	/* copy data using the CP */
1441	amdgpu_command_submission_compute_cp_copy_data();
1442	/* nop test */
1443	amdgpu_command_submission_compute_nop();
1444}
1445
1446/*
1447 * caller need create/release:
1448 * pm4_src, resources, ib_info, and ibs_request
1449 * submit command stream described in ibs_request and wait for this IB accomplished
1450 */
1451void
1452amdgpu_test_exec_cs_helper_raw(amdgpu_device_handle device_handle,
1453			       amdgpu_context_handle context_handle,
1454			       unsigned ip_type, int instance, int pm4_dw,
1455			       uint32_t *pm4_src, int res_cnt,
1456			       amdgpu_bo_handle *resources,
1457			       struct amdgpu_cs_ib_info *ib_info,
1458			       struct amdgpu_cs_request *ibs_request,
1459			       bool secure)
1460{
1461	int r;
1462	uint32_t expired;
1463	uint32_t *ring_ptr;
1464	amdgpu_bo_handle ib_result_handle;
1465	void *ib_result_cpu;
1466	uint64_t ib_result_mc_address;
1467	struct amdgpu_cs_fence fence_status = {0};
1468	amdgpu_bo_handle *all_res = alloca(sizeof(resources[0]) * (res_cnt + 1));
1469	amdgpu_va_handle va_handle;
1470
1471	/* prepare CS */
1472	CU_ASSERT_NOT_EQUAL(pm4_src, NULL);
1473	CU_ASSERT_NOT_EQUAL(resources, NULL);
1474	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1475	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1476	CU_ASSERT_TRUE(pm4_dw <= 1024);
1477
1478	/* allocate IB */
1479	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
1480				    AMDGPU_GEM_DOMAIN_GTT, 0,
1481				    &ib_result_handle, &ib_result_cpu,
1482				    &ib_result_mc_address, &va_handle);
1483	CU_ASSERT_EQUAL(r, 0);
1484
1485	/* copy PM4 packet to ring from caller */
1486	ring_ptr = ib_result_cpu;
1487	memcpy(ring_ptr, pm4_src, pm4_dw * sizeof(*pm4_src));
1488
1489	ib_info->ib_mc_address = ib_result_mc_address;
1490	ib_info->size = pm4_dw;
1491	if (secure)
1492		ib_info->flags |= AMDGPU_IB_FLAGS_SECURE;
1493
1494	ibs_request->ip_type = ip_type;
1495	ibs_request->ring = instance;
1496	ibs_request->number_of_ibs = 1;
1497	ibs_request->ibs = ib_info;
1498	ibs_request->fence_info.handle = NULL;
1499
1500	memcpy(all_res, resources, sizeof(resources[0]) * res_cnt);
1501	all_res[res_cnt] = ib_result_handle;
1502
1503	r = amdgpu_bo_list_create(device_handle, res_cnt+1, all_res,
1504				  NULL, &ibs_request->resources);
1505	CU_ASSERT_EQUAL(r, 0);
1506
1507	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1508
1509	/* submit CS */
1510	r = amdgpu_cs_submit(context_handle, 0, ibs_request, 1);
1511	CU_ASSERT_EQUAL(r, 0);
1512
1513	r = amdgpu_bo_list_destroy(ibs_request->resources);
1514	CU_ASSERT_EQUAL(r, 0);
1515
1516	fence_status.ip_type = ip_type;
1517	fence_status.ip_instance = 0;
1518	fence_status.ring = ibs_request->ring;
1519	fence_status.context = context_handle;
1520	fence_status.fence = ibs_request->seq_no;
1521
1522	/* wait for IB accomplished */
1523	r = amdgpu_cs_query_fence_status(&fence_status,
1524					 AMDGPU_TIMEOUT_INFINITE,
1525					 0, &expired);
1526	CU_ASSERT_EQUAL(r, 0);
1527	CU_ASSERT_EQUAL(expired, true);
1528
1529	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
1530				     ib_result_mc_address, 4096);
1531	CU_ASSERT_EQUAL(r, 0);
1532}
1533
1534static void
1535amdgpu_test_exec_cs_helper(amdgpu_context_handle context_handle,
1536			   unsigned ip_type, int instance, int pm4_dw,
1537			   uint32_t *pm4_src, int res_cnt,
1538			   amdgpu_bo_handle *resources,
1539			   struct amdgpu_cs_ib_info *ib_info,
1540			   struct amdgpu_cs_request *ibs_request)
1541{
1542	amdgpu_test_exec_cs_helper_raw(device_handle, context_handle,
1543				       ip_type, instance, pm4_dw, pm4_src,
1544				       res_cnt, resources, ib_info,
1545				       ibs_request, false);
1546}
1547
1548void
1549amdgpu_command_submission_write_linear_helper_with_secure(amdgpu_device_handle
1550							  device, unsigned
1551							  ip_type, bool secure)
1552{
1553	const int sdma_write_length = 128;
1554	const int pm4_dw = 256;
1555	amdgpu_context_handle context_handle;
1556	amdgpu_bo_handle bo;
1557	amdgpu_bo_handle *resources;
1558	uint32_t *pm4;
1559	struct amdgpu_cs_ib_info *ib_info;
1560	struct amdgpu_cs_request *ibs_request;
1561	uint64_t bo_mc;
1562	volatile uint32_t *bo_cpu;
1563	uint32_t bo_cpu_origin;
1564	int i, j, r, loop, ring_id;
1565	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
1566	amdgpu_va_handle va_handle;
1567	struct drm_amdgpu_info_hw_ip hw_ip_info;
1568
1569	pm4 = calloc(pm4_dw, sizeof(*pm4));
1570	CU_ASSERT_NOT_EQUAL(pm4, NULL);
1571
1572	ib_info = calloc(1, sizeof(*ib_info));
1573	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1574
1575	ibs_request = calloc(1, sizeof(*ibs_request));
1576	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1577
1578	r = amdgpu_query_hw_ip_info(device, ip_type, 0, &hw_ip_info);
1579	CU_ASSERT_EQUAL(r, 0);
1580
1581	for (i = 0; secure && (i < 2); i++)
1582		gtt_flags[i] |= AMDGPU_GEM_CREATE_ENCRYPTED;
1583
1584	r = amdgpu_cs_ctx_create(device, &context_handle);
1585
1586	CU_ASSERT_EQUAL(r, 0);
1587
1588	/* prepare resource */
1589	resources = calloc(1, sizeof(amdgpu_bo_handle));
1590	CU_ASSERT_NOT_EQUAL(resources, NULL);
1591
1592	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
1593		loop = 0;
1594		while(loop < 2) {
1595			/* allocate UC bo for sDMA use */
1596			r = amdgpu_bo_alloc_and_map(device,
1597						    sdma_write_length * sizeof(uint32_t),
1598						    4096, AMDGPU_GEM_DOMAIN_GTT,
1599						    gtt_flags[loop], &bo, (void**)&bo_cpu,
1600						    &bo_mc, &va_handle);
1601			CU_ASSERT_EQUAL(r, 0);
1602
1603			/* clear bo */
1604			memset((void*)bo_cpu, 0, sdma_write_length * sizeof(uint32_t));
1605
1606			resources[0] = bo;
1607
1608			/* fulfill PM4: test DMA write-linear */
1609			i = j = 0;
1610			if (ip_type == AMDGPU_HW_IP_DMA) {
1611				if (family_id == AMDGPU_FAMILY_SI)
1612					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
1613								  sdma_write_length);
1614				else
1615					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
1616							       SDMA_WRITE_SUB_OPCODE_LINEAR,
1617							       secure ? SDMA_ATOMIC_TMZ(1) : 0);
1618				pm4[i++] = 0xfffffffc & bo_mc;
1619				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1620				if (family_id >= AMDGPU_FAMILY_AI)
1621					pm4[i++] = sdma_write_length - 1;
1622				else if (family_id != AMDGPU_FAMILY_SI)
1623					pm4[i++] = sdma_write_length;
1624				while(j++ < sdma_write_length)
1625					pm4[i++] = 0xdeadbeaf;
1626			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
1627				    (ip_type == AMDGPU_HW_IP_COMPUTE)) {
1628				pm4[i++] = PACKET3(PACKET3_WRITE_DATA, 2 + sdma_write_length);
1629				pm4[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
1630				pm4[i++] = 0xfffffffc & bo_mc;
1631				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1632				while(j++ < sdma_write_length)
1633					pm4[i++] = 0xdeadbeaf;
1634			}
1635
1636			amdgpu_test_exec_cs_helper_raw(device, context_handle,
1637						       ip_type, ring_id, i, pm4,
1638						       1, resources, ib_info,
1639						       ibs_request, secure);
1640
1641			/* verify if SDMA test result meets with expected */
1642			i = 0;
1643			if (!secure) {
1644				while(i < sdma_write_length) {
1645					CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
1646				}
1647			} else if (ip_type == AMDGPU_HW_IP_GFX) {
1648				memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
1649				pm4[i++] = PACKET3(PACKET3_ATOMIC_MEM, 7);
1650				/* atomic opcode for 32b w/ RTN and ATOMIC_SWAPCMP_RTN
1651				 * command, 1-loop_until_compare_satisfied.
1652				 * single_pass_atomic, 0-lru
1653				 * engine_sel, 0-micro_engine
1654				 */
1655				pm4[i++] = (TC_OP_ATOMIC_CMPSWAP_RTN_32 |
1656							ATOMIC_MEM_COMMAND(1) |
1657							ATOMIC_MEM_CACHEPOLICAY(0) |
1658							ATOMIC_MEM_ENGINESEL(0));
1659				pm4[i++] = 0xfffffffc & bo_mc;
1660				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1661				pm4[i++] = 0x12345678;
1662				pm4[i++] = 0x0;
1663				pm4[i++] = 0xdeadbeaf;
1664				pm4[i++] = 0x0;
1665				pm4[i++] = 0x100;
1666				amdgpu_test_exec_cs_helper_raw(device, context_handle,
1667							ip_type, ring_id, i, pm4,
1668							1, resources, ib_info,
1669							ibs_request, true);
1670			} else if (ip_type == AMDGPU_HW_IP_DMA) {
1671				/* restore the bo_cpu to compare */
1672				bo_cpu_origin = bo_cpu[0];
1673				memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
1674				/* atomic opcode for 32b w/ RTN and ATOMIC_SWAPCMP_RTN
1675				 * loop, 1-loop_until_compare_satisfied.
1676				 * single_pass_atomic, 0-lru
1677				 */
1678				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_ATOMIC,
1679							       0,
1680							       SDMA_ATOMIC_LOOP(1) |
1681							       SDMA_ATOMIC_TMZ(1) |
1682							       SDMA_ATOMIC_OPCODE(TC_OP_ATOMIC_CMPSWAP_RTN_32));
1683				pm4[i++] = 0xfffffffc & bo_mc;
1684				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1685				pm4[i++] = 0x12345678;
1686				pm4[i++] = 0x0;
1687				pm4[i++] = 0xdeadbeaf;
1688				pm4[i++] = 0x0;
1689				pm4[i++] = 0x100;
1690				amdgpu_test_exec_cs_helper_raw(device, context_handle,
1691							ip_type, ring_id, i, pm4,
1692							1, resources, ib_info,
1693							ibs_request, true);
1694				/* DMA's atomic behavir is unlike GFX
1695				 * If the comparing data is not equal to destination data,
1696				 * For GFX, loop again till gfx timeout(system hang).
1697				 * For DMA, loop again till timer expired and then send interrupt.
1698				 * So testcase can't use interrupt mechanism.
1699				 * We take another way to verify. When the comparing data is not
1700				 * equal to destination data, overwrite the source data to the destination
1701				 * buffer. Otherwise, original destination data unchanged.
1702				 * So if the bo_cpu data is overwritten, the result is passed.
1703				 */
1704				CU_ASSERT_NOT_EQUAL(bo_cpu[0], bo_cpu_origin);
1705
1706				/* compare again for the case of dest_data != cmp_data */
1707				i = 0;
1708				/* restore again, here dest_data should be */
1709				bo_cpu_origin = bo_cpu[0];
1710				memset((void*)pm4, 0, pm4_dw * sizeof(uint32_t));
1711				pm4[i++] = SDMA_PACKET(SDMA_OPCODE_ATOMIC,
1712							       0,
1713							       SDMA_ATOMIC_LOOP(1) |
1714							       SDMA_ATOMIC_TMZ(1) |
1715							       SDMA_ATOMIC_OPCODE(TC_OP_ATOMIC_CMPSWAP_RTN_32));
1716				pm4[i++] = 0xfffffffc & bo_mc;
1717				pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1718				pm4[i++] = 0x87654321;
1719				pm4[i++] = 0x0;
1720				pm4[i++] = 0xdeadbeaf;
1721				pm4[i++] = 0x0;
1722				pm4[i++] = 0x100;
1723				amdgpu_test_exec_cs_helper_raw(device, context_handle,
1724							ip_type, ring_id, i, pm4,
1725							1, resources, ib_info,
1726							ibs_request, true);
1727				/* here bo_cpu[0] should be unchanged, still is 0x12345678, otherwise failed*/
1728				CU_ASSERT_EQUAL(bo_cpu[0], bo_cpu_origin);
1729			}
1730
1731			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
1732						     sdma_write_length * sizeof(uint32_t));
1733			CU_ASSERT_EQUAL(r, 0);
1734			loop++;
1735		}
1736	}
1737	/* clean resources */
1738	free(resources);
1739	free(ibs_request);
1740	free(ib_info);
1741	free(pm4);
1742
1743	/* end of test */
1744	r = amdgpu_cs_ctx_free(context_handle);
1745	CU_ASSERT_EQUAL(r, 0);
1746}
1747
1748static void amdgpu_command_submission_write_linear_helper(unsigned ip_type)
1749{
1750	amdgpu_command_submission_write_linear_helper_with_secure(device_handle,
1751								  ip_type,
1752								  false);
1753}
1754
1755static void amdgpu_command_submission_sdma_write_linear(void)
1756{
1757	amdgpu_command_submission_write_linear_helper(AMDGPU_HW_IP_DMA);
1758}
1759
1760static void amdgpu_command_submission_const_fill_helper(unsigned ip_type)
1761{
1762	const int sdma_write_length = 1024 * 1024;
1763	const int pm4_dw = 256;
1764	amdgpu_context_handle context_handle;
1765	amdgpu_bo_handle bo;
1766	amdgpu_bo_handle *resources;
1767	uint32_t *pm4;
1768	struct amdgpu_cs_ib_info *ib_info;
1769	struct amdgpu_cs_request *ibs_request;
1770	uint64_t bo_mc;
1771	volatile uint32_t *bo_cpu;
1772	int i, j, r, loop, ring_id;
1773	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
1774	amdgpu_va_handle va_handle;
1775	struct drm_amdgpu_info_hw_ip hw_ip_info;
1776
1777	pm4 = calloc(pm4_dw, sizeof(*pm4));
1778	CU_ASSERT_NOT_EQUAL(pm4, NULL);
1779
1780	ib_info = calloc(1, sizeof(*ib_info));
1781	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1782
1783	ibs_request = calloc(1, sizeof(*ibs_request));
1784	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1785
1786	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
1787	CU_ASSERT_EQUAL(r, 0);
1788
1789	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1790	CU_ASSERT_EQUAL(r, 0);
1791
1792	/* prepare resource */
1793	resources = calloc(1, sizeof(amdgpu_bo_handle));
1794	CU_ASSERT_NOT_EQUAL(resources, NULL);
1795
1796	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
1797		loop = 0;
1798		while(loop < 2) {
1799			/* allocate UC bo for sDMA use */
1800			r = amdgpu_bo_alloc_and_map(device_handle,
1801						    sdma_write_length, 4096,
1802						    AMDGPU_GEM_DOMAIN_GTT,
1803						    gtt_flags[loop], &bo, (void**)&bo_cpu,
1804						    &bo_mc, &va_handle);
1805			CU_ASSERT_EQUAL(r, 0);
1806
1807			/* clear bo */
1808			memset((void*)bo_cpu, 0, sdma_write_length);
1809
1810			resources[0] = bo;
1811
1812			/* fulfill PM4: test DMA const fill */
1813			i = j = 0;
1814			if (ip_type == AMDGPU_HW_IP_DMA) {
1815				if (family_id == AMDGPU_FAMILY_SI) {
1816					pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_CONSTANT_FILL_SI,
1817								  0, 0, 0,
1818								  sdma_write_length / 4);
1819					pm4[i++] = 0xfffffffc & bo_mc;
1820					pm4[i++] = 0xdeadbeaf;
1821					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 16;
1822				} else {
1823					pm4[i++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0,
1824							       SDMA_CONSTANT_FILL_EXTRA_SIZE(2));
1825					pm4[i++] = 0xffffffff & bo_mc;
1826					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1827					pm4[i++] = 0xdeadbeaf;
1828					if (family_id >= AMDGPU_FAMILY_AI)
1829						pm4[i++] = sdma_write_length - 1;
1830					else
1831						pm4[i++] = sdma_write_length;
1832				}
1833			} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
1834				   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
1835				if (family_id == AMDGPU_FAMILY_SI) {
1836					pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
1837					pm4[i++] = 0xdeadbeaf;
1838					pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
1839						   PACKET3_DMA_DATA_SI_DST_SEL(0) |
1840						   PACKET3_DMA_DATA_SI_SRC_SEL(2) |
1841						   PACKET3_DMA_DATA_SI_CP_SYNC;
1842					pm4[i++] = 0xffffffff & bo_mc;
1843					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1844					pm4[i++] = sdma_write_length;
1845				} else {
1846					pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
1847					pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
1848						   PACKET3_DMA_DATA_DST_SEL(0) |
1849						   PACKET3_DMA_DATA_SRC_SEL(2) |
1850						   PACKET3_DMA_DATA_CP_SYNC;
1851					pm4[i++] = 0xdeadbeaf;
1852					pm4[i++] = 0;
1853					pm4[i++] = 0xfffffffc & bo_mc;
1854					pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
1855					pm4[i++] = sdma_write_length;
1856				}
1857			}
1858
1859			amdgpu_test_exec_cs_helper(context_handle,
1860						   ip_type, ring_id,
1861						   i, pm4,
1862						   1, resources,
1863						   ib_info, ibs_request);
1864
1865			/* verify if SDMA test result meets with expected */
1866			i = 0;
1867			while(i < (sdma_write_length / 4)) {
1868				CU_ASSERT_EQUAL(bo_cpu[i++], 0xdeadbeaf);
1869			}
1870
1871			r = amdgpu_bo_unmap_and_free(bo, va_handle, bo_mc,
1872						     sdma_write_length);
1873			CU_ASSERT_EQUAL(r, 0);
1874			loop++;
1875		}
1876	}
1877	/* clean resources */
1878	free(resources);
1879	free(ibs_request);
1880	free(ib_info);
1881	free(pm4);
1882
1883	/* end of test */
1884	r = amdgpu_cs_ctx_free(context_handle);
1885	CU_ASSERT_EQUAL(r, 0);
1886}
1887
1888static void amdgpu_command_submission_sdma_const_fill(void)
1889{
1890	amdgpu_command_submission_const_fill_helper(AMDGPU_HW_IP_DMA);
1891}
1892
1893static void amdgpu_command_submission_copy_linear_helper(unsigned ip_type)
1894{
1895	const int sdma_write_length = 1024;
1896	const int pm4_dw = 256;
1897	amdgpu_context_handle context_handle;
1898	amdgpu_bo_handle bo1, bo2;
1899	amdgpu_bo_handle *resources;
1900	uint32_t *pm4;
1901	struct amdgpu_cs_ib_info *ib_info;
1902	struct amdgpu_cs_request *ibs_request;
1903	uint64_t bo1_mc, bo2_mc;
1904	volatile unsigned char *bo1_cpu, *bo2_cpu;
1905	int i, j, r, loop1, loop2, ring_id;
1906	uint64_t gtt_flags[2] = {0, AMDGPU_GEM_CREATE_CPU_GTT_USWC};
1907	amdgpu_va_handle bo1_va_handle, bo2_va_handle;
1908	struct drm_amdgpu_info_hw_ip hw_ip_info;
1909
1910	pm4 = calloc(pm4_dw, sizeof(*pm4));
1911	CU_ASSERT_NOT_EQUAL(pm4, NULL);
1912
1913	ib_info = calloc(1, sizeof(*ib_info));
1914	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
1915
1916	ibs_request = calloc(1, sizeof(*ibs_request));
1917	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
1918
1919	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &hw_ip_info);
1920	CU_ASSERT_EQUAL(r, 0);
1921
1922	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
1923	CU_ASSERT_EQUAL(r, 0);
1924
1925	/* prepare resource */
1926	resources = calloc(2, sizeof(amdgpu_bo_handle));
1927	CU_ASSERT_NOT_EQUAL(resources, NULL);
1928
1929	for (ring_id = 0; (1 << ring_id) & hw_ip_info.available_rings; ring_id++) {
1930		loop1 = loop2 = 0;
1931		/* run 9 circle to test all mapping combination */
1932		while(loop1 < 2) {
1933			while(loop2 < 2) {
1934				/* allocate UC bo1for sDMA use */
1935				r = amdgpu_bo_alloc_and_map(device_handle,
1936							    sdma_write_length, 4096,
1937							    AMDGPU_GEM_DOMAIN_GTT,
1938							    gtt_flags[loop1], &bo1,
1939							    (void**)&bo1_cpu, &bo1_mc,
1940							    &bo1_va_handle);
1941				CU_ASSERT_EQUAL(r, 0);
1942
1943				/* set bo1 */
1944				memset((void*)bo1_cpu, 0xaa, sdma_write_length);
1945
1946				/* allocate UC bo2 for sDMA use */
1947				r = amdgpu_bo_alloc_and_map(device_handle,
1948							    sdma_write_length, 4096,
1949							    AMDGPU_GEM_DOMAIN_GTT,
1950							    gtt_flags[loop2], &bo2,
1951							    (void**)&bo2_cpu, &bo2_mc,
1952							    &bo2_va_handle);
1953				CU_ASSERT_EQUAL(r, 0);
1954
1955				/* clear bo2 */
1956				memset((void*)bo2_cpu, 0, sdma_write_length);
1957
1958				resources[0] = bo1;
1959				resources[1] = bo2;
1960
1961				/* fulfill PM4: test DMA copy linear */
1962				i = j = 0;
1963				if (ip_type == AMDGPU_HW_IP_DMA) {
1964					if (family_id == AMDGPU_FAMILY_SI) {
1965						pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_COPY_SI,
1966									  0, 0, 0,
1967									  sdma_write_length);
1968						pm4[i++] = 0xffffffff & bo2_mc;
1969						pm4[i++] = 0xffffffff & bo1_mc;
1970						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1971						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1972					} else {
1973						pm4[i++] = SDMA_PACKET(SDMA_OPCODE_COPY,
1974								       SDMA_COPY_SUB_OPCODE_LINEAR,
1975								       0);
1976						if (family_id >= AMDGPU_FAMILY_AI)
1977							pm4[i++] = sdma_write_length - 1;
1978						else
1979							pm4[i++] = sdma_write_length;
1980						pm4[i++] = 0;
1981						pm4[i++] = 0xffffffff & bo1_mc;
1982						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
1983						pm4[i++] = 0xffffffff & bo2_mc;
1984						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1985					}
1986				} else if ((ip_type == AMDGPU_HW_IP_GFX) ||
1987					   (ip_type == AMDGPU_HW_IP_COMPUTE)) {
1988					if (family_id == AMDGPU_FAMILY_SI) {
1989						pm4[i++] = PACKET3(PACKET3_DMA_DATA_SI, 4);
1990						pm4[i++] = 0xfffffffc & bo1_mc;
1991						pm4[i++] = PACKET3_DMA_DATA_SI_ENGINE(0) |
1992							   PACKET3_DMA_DATA_SI_DST_SEL(0) |
1993							   PACKET3_DMA_DATA_SI_SRC_SEL(0) |
1994							   PACKET3_DMA_DATA_SI_CP_SYNC |
1995							   (0xffff00000000 & bo1_mc) >> 32;
1996						pm4[i++] = 0xfffffffc & bo2_mc;
1997						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
1998						pm4[i++] = sdma_write_length;
1999					} else {
2000						pm4[i++] = PACKET3(PACKET3_DMA_DATA, 5);
2001						pm4[i++] = PACKET3_DMA_DATA_ENGINE(0) |
2002							   PACKET3_DMA_DATA_DST_SEL(0) |
2003							   PACKET3_DMA_DATA_SRC_SEL(0) |
2004							   PACKET3_DMA_DATA_CP_SYNC;
2005						pm4[i++] = 0xfffffffc & bo1_mc;
2006						pm4[i++] = (0xffffffff00000000 & bo1_mc) >> 32;
2007						pm4[i++] = 0xfffffffc & bo2_mc;
2008						pm4[i++] = (0xffffffff00000000 & bo2_mc) >> 32;
2009						pm4[i++] = sdma_write_length;
2010					}
2011				}
2012
2013				amdgpu_test_exec_cs_helper(context_handle,
2014							   ip_type, ring_id,
2015							   i, pm4,
2016							   2, resources,
2017							   ib_info, ibs_request);
2018
2019				/* verify if SDMA test result meets with expected */
2020				i = 0;
2021				while(i < sdma_write_length) {
2022					CU_ASSERT_EQUAL(bo2_cpu[i++], 0xaa);
2023				}
2024				r = amdgpu_bo_unmap_and_free(bo1, bo1_va_handle, bo1_mc,
2025							     sdma_write_length);
2026				CU_ASSERT_EQUAL(r, 0);
2027				r = amdgpu_bo_unmap_and_free(bo2, bo2_va_handle, bo2_mc,
2028							     sdma_write_length);
2029				CU_ASSERT_EQUAL(r, 0);
2030				loop2++;
2031			}
2032			loop1++;
2033		}
2034	}
2035	/* clean resources */
2036	free(resources);
2037	free(ibs_request);
2038	free(ib_info);
2039	free(pm4);
2040
2041	/* end of test */
2042	r = amdgpu_cs_ctx_free(context_handle);
2043	CU_ASSERT_EQUAL(r, 0);
2044}
2045
2046static void amdgpu_command_submission_sdma_copy_linear(void)
2047{
2048	amdgpu_command_submission_copy_linear_helper(AMDGPU_HW_IP_DMA);
2049}
2050
2051static void amdgpu_command_submission_sdma(void)
2052{
2053	amdgpu_command_submission_sdma_write_linear();
2054	amdgpu_command_submission_sdma_const_fill();
2055	amdgpu_command_submission_sdma_copy_linear();
2056}
2057
2058static void amdgpu_command_submission_multi_fence_wait_all(bool wait_all)
2059{
2060	amdgpu_context_handle context_handle;
2061	amdgpu_bo_handle ib_result_handle, ib_result_ce_handle;
2062	void *ib_result_cpu, *ib_result_ce_cpu;
2063	uint64_t ib_result_mc_address, ib_result_ce_mc_address;
2064	struct amdgpu_cs_request ibs_request[2] = {0};
2065	struct amdgpu_cs_ib_info ib_info[2];
2066	struct amdgpu_cs_fence fence_status[2] = {0};
2067	uint32_t *ptr;
2068	uint32_t expired;
2069	amdgpu_bo_list_handle bo_list;
2070	amdgpu_va_handle va_handle, va_handle_ce;
2071	int r;
2072	int i = 0, ib_cs_num = 2;
2073
2074	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2075	CU_ASSERT_EQUAL(r, 0);
2076
2077	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
2078				    AMDGPU_GEM_DOMAIN_GTT, 0,
2079				    &ib_result_handle, &ib_result_cpu,
2080				    &ib_result_mc_address, &va_handle);
2081	CU_ASSERT_EQUAL(r, 0);
2082
2083	r = amdgpu_bo_alloc_and_map(device_handle, 4096, 4096,
2084				    AMDGPU_GEM_DOMAIN_GTT, 0,
2085				    &ib_result_ce_handle, &ib_result_ce_cpu,
2086				    &ib_result_ce_mc_address, &va_handle_ce);
2087	CU_ASSERT_EQUAL(r, 0);
2088
2089	r = amdgpu_get_bo_list(device_handle, ib_result_handle,
2090			       ib_result_ce_handle, &bo_list);
2091	CU_ASSERT_EQUAL(r, 0);
2092
2093	memset(ib_info, 0, 2 * sizeof(struct amdgpu_cs_ib_info));
2094
2095	/* IT_SET_CE_DE_COUNTERS */
2096	ptr = ib_result_ce_cpu;
2097	if (family_id != AMDGPU_FAMILY_SI) {
2098		ptr[i++] = 0xc0008900;
2099		ptr[i++] = 0;
2100	}
2101	ptr[i++] = 0xc0008400;
2102	ptr[i++] = 1;
2103	ib_info[0].ib_mc_address = ib_result_ce_mc_address;
2104	ib_info[0].size = i;
2105	ib_info[0].flags = AMDGPU_IB_FLAG_CE;
2106
2107	/* IT_WAIT_ON_CE_COUNTER */
2108	ptr = ib_result_cpu;
2109	ptr[0] = 0xc0008600;
2110	ptr[1] = 0x00000001;
2111	ib_info[1].ib_mc_address = ib_result_mc_address;
2112	ib_info[1].size = 2;
2113
2114	for (i = 0; i < ib_cs_num; i++) {
2115		ibs_request[i].ip_type = AMDGPU_HW_IP_GFX;
2116		ibs_request[i].number_of_ibs = 2;
2117		ibs_request[i].ibs = ib_info;
2118		ibs_request[i].resources = bo_list;
2119		ibs_request[i].fence_info.handle = NULL;
2120	}
2121
2122	r = amdgpu_cs_submit(context_handle, 0,ibs_request, ib_cs_num);
2123
2124	CU_ASSERT_EQUAL(r, 0);
2125
2126	for (i = 0; i < ib_cs_num; i++) {
2127		fence_status[i].context = context_handle;
2128		fence_status[i].ip_type = AMDGPU_HW_IP_GFX;
2129		fence_status[i].fence = ibs_request[i].seq_no;
2130	}
2131
2132	r = amdgpu_cs_wait_fences(fence_status, ib_cs_num, wait_all,
2133				AMDGPU_TIMEOUT_INFINITE,
2134				&expired, NULL);
2135	CU_ASSERT_EQUAL(r, 0);
2136
2137	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
2138				     ib_result_mc_address, 4096);
2139	CU_ASSERT_EQUAL(r, 0);
2140
2141	r = amdgpu_bo_unmap_and_free(ib_result_ce_handle, va_handle_ce,
2142				     ib_result_ce_mc_address, 4096);
2143	CU_ASSERT_EQUAL(r, 0);
2144
2145	r = amdgpu_bo_list_destroy(bo_list);
2146	CU_ASSERT_EQUAL(r, 0);
2147
2148	r = amdgpu_cs_ctx_free(context_handle);
2149	CU_ASSERT_EQUAL(r, 0);
2150}
2151
2152static void amdgpu_command_submission_multi_fence(void)
2153{
2154	amdgpu_command_submission_multi_fence_wait_all(true);
2155	amdgpu_command_submission_multi_fence_wait_all(false);
2156}
2157
2158static void amdgpu_userptr_test(void)
2159{
2160	int i, r, j;
2161	uint32_t *pm4 = NULL;
2162	uint64_t bo_mc;
2163	void *ptr = NULL;
2164	int pm4_dw = 256;
2165	int sdma_write_length = 4;
2166	amdgpu_bo_handle handle;
2167	amdgpu_context_handle context_handle;
2168	struct amdgpu_cs_ib_info *ib_info;
2169	struct amdgpu_cs_request *ibs_request;
2170	amdgpu_bo_handle buf_handle;
2171	amdgpu_va_handle va_handle;
2172
2173	pm4 = calloc(pm4_dw, sizeof(*pm4));
2174	CU_ASSERT_NOT_EQUAL(pm4, NULL);
2175
2176	ib_info = calloc(1, sizeof(*ib_info));
2177	CU_ASSERT_NOT_EQUAL(ib_info, NULL);
2178
2179	ibs_request = calloc(1, sizeof(*ibs_request));
2180	CU_ASSERT_NOT_EQUAL(ibs_request, NULL);
2181
2182	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2183	CU_ASSERT_EQUAL(r, 0);
2184
2185	posix_memalign(&ptr, sysconf(_SC_PAGE_SIZE), BUFFER_SIZE);
2186	CU_ASSERT_NOT_EQUAL(ptr, NULL);
2187	memset(ptr, 0, BUFFER_SIZE);
2188
2189	r = amdgpu_create_bo_from_user_mem(device_handle,
2190					   ptr, BUFFER_SIZE, &buf_handle);
2191	CU_ASSERT_EQUAL(r, 0);
2192
2193	r = amdgpu_va_range_alloc(device_handle,
2194				  amdgpu_gpu_va_range_general,
2195				  BUFFER_SIZE, 1, 0, &bo_mc,
2196				  &va_handle, 0);
2197	CU_ASSERT_EQUAL(r, 0);
2198
2199	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_MAP);
2200	CU_ASSERT_EQUAL(r, 0);
2201
2202	handle = buf_handle;
2203
2204	j = i = 0;
2205
2206	if (family_id == AMDGPU_FAMILY_SI)
2207		pm4[i++] = SDMA_PACKET_SI(SDMA_OPCODE_WRITE, 0, 0, 0,
2208				sdma_write_length);
2209	else
2210		pm4[i++] = SDMA_PACKET(SDMA_OPCODE_WRITE,
2211				SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
2212	pm4[i++] = 0xffffffff & bo_mc;
2213	pm4[i++] = (0xffffffff00000000 & bo_mc) >> 32;
2214	if (family_id >= AMDGPU_FAMILY_AI)
2215		pm4[i++] = sdma_write_length - 1;
2216	else if (family_id != AMDGPU_FAMILY_SI)
2217		pm4[i++] = sdma_write_length;
2218
2219	while (j++ < sdma_write_length)
2220		pm4[i++] = 0xdeadbeaf;
2221
2222	if (!fork()) {
2223		pm4[0] = 0x0;
2224		exit(0);
2225	}
2226
2227	amdgpu_test_exec_cs_helper(context_handle,
2228				   AMDGPU_HW_IP_DMA, 0,
2229				   i, pm4,
2230				   1, &handle,
2231				   ib_info, ibs_request);
2232	i = 0;
2233	while (i < sdma_write_length) {
2234		CU_ASSERT_EQUAL(((int*)ptr)[i++], 0xdeadbeaf);
2235	}
2236	free(ibs_request);
2237	free(ib_info);
2238	free(pm4);
2239
2240	r = amdgpu_bo_va_op(buf_handle, 0, BUFFER_SIZE, bo_mc, 0, AMDGPU_VA_OP_UNMAP);
2241	CU_ASSERT_EQUAL(r, 0);
2242	r = amdgpu_va_range_free(va_handle);
2243	CU_ASSERT_EQUAL(r, 0);
2244	r = amdgpu_bo_free(buf_handle);
2245	CU_ASSERT_EQUAL(r, 0);
2246	free(ptr);
2247
2248	r = amdgpu_cs_ctx_free(context_handle);
2249	CU_ASSERT_EQUAL(r, 0);
2250
2251	wait(NULL);
2252}
2253
2254static void amdgpu_sync_dependency_test(void)
2255{
2256	amdgpu_context_handle context_handle[2];
2257	amdgpu_bo_handle ib_result_handle;
2258	void *ib_result_cpu;
2259	uint64_t ib_result_mc_address;
2260	struct amdgpu_cs_request ibs_request;
2261	struct amdgpu_cs_ib_info ib_info;
2262	struct amdgpu_cs_fence fence_status;
2263	uint32_t expired;
2264	int i, j, r;
2265	amdgpu_bo_list_handle bo_list;
2266	amdgpu_va_handle va_handle;
2267	static uint32_t *ptr;
2268	uint64_t seq_no;
2269
2270	r = amdgpu_cs_ctx_create(device_handle, &context_handle[0]);
2271	CU_ASSERT_EQUAL(r, 0);
2272	r = amdgpu_cs_ctx_create(device_handle, &context_handle[1]);
2273	CU_ASSERT_EQUAL(r, 0);
2274
2275	r = amdgpu_bo_alloc_and_map(device_handle, 8192, 4096,
2276			AMDGPU_GEM_DOMAIN_GTT, 0,
2277						    &ib_result_handle, &ib_result_cpu,
2278						    &ib_result_mc_address, &va_handle);
2279	CU_ASSERT_EQUAL(r, 0);
2280
2281	r = amdgpu_get_bo_list(device_handle, ib_result_handle, NULL,
2282			       &bo_list);
2283	CU_ASSERT_EQUAL(r, 0);
2284
2285	ptr = ib_result_cpu;
2286	i = 0;
2287
2288	memcpy(ptr + CODE_OFFSET , shader_bin, sizeof(shader_bin));
2289
2290	/* Dispatch minimal init config and verify it's executed */
2291	ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
2292	ptr[i++] = 0x80000000;
2293	ptr[i++] = 0x80000000;
2294
2295	ptr[i++] = PACKET3(PKT3_CLEAR_STATE, 0);
2296	ptr[i++] = 0x80000000;
2297
2298
2299	/* Program compute regs */
2300	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
2301	ptr[i++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
2302	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 8;
2303	ptr[i++] = (ib_result_mc_address + CODE_OFFSET * 4) >> 40;
2304
2305
2306	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
2307	ptr[i++] = mmCOMPUTE_PGM_RSRC1 - PACKET3_SET_SH_REG_START;
2308	/*
2309	 * 002c0040         COMPUTE_PGM_RSRC1 <- VGPRS = 0
2310	                                      SGPRS = 1
2311	                                      PRIORITY = 0
2312	                                      FLOAT_MODE = 192 (0xc0)
2313	                                      PRIV = 0
2314	                                      DX10_CLAMP = 1
2315	                                      DEBUG_MODE = 0
2316	                                      IEEE_MODE = 0
2317	                                      BULKY = 0
2318	                                      CDBG_USER = 0
2319	 *
2320	 */
2321	ptr[i++] = 0x002c0040;
2322
2323
2324	/*
2325	 * 00000010         COMPUTE_PGM_RSRC2 <- SCRATCH_EN = 0
2326	                                      USER_SGPR = 8
2327	                                      TRAP_PRESENT = 0
2328	                                      TGID_X_EN = 0
2329	                                      TGID_Y_EN = 0
2330	                                      TGID_Z_EN = 0
2331	                                      TG_SIZE_EN = 0
2332	                                      TIDIG_COMP_CNT = 0
2333	                                      EXCP_EN_MSB = 0
2334	                                      LDS_SIZE = 0
2335	                                      EXCP_EN = 0
2336	 *
2337	 */
2338	ptr[i++] = 0x00000010;
2339
2340
2341/*
2342 * 00000100         COMPUTE_TMPRING_SIZE <- WAVES = 256 (0x100)
2343                                         WAVESIZE = 0
2344 *
2345 */
2346	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
2347	ptr[i++] = mmCOMPUTE_TMPRING_SIZE - PACKET3_SET_SH_REG_START;
2348	ptr[i++] = 0x00000100;
2349
2350	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
2351	ptr[i++] = mmCOMPUTE_USER_DATA_0 - PACKET3_SET_SH_REG_START;
2352	ptr[i++] = 0xffffffff & (ib_result_mc_address + DATA_OFFSET * 4);
2353	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
2354
2355	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
2356	ptr[i++] = mmCOMPUTE_RESOURCE_LIMITS - PACKET3_SET_SH_REG_START;
2357	ptr[i++] = 0;
2358
2359	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
2360	ptr[i++] = mmCOMPUTE_NUM_THREAD_X - PACKET3_SET_SH_REG_START;
2361	ptr[i++] = 1;
2362	ptr[i++] = 1;
2363	ptr[i++] = 1;
2364
2365
2366	/* Dispatch */
2367	ptr[i++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
2368	ptr[i++] = 1;
2369	ptr[i++] = 1;
2370	ptr[i++] = 1;
2371	ptr[i++] = 0x00000045; /* DISPATCH DIRECT field */
2372
2373
2374	while (i & 7)
2375		ptr[i++] =  0xffff1000; /* type3 nop packet */
2376
2377	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
2378	ib_info.ib_mc_address = ib_result_mc_address;
2379	ib_info.size = i;
2380
2381	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
2382	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
2383	ibs_request.ring = 0;
2384	ibs_request.number_of_ibs = 1;
2385	ibs_request.ibs = &ib_info;
2386	ibs_request.resources = bo_list;
2387	ibs_request.fence_info.handle = NULL;
2388
2389	r = amdgpu_cs_submit(context_handle[1], 0,&ibs_request, 1);
2390	CU_ASSERT_EQUAL(r, 0);
2391	seq_no = ibs_request.seq_no;
2392
2393
2394
2395	/* Prepare second command with dependency on the first */
2396	j = i;
2397	ptr[i++] = PACKET3(PACKET3_WRITE_DATA, 3);
2398	ptr[i++] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM;
2399	ptr[i++] =          0xfffffffc & (ib_result_mc_address + DATA_OFFSET * 4);
2400	ptr[i++] = (0xffffffff00000000 & (ib_result_mc_address + DATA_OFFSET * 4)) >> 32;
2401	ptr[i++] = 99;
2402
2403	while (i & 7)
2404		ptr[i++] =  0xffff1000; /* type3 nop packet */
2405
2406	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
2407	ib_info.ib_mc_address = ib_result_mc_address + j * 4;
2408	ib_info.size = i - j;
2409
2410	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
2411	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
2412	ibs_request.ring = 0;
2413	ibs_request.number_of_ibs = 1;
2414	ibs_request.ibs = &ib_info;
2415	ibs_request.resources = bo_list;
2416	ibs_request.fence_info.handle = NULL;
2417
2418	ibs_request.number_of_dependencies = 1;
2419
2420	ibs_request.dependencies = calloc(1, sizeof(*ibs_request.dependencies));
2421	ibs_request.dependencies[0].context = context_handle[1];
2422	ibs_request.dependencies[0].ip_instance = 0;
2423	ibs_request.dependencies[0].ring = 0;
2424	ibs_request.dependencies[0].fence = seq_no;
2425
2426
2427	r = amdgpu_cs_submit(context_handle[0], 0,&ibs_request, 1);
2428	CU_ASSERT_EQUAL(r, 0);
2429
2430
2431	memset(&fence_status, 0, sizeof(struct amdgpu_cs_fence));
2432	fence_status.context = context_handle[0];
2433	fence_status.ip_type = AMDGPU_HW_IP_GFX;
2434	fence_status.ip_instance = 0;
2435	fence_status.ring = 0;
2436	fence_status.fence = ibs_request.seq_no;
2437
2438	r = amdgpu_cs_query_fence_status(&fence_status,
2439		       AMDGPU_TIMEOUT_INFINITE,0, &expired);
2440	CU_ASSERT_EQUAL(r, 0);
2441
2442	/* Expect the second command to wait for shader to complete */
2443	CU_ASSERT_EQUAL(ptr[DATA_OFFSET], 99);
2444
2445	r = amdgpu_bo_list_destroy(bo_list);
2446	CU_ASSERT_EQUAL(r, 0);
2447
2448	r = amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
2449				     ib_result_mc_address, 4096);
2450	CU_ASSERT_EQUAL(r, 0);
2451
2452	r = amdgpu_cs_ctx_free(context_handle[0]);
2453	CU_ASSERT_EQUAL(r, 0);
2454	r = amdgpu_cs_ctx_free(context_handle[1]);
2455	CU_ASSERT_EQUAL(r, 0);
2456
2457	free(ibs_request.dependencies);
2458}
2459
2460static int amdgpu_dispatch_load_cs_shader_hang_slow(uint32_t *ptr, int family)
2461{
2462	struct amdgpu_test_shader *shader;
2463	int i, loop = 0x10000;
2464
2465	switch (family) {
2466		case AMDGPU_FAMILY_AI:
2467			shader = &memcpy_cs_hang_slow_ai;
2468			break;
2469		case AMDGPU_FAMILY_RV:
2470			shader = &memcpy_cs_hang_slow_rv;
2471			break;
2472		case AMDGPU_FAMILY_NV:
2473			shader = &memcpy_cs_hang_slow_nv;
2474			break;
2475		default:
2476			return -1;
2477			break;
2478	}
2479
2480	memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t));
2481
2482	for (i = 0; i < loop; i++)
2483		memcpy(ptr + shader->header_length + shader->body_length * i,
2484			shader->shader + shader->header_length,
2485			shader->body_length * sizeof(uint32_t));
2486
2487	memcpy(ptr + shader->header_length + shader->body_length * loop,
2488		shader->shader + shader->header_length + shader->body_length,
2489		shader->foot_length * sizeof(uint32_t));
2490
2491	return 0;
2492}
2493
2494static int amdgpu_dispatch_load_cs_shader(uint8_t *ptr,
2495					   int cs_type,
2496					   uint32_t version)
2497{
2498	uint32_t shader_size;
2499	const uint32_t *shader;
2500
2501	switch (cs_type) {
2502		case CS_BUFFERCLEAR:
2503			if (version == 9) {
2504				shader = bufferclear_cs_shader_gfx9;
2505				shader_size = sizeof(bufferclear_cs_shader_gfx9);
2506			} else if (version == 10) {
2507				shader = bufferclear_cs_shader_gfx10;
2508				shader_size = sizeof(bufferclear_cs_shader_gfx10);
2509			}
2510			break;
2511		case CS_BUFFERCOPY:
2512			if (version == 9) {
2513				shader = buffercopy_cs_shader_gfx9;
2514				shader_size = sizeof(buffercopy_cs_shader_gfx9);
2515			} else if (version == 10) {
2516				shader = buffercopy_cs_shader_gfx10;
2517				shader_size = sizeof(buffercopy_cs_shader_gfx10);
2518			}
2519			break;
2520		case CS_HANG:
2521			shader = memcpy_ps_hang;
2522			shader_size = sizeof(memcpy_ps_hang);
2523			break;
2524		default:
2525			return -1;
2526			break;
2527	}
2528
2529	memcpy(ptr, shader, shader_size);
2530	return 0;
2531}
2532
2533static int amdgpu_dispatch_init(uint32_t *ptr, uint32_t ip_type, uint32_t version)
2534{
2535	int i = 0;
2536
2537	/* Write context control and load shadowing register if necessary */
2538	if (ip_type == AMDGPU_HW_IP_GFX) {
2539		ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
2540		ptr[i++] = 0x80000000;
2541		ptr[i++] = 0x80000000;
2542	}
2543
2544	/* Issue commands to set default compute state. */
2545	/* clear mmCOMPUTE_START_Z - mmCOMPUTE_START_X */
2546	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 3);
2547	ptr[i++] = 0x204;
2548	i += 3;
2549
2550	/* clear mmCOMPUTE_TMPRING_SIZE */
2551	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2552	ptr[i++] = 0x218;
2553	ptr[i++] = 0;
2554
2555	/* Set new sh registers in GFX10 to 0 */
2556	if (version == 10) {
2557		/* mmCOMPUTE_SHADER_CHKSUM */
2558		ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2559		ptr[i++] = 0x22a;
2560		ptr[i++] = 0;
2561		/* mmCOMPUTE_REQ_CTRL */
2562		ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 6);
2563		ptr[i++] = 0x222;
2564		i += 6;
2565		/* mmCP_COHER_START_DELAY */
2566		ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
2567		ptr[i++] = 0x7b;
2568		ptr[i++] = 0x20;
2569	}
2570	return i;
2571}
2572
2573static int amdgpu_dispatch_write_cumask(uint32_t *ptr, uint32_t version)
2574{
2575	int i = 0;
2576
2577	/*  Issue commands to set cu mask used in current dispatch */
2578	if (version == 9) {
2579		/* set mmCOMPUTE_STATIC_THREAD_MGMT_SE1 - mmCOMPUTE_STATIC_THREAD_MGMT_SE0 */
2580		ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
2581		ptr[i++] = 0x216;
2582		ptr[i++] = 0xffffffff;
2583		ptr[i++] = 0xffffffff;
2584		/* set mmCOMPUTE_STATIC_THREAD_MGMT_SE3 - mmCOMPUTE_STATIC_THREAD_MGMT_SE2 */
2585		ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
2586		ptr[i++] = 0x219;
2587		ptr[i++] = 0xffffffff;
2588		ptr[i++] = 0xffffffff;
2589	} else if (version == 10) {
2590		/* set mmCOMPUTE_STATIC_THREAD_MGMT_SE1 - mmCOMPUTE_STATIC_THREAD_MGMT_SE0 */
2591		ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG_INDEX, 2);
2592		ptr[i++] = 0x30000216;
2593		ptr[i++] = 0xffffffff;
2594		ptr[i++] = 0xffffffff;
2595		/* set mmCOMPUTE_STATIC_THREAD_MGMT_SE3 - mmCOMPUTE_STATIC_THREAD_MGMT_SE2 */
2596		ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG_INDEX, 2);
2597		ptr[i++] = 0x30000219;
2598		ptr[i++] = 0xffffffff;
2599		ptr[i++] = 0xffffffff;
2600	}
2601
2602	return i;
2603}
2604
2605static int amdgpu_dispatch_write2hw(uint32_t *ptr, uint64_t shader_addr, uint32_t version)
2606{
2607	int i, j;
2608
2609	i = 0;
2610
2611	/* Writes shader state to HW */
2612	/* set mmCOMPUTE_PGM_HI - mmCOMPUTE_PGM_LO */
2613	ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 2);
2614	ptr[i++] = 0x20c;
2615	ptr[i++] = (shader_addr >> 8);
2616	ptr[i++] = (shader_addr >> 40);
2617	/* write sh regs*/
2618	for (j = 0; j < bufferclear_cs_shader_registers_num_gfx9; j++) {
2619		ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2620		/* - Gfx9ShRegBase */
2621		ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][0] - 0x2c00;
2622		ptr[i++] = bufferclear_cs_shader_registers_gfx9[j][1];
2623	}
2624
2625	if (version == 10) {
2626		/* mmCOMPUTE_PGM_RSRC3 */
2627		ptr[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2628		ptr[i++] = 0x228;
2629		ptr[i++] = 0;
2630	}
2631
2632	return i;
2633}
2634
2635static void amdgpu_memset_dispatch_test(amdgpu_device_handle device_handle,
2636					 uint32_t ip_type,
2637					 uint32_t ring,
2638					 uint32_t version)
2639{
2640	amdgpu_context_handle context_handle;
2641	amdgpu_bo_handle bo_dst, bo_shader, bo_cmd, resources[3];
2642	volatile unsigned char *ptr_dst;
2643	void *ptr_shader;
2644	uint32_t *ptr_cmd;
2645	uint64_t mc_address_dst, mc_address_shader, mc_address_cmd;
2646	amdgpu_va_handle va_dst, va_shader, va_cmd;
2647	int i, r;
2648	int bo_dst_size = 16384;
2649	int bo_shader_size = 4096;
2650	int bo_cmd_size = 4096;
2651	struct amdgpu_cs_request ibs_request = {0};
2652	struct amdgpu_cs_ib_info ib_info= {0};
2653	amdgpu_bo_list_handle bo_list;
2654	struct amdgpu_cs_fence fence_status = {0};
2655	uint32_t expired;
2656
2657	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2658	CU_ASSERT_EQUAL(r, 0);
2659
2660	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
2661					AMDGPU_GEM_DOMAIN_GTT, 0,
2662					&bo_cmd, (void **)&ptr_cmd,
2663					&mc_address_cmd, &va_cmd);
2664	CU_ASSERT_EQUAL(r, 0);
2665	memset(ptr_cmd, 0, bo_cmd_size);
2666
2667	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
2668					AMDGPU_GEM_DOMAIN_VRAM, 0,
2669					&bo_shader, &ptr_shader,
2670					&mc_address_shader, &va_shader);
2671	CU_ASSERT_EQUAL(r, 0);
2672	memset(ptr_shader, 0, bo_shader_size);
2673
2674	r = amdgpu_dispatch_load_cs_shader(ptr_shader, CS_BUFFERCLEAR, version);
2675	CU_ASSERT_EQUAL(r, 0);
2676
2677	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
2678					AMDGPU_GEM_DOMAIN_VRAM, 0,
2679					&bo_dst, (void **)&ptr_dst,
2680					&mc_address_dst, &va_dst);
2681	CU_ASSERT_EQUAL(r, 0);
2682
2683	i = 0;
2684	i += amdgpu_dispatch_init(ptr_cmd + i, ip_type, version);
2685
2686	/*  Issue commands to set cu mask used in current dispatch */
2687	i += amdgpu_dispatch_write_cumask(ptr_cmd + i, version);
2688
2689	/* Writes shader state to HW */
2690	i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader, version);
2691
2692	/* Write constant data */
2693	/* Writes the UAV constant data to the SGPRs. */
2694	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
2695	ptr_cmd[i++] = 0x240;
2696	ptr_cmd[i++] = mc_address_dst;
2697	ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
2698	ptr_cmd[i++] = 0x400;
2699	if (version == 9)
2700		ptr_cmd[i++] = 0x74fac;
2701	else if (version == 10)
2702		ptr_cmd[i++] = 0x1104bfac;
2703
2704	/* Sets a range of pixel shader constants */
2705	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
2706	ptr_cmd[i++] = 0x244;
2707	ptr_cmd[i++] = 0x22222222;
2708	ptr_cmd[i++] = 0x22222222;
2709	ptr_cmd[i++] = 0x22222222;
2710	ptr_cmd[i++] = 0x22222222;
2711
2712	/* clear mmCOMPUTE_RESOURCE_LIMITS */
2713	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2714	ptr_cmd[i++] = 0x215;
2715	ptr_cmd[i++] = 0;
2716
2717	/* dispatch direct command */
2718	ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
2719	ptr_cmd[i++] = 0x10;
2720	ptr_cmd[i++] = 1;
2721	ptr_cmd[i++] = 1;
2722	ptr_cmd[i++] = 1;
2723
2724	while (i & 7)
2725		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
2726
2727	resources[0] = bo_dst;
2728	resources[1] = bo_shader;
2729	resources[2] = bo_cmd;
2730	r = amdgpu_bo_list_create(device_handle, 3, resources, NULL, &bo_list);
2731	CU_ASSERT_EQUAL(r, 0);
2732
2733	ib_info.ib_mc_address = mc_address_cmd;
2734	ib_info.size = i;
2735	ibs_request.ip_type = ip_type;
2736	ibs_request.ring = ring;
2737	ibs_request.resources = bo_list;
2738	ibs_request.number_of_ibs = 1;
2739	ibs_request.ibs = &ib_info;
2740	ibs_request.fence_info.handle = NULL;
2741
2742	/* submit CS */
2743	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
2744	CU_ASSERT_EQUAL(r, 0);
2745
2746	r = amdgpu_bo_list_destroy(bo_list);
2747	CU_ASSERT_EQUAL(r, 0);
2748
2749	fence_status.ip_type = ip_type;
2750	fence_status.ip_instance = 0;
2751	fence_status.ring = ring;
2752	fence_status.context = context_handle;
2753	fence_status.fence = ibs_request.seq_no;
2754
2755	/* wait for IB accomplished */
2756	r = amdgpu_cs_query_fence_status(&fence_status,
2757					 AMDGPU_TIMEOUT_INFINITE,
2758					 0, &expired);
2759	CU_ASSERT_EQUAL(r, 0);
2760	CU_ASSERT_EQUAL(expired, true);
2761
2762	/* verify if memset test result meets with expected */
2763	i = 0;
2764	while(i < bo_dst_size) {
2765		CU_ASSERT_EQUAL(ptr_dst[i++], 0x22);
2766	}
2767
2768	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
2769	CU_ASSERT_EQUAL(r, 0);
2770
2771	r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
2772	CU_ASSERT_EQUAL(r, 0);
2773
2774	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
2775	CU_ASSERT_EQUAL(r, 0);
2776
2777	r = amdgpu_cs_ctx_free(context_handle);
2778	CU_ASSERT_EQUAL(r, 0);
2779}
2780
2781static void amdgpu_memcpy_dispatch_test(amdgpu_device_handle device_handle,
2782					uint32_t ip_type,
2783					uint32_t ring,
2784					uint32_t version,
2785					int hang)
2786{
2787	amdgpu_context_handle context_handle;
2788	amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4];
2789	volatile unsigned char *ptr_dst;
2790	void *ptr_shader;
2791	unsigned char *ptr_src;
2792	uint32_t *ptr_cmd;
2793	uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd;
2794	amdgpu_va_handle va_src, va_dst, va_shader, va_cmd;
2795	int i, r;
2796	int bo_dst_size = 16384;
2797	int bo_shader_size = 4096;
2798	int bo_cmd_size = 4096;
2799	struct amdgpu_cs_request ibs_request = {0};
2800	struct amdgpu_cs_ib_info ib_info= {0};
2801	uint32_t expired, hang_state, hangs;
2802	enum cs_type cs_type;
2803	amdgpu_bo_list_handle bo_list;
2804	struct amdgpu_cs_fence fence_status = {0};
2805
2806	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
2807	CU_ASSERT_EQUAL(r, 0);
2808
2809	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
2810				    AMDGPU_GEM_DOMAIN_GTT, 0,
2811				    &bo_cmd, (void **)&ptr_cmd,
2812				    &mc_address_cmd, &va_cmd);
2813	CU_ASSERT_EQUAL(r, 0);
2814	memset(ptr_cmd, 0, bo_cmd_size);
2815
2816	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
2817					AMDGPU_GEM_DOMAIN_VRAM, 0,
2818					&bo_shader, &ptr_shader,
2819					&mc_address_shader, &va_shader);
2820	CU_ASSERT_EQUAL(r, 0);
2821	memset(ptr_shader, 0, bo_shader_size);
2822
2823	cs_type = hang ? CS_HANG : CS_BUFFERCOPY;
2824	r = amdgpu_dispatch_load_cs_shader(ptr_shader, cs_type, version);
2825	CU_ASSERT_EQUAL(r, 0);
2826
2827	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
2828					AMDGPU_GEM_DOMAIN_VRAM, 0,
2829					&bo_src, (void **)&ptr_src,
2830					&mc_address_src, &va_src);
2831	CU_ASSERT_EQUAL(r, 0);
2832
2833	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
2834					AMDGPU_GEM_DOMAIN_VRAM, 0,
2835					&bo_dst, (void **)&ptr_dst,
2836					&mc_address_dst, &va_dst);
2837	CU_ASSERT_EQUAL(r, 0);
2838
2839	memset(ptr_src, 0x55, bo_dst_size);
2840
2841	i = 0;
2842	i += amdgpu_dispatch_init(ptr_cmd + i, ip_type, version);
2843
2844	/*  Issue commands to set cu mask used in current dispatch */
2845	i += amdgpu_dispatch_write_cumask(ptr_cmd + i, version);
2846
2847	/* Writes shader state to HW */
2848	i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader, version);
2849
2850	/* Write constant data */
2851	/* Writes the texture resource constants data to the SGPRs */
2852	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
2853	ptr_cmd[i++] = 0x240;
2854	ptr_cmd[i++] = mc_address_src;
2855	ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000;
2856	ptr_cmd[i++] = 0x400;
2857	if (version == 9)
2858		ptr_cmd[i++] = 0x74fac;
2859	else if (version == 10)
2860		ptr_cmd[i++] = 0x1104bfac;
2861
2862	/* Writes the UAV constant data to the SGPRs. */
2863	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
2864	ptr_cmd[i++] = 0x244;
2865	ptr_cmd[i++] = mc_address_dst;
2866	ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
2867	ptr_cmd[i++] = 0x400;
2868	if (version == 9)
2869		ptr_cmd[i++] = 0x74fac;
2870	else if (version == 10)
2871		ptr_cmd[i++] = 0x1104bfac;
2872
2873	/* clear mmCOMPUTE_RESOURCE_LIMITS */
2874	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
2875	ptr_cmd[i++] = 0x215;
2876	ptr_cmd[i++] = 0;
2877
2878	/* dispatch direct command */
2879	ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
2880	ptr_cmd[i++] = 0x10;
2881	ptr_cmd[i++] = 1;
2882	ptr_cmd[i++] = 1;
2883	ptr_cmd[i++] = 1;
2884
2885	while (i & 7)
2886		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
2887
2888	resources[0] = bo_shader;
2889	resources[1] = bo_src;
2890	resources[2] = bo_dst;
2891	resources[3] = bo_cmd;
2892	r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
2893	CU_ASSERT_EQUAL(r, 0);
2894
2895	ib_info.ib_mc_address = mc_address_cmd;
2896	ib_info.size = i;
2897	ibs_request.ip_type = ip_type;
2898	ibs_request.ring = ring;
2899	ibs_request.resources = bo_list;
2900	ibs_request.number_of_ibs = 1;
2901	ibs_request.ibs = &ib_info;
2902	ibs_request.fence_info.handle = NULL;
2903	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
2904	CU_ASSERT_EQUAL(r, 0);
2905
2906	fence_status.ip_type = ip_type;
2907	fence_status.ip_instance = 0;
2908	fence_status.ring = ring;
2909	fence_status.context = context_handle;
2910	fence_status.fence = ibs_request.seq_no;
2911
2912	/* wait for IB accomplished */
2913	r = amdgpu_cs_query_fence_status(&fence_status,
2914					 AMDGPU_TIMEOUT_INFINITE,
2915					 0, &expired);
2916
2917	if (!hang) {
2918		CU_ASSERT_EQUAL(r, 0);
2919		CU_ASSERT_EQUAL(expired, true);
2920
2921		/* verify if memcpy test result meets with expected */
2922		i = 0;
2923		while(i < bo_dst_size) {
2924			CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]);
2925			i++;
2926		}
2927	} else {
2928		r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
2929		CU_ASSERT_EQUAL(r, 0);
2930		CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
2931	}
2932
2933	r = amdgpu_bo_list_destroy(bo_list);
2934	CU_ASSERT_EQUAL(r, 0);
2935
2936	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size);
2937	CU_ASSERT_EQUAL(r, 0);
2938	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
2939	CU_ASSERT_EQUAL(r, 0);
2940
2941	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
2942	CU_ASSERT_EQUAL(r, 0);
2943
2944	r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
2945	CU_ASSERT_EQUAL(r, 0);
2946
2947	r = amdgpu_cs_ctx_free(context_handle);
2948	CU_ASSERT_EQUAL(r, 0);
2949}
2950
2951static void amdgpu_compute_dispatch_test(void)
2952{
2953	int r;
2954	struct drm_amdgpu_info_hw_ip info;
2955	uint32_t ring_id, version;
2956
2957	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_COMPUTE, 0, &info);
2958	CU_ASSERT_EQUAL(r, 0);
2959	if (!info.available_rings)
2960		printf("SKIP ... as there's no compute ring\n");
2961
2962	version = info.hw_ip_version_major;
2963	if (version != 9 && version != 10) {
2964		printf("SKIP ... unsupported gfx version %d\n", version);
2965		return;
2966	}
2967
2968	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
2969		amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id, version);
2970		amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_COMPUTE, ring_id, version, 0);
2971	}
2972}
2973
2974static void amdgpu_gfx_dispatch_test(void)
2975{
2976	int r;
2977	struct drm_amdgpu_info_hw_ip info;
2978	uint32_t ring_id, version;
2979
2980	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
2981	CU_ASSERT_EQUAL(r, 0);
2982	if (!info.available_rings)
2983		printf("SKIP ... as there's no graphics ring\n");
2984
2985	version = info.hw_ip_version_major;
2986	if (version != 9 && version != 10) {
2987		printf("SKIP ... unsupported gfx version %d\n", version);
2988		return;
2989	}
2990
2991	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
2992		amdgpu_memset_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id, version);
2993		amdgpu_memcpy_dispatch_test(device_handle, AMDGPU_HW_IP_GFX, ring_id, version, 0);
2994	}
2995}
2996
2997void amdgpu_dispatch_hang_helper(amdgpu_device_handle device_handle, uint32_t ip_type)
2998{
2999	int r;
3000	struct drm_amdgpu_info_hw_ip info;
3001	uint32_t ring_id, version;
3002
3003	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info);
3004	CU_ASSERT_EQUAL(r, 0);
3005	if (!info.available_rings)
3006		printf("SKIP ... as there's no ring for ip %d\n", ip_type);
3007
3008	version = info.hw_ip_version_major;
3009	if (version != 9 && version != 10) {
3010		printf("SKIP ... unsupported gfx version %d\n", version);
3011		return;
3012	}
3013
3014	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
3015		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, version, 0);
3016		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, version, 1);
3017		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, version, 0);
3018	}
3019}
3020
3021static void amdgpu_memcpy_dispatch_hang_slow_test(amdgpu_device_handle device_handle,
3022						  uint32_t ip_type, uint32_t ring, int version)
3023{
3024	amdgpu_context_handle context_handle;
3025	amdgpu_bo_handle bo_src, bo_dst, bo_shader, bo_cmd, resources[4];
3026	volatile unsigned char *ptr_dst;
3027	void *ptr_shader;
3028	unsigned char *ptr_src;
3029	uint32_t *ptr_cmd;
3030	uint64_t mc_address_src, mc_address_dst, mc_address_shader, mc_address_cmd;
3031	amdgpu_va_handle va_src, va_dst, va_shader, va_cmd;
3032	int i, r;
3033	int bo_dst_size = 0x4000000;
3034	int bo_shader_size = 0x400000;
3035	int bo_cmd_size = 4096;
3036	struct amdgpu_cs_request ibs_request = {0};
3037	struct amdgpu_cs_ib_info ib_info= {0};
3038	uint32_t hang_state, hangs, expired;
3039	struct amdgpu_gpu_info gpu_info = {0};
3040	amdgpu_bo_list_handle bo_list;
3041	struct amdgpu_cs_fence fence_status = {0};
3042
3043	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
3044	CU_ASSERT_EQUAL(r, 0);
3045
3046	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
3047	CU_ASSERT_EQUAL(r, 0);
3048
3049	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
3050				    AMDGPU_GEM_DOMAIN_GTT, 0,
3051				    &bo_cmd, (void **)&ptr_cmd,
3052				    &mc_address_cmd, &va_cmd);
3053	CU_ASSERT_EQUAL(r, 0);
3054	memset(ptr_cmd, 0, bo_cmd_size);
3055
3056	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
3057					AMDGPU_GEM_DOMAIN_VRAM, 0,
3058					&bo_shader, &ptr_shader,
3059					&mc_address_shader, &va_shader);
3060	CU_ASSERT_EQUAL(r, 0);
3061	memset(ptr_shader, 0, bo_shader_size);
3062
3063	r = amdgpu_dispatch_load_cs_shader_hang_slow(ptr_shader, gpu_info.family_id);
3064	CU_ASSERT_EQUAL(r, 0);
3065
3066	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
3067					AMDGPU_GEM_DOMAIN_VRAM, 0,
3068					&bo_src, (void **)&ptr_src,
3069					&mc_address_src, &va_src);
3070	CU_ASSERT_EQUAL(r, 0);
3071
3072	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
3073					AMDGPU_GEM_DOMAIN_VRAM, 0,
3074					&bo_dst, (void **)&ptr_dst,
3075					&mc_address_dst, &va_dst);
3076	CU_ASSERT_EQUAL(r, 0);
3077
3078	memset(ptr_src, 0x55, bo_dst_size);
3079
3080	i = 0;
3081	i += amdgpu_dispatch_init(ptr_cmd + i, ip_type, version);
3082
3083	/*  Issue commands to set cu mask used in current dispatch */
3084	i += amdgpu_dispatch_write_cumask(ptr_cmd + i, version);
3085
3086	/* Writes shader state to HW */
3087	i += amdgpu_dispatch_write2hw(ptr_cmd + i, mc_address_shader, version);
3088
3089	/* Write constant data */
3090	/* Writes the texture resource constants data to the SGPRs */
3091	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
3092	ptr_cmd[i++] = 0x240;
3093	ptr_cmd[i++] = mc_address_src;
3094	ptr_cmd[i++] = (mc_address_src >> 32) | 0x100000;
3095	ptr_cmd[i++] = 0x400000;
3096	if (version == 9)
3097		ptr_cmd[i++] = 0x74fac;
3098	else if (version == 10)
3099		ptr_cmd[i++] = 0x1104bfac;
3100
3101	/* Writes the UAV constant data to the SGPRs. */
3102	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 4);
3103	ptr_cmd[i++] = 0x244;
3104	ptr_cmd[i++] = mc_address_dst;
3105	ptr_cmd[i++] = (mc_address_dst >> 32) | 0x100000;
3106	ptr_cmd[i++] = 0x400000;
3107	if (version == 9)
3108		ptr_cmd[i++] = 0x74fac;
3109	else if (version == 10)
3110		ptr_cmd[i++] = 0x1104bfac;
3111
3112	/* clear mmCOMPUTE_RESOURCE_LIMITS */
3113	ptr_cmd[i++] = PACKET3_COMPUTE(PKT3_SET_SH_REG, 1);
3114	ptr_cmd[i++] = 0x215;
3115	ptr_cmd[i++] = 0;
3116
3117	/* dispatch direct command */
3118	ptr_cmd[i++] = PACKET3_COMPUTE(PACKET3_DISPATCH_DIRECT, 3);
3119	ptr_cmd[i++] = 0x10000;
3120	ptr_cmd[i++] = 1;
3121	ptr_cmd[i++] = 1;
3122	ptr_cmd[i++] = 1;
3123
3124	while (i & 7)
3125		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
3126
3127	resources[0] = bo_shader;
3128	resources[1] = bo_src;
3129	resources[2] = bo_dst;
3130	resources[3] = bo_cmd;
3131	r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
3132	CU_ASSERT_EQUAL(r, 0);
3133
3134	ib_info.ib_mc_address = mc_address_cmd;
3135	ib_info.size = i;
3136	ibs_request.ip_type = ip_type;
3137	ibs_request.ring = ring;
3138	ibs_request.resources = bo_list;
3139	ibs_request.number_of_ibs = 1;
3140	ibs_request.ibs = &ib_info;
3141	ibs_request.fence_info.handle = NULL;
3142	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
3143	CU_ASSERT_EQUAL(r, 0);
3144
3145	fence_status.ip_type = ip_type;
3146	fence_status.ip_instance = 0;
3147	fence_status.ring = ring;
3148	fence_status.context = context_handle;
3149	fence_status.fence = ibs_request.seq_no;
3150
3151	/* wait for IB accomplished */
3152	r = amdgpu_cs_query_fence_status(&fence_status,
3153					 AMDGPU_TIMEOUT_INFINITE,
3154					 0, &expired);
3155
3156	r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
3157	CU_ASSERT_EQUAL(r, 0);
3158	CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
3159
3160	r = amdgpu_bo_list_destroy(bo_list);
3161	CU_ASSERT_EQUAL(r, 0);
3162
3163	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_dst_size);
3164	CU_ASSERT_EQUAL(r, 0);
3165	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
3166	CU_ASSERT_EQUAL(r, 0);
3167
3168	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
3169	CU_ASSERT_EQUAL(r, 0);
3170
3171	r = amdgpu_bo_unmap_and_free(bo_shader, va_shader, mc_address_shader, bo_shader_size);
3172	CU_ASSERT_EQUAL(r, 0);
3173
3174	r = amdgpu_cs_ctx_free(context_handle);
3175	CU_ASSERT_EQUAL(r, 0);
3176}
3177
3178void amdgpu_dispatch_hang_slow_helper(amdgpu_device_handle device_handle, uint32_t ip_type)
3179{
3180	int r;
3181	struct drm_amdgpu_info_hw_ip info;
3182	uint32_t ring_id, version;
3183
3184	r = amdgpu_query_hw_ip_info(device_handle, ip_type, 0, &info);
3185	CU_ASSERT_EQUAL(r, 0);
3186	if (!info.available_rings)
3187		printf("SKIP ... as there's no ring for ip %d\n", ip_type);
3188
3189	version = info.hw_ip_version_major;
3190	if (version != 9 && version != 10) {
3191		printf("SKIP ... unsupported gfx version %d\n", version);
3192		return;
3193	}
3194
3195	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
3196		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, version, 0);
3197		amdgpu_memcpy_dispatch_hang_slow_test(device_handle, ip_type, ring_id, version);
3198		amdgpu_memcpy_dispatch_test(device_handle, ip_type, ring_id, version, 0);
3199	}
3200}
3201
3202static int amdgpu_draw_load_ps_shader_hang_slow(uint32_t *ptr, int family)
3203{
3204	struct amdgpu_test_shader *shader;
3205	int i, loop = 0x40000;
3206
3207	switch (family) {
3208		case AMDGPU_FAMILY_AI:
3209		case AMDGPU_FAMILY_RV:
3210			shader = &memcpy_ps_hang_slow_ai;
3211			break;
3212		default:
3213			return -1;
3214			break;
3215	}
3216
3217	memcpy(ptr, shader->shader, shader->header_length * sizeof(uint32_t));
3218
3219	for (i = 0; i < loop; i++)
3220		memcpy(ptr + shader->header_length + shader->body_length * i,
3221			shader->shader + shader->header_length,
3222			shader->body_length * sizeof(uint32_t));
3223
3224	memcpy(ptr + shader->header_length + shader->body_length * loop,
3225		shader->shader + shader->header_length + shader->body_length,
3226		shader->foot_length * sizeof(uint32_t));
3227
3228	return 0;
3229}
3230
3231static int amdgpu_draw_load_ps_shader(uint8_t *ptr, int ps_type, uint32_t version)
3232{
3233	int i;
3234	uint32_t shader_offset= 256;
3235	uint32_t mem_offset, patch_code_offset;
3236	uint32_t shader_size, patchinfo_code_size;
3237	const uint32_t *shader;
3238	const uint32_t *patchinfo_code;
3239	const uint32_t *patchcode_offset;
3240
3241	switch (ps_type) {
3242		case PS_CONST:
3243			if (version == 9) {
3244				shader = ps_const_shader_gfx9;
3245				shader_size = sizeof(ps_const_shader_gfx9);
3246				patchinfo_code = (const uint32_t *)ps_const_shader_patchinfo_code_gfx9;
3247				patchinfo_code_size = ps_const_shader_patchinfo_code_size_gfx9;
3248				patchcode_offset = ps_const_shader_patchinfo_offset_gfx9;
3249			} else if (version == 10){
3250				shader = ps_const_shader_gfx10;
3251				shader_size = sizeof(ps_const_shader_gfx10);
3252				patchinfo_code = (const uint32_t *)ps_const_shader_patchinfo_code_gfx10;
3253				patchinfo_code_size = ps_const_shader_patchinfo_code_size_gfx10;
3254				patchcode_offset = ps_const_shader_patchinfo_offset_gfx10;
3255			}
3256			break;
3257		case PS_TEX:
3258			if (version == 9) {
3259				shader = ps_tex_shader_gfx9;
3260				shader_size = sizeof(ps_tex_shader_gfx9);
3261				patchinfo_code = (const uint32_t *)ps_tex_shader_patchinfo_code_gfx9;
3262				patchinfo_code_size = ps_tex_shader_patchinfo_code_size_gfx9;
3263				patchcode_offset = ps_tex_shader_patchinfo_offset_gfx9;
3264			} else if (version == 10) {
3265				shader = ps_tex_shader_gfx10;
3266				shader_size = sizeof(ps_tex_shader_gfx10);
3267				patchinfo_code = (const uint32_t *)ps_tex_shader_patchinfo_code_gfx10;
3268				patchinfo_code_size = ps_tex_shader_patchinfo_code_size_gfx10;
3269				patchcode_offset = ps_tex_shader_patchinfo_offset_gfx10;
3270			}
3271			break;
3272		case PS_HANG:
3273			shader = memcpy_ps_hang;
3274			shader_size = sizeof(memcpy_ps_hang);
3275
3276			memcpy(ptr, shader, shader_size);
3277			return 0;
3278		default:
3279			return -1;
3280			break;
3281	}
3282
3283	/* write main shader program */
3284	for (i = 0 ; i < 10; i++) {
3285		mem_offset = i * shader_offset;
3286		memcpy(ptr + mem_offset, shader, shader_size);
3287	}
3288
3289	/* overwrite patch codes */
3290	for (i = 0 ; i < 10; i++) {
3291		mem_offset = i * shader_offset + patchcode_offset[0] * sizeof(uint32_t);
3292		patch_code_offset = i * patchinfo_code_size;
3293		memcpy(ptr + mem_offset,
3294			patchinfo_code + patch_code_offset,
3295			patchinfo_code_size * sizeof(uint32_t));
3296	}
3297
3298	return 0;
3299}
3300
3301/* load RectPosTexFast_VS */
3302static int amdgpu_draw_load_vs_shader(uint8_t *ptr, uint32_t version)
3303{
3304	const uint32_t *shader;
3305	uint32_t shader_size;
3306
3307	if (version == 9) {
3308		shader = vs_RectPosTexFast_shader_gfx9;
3309		shader_size = sizeof(vs_RectPosTexFast_shader_gfx9);
3310	} else if (version == 10) {
3311		shader = vs_RectPosTexFast_shader_gfx10;
3312		shader_size = sizeof(vs_RectPosTexFast_shader_gfx10);
3313	}
3314
3315	memcpy(ptr, shader, shader_size);
3316
3317	return 0;
3318}
3319
3320static int amdgpu_draw_init(uint32_t *ptr, uint32_t version)
3321{
3322	int i = 0;
3323	const uint32_t *preamblecache_ptr;
3324	uint32_t preamblecache_size;
3325
3326	/* Write context control and load shadowing register if necessary */
3327	ptr[i++] = PACKET3(PKT3_CONTEXT_CONTROL, 1);
3328	ptr[i++] = 0x80000000;
3329	ptr[i++] = 0x80000000;
3330
3331	if (version == 9) {
3332		preamblecache_ptr = preamblecache_gfx9;
3333		preamblecache_size = sizeof(preamblecache_gfx9);
3334	} else if (version == 10) {
3335		preamblecache_ptr = preamblecache_gfx10;
3336		preamblecache_size = sizeof(preamblecache_gfx10);
3337	}
3338
3339	memcpy(ptr + i, preamblecache_ptr, preamblecache_size);
3340	return i + preamblecache_size/sizeof(uint32_t);
3341}
3342
3343static int amdgpu_draw_setup_and_write_drawblt_surf_info(uint32_t *ptr,
3344							 uint64_t dst_addr,
3345							 uint32_t version,
3346							 int hang_slow)
3347{
3348	int i = 0;
3349
3350	/* setup color buffer */
3351	if (version == 9) {
3352		/* offset   reg
3353		   0xA318   CB_COLOR0_BASE
3354		   0xA319   CB_COLOR0_BASE_EXT
3355		   0xA31A   CB_COLOR0_ATTRIB2
3356		   0xA31B   CB_COLOR0_VIEW
3357		   0xA31C   CB_COLOR0_INFO
3358		   0xA31D   CB_COLOR0_ATTRIB
3359		   0xA31E   CB_COLOR0_DCC_CONTROL
3360		   0xA31F   CB_COLOR0_CMASK
3361		   0xA320   CB_COLOR0_CMASK_BASE_EXT
3362		   0xA321   CB_COLOR0_FMASK
3363		   0xA322   CB_COLOR0_FMASK_BASE_EXT
3364		   0xA323   CB_COLOR0_CLEAR_WORD0
3365		   0xA324   CB_COLOR0_CLEAR_WORD1
3366		   0xA325   CB_COLOR0_DCC_BASE
3367		   0xA326   CB_COLOR0_DCC_BASE_EXT */
3368		ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 15);
3369		ptr[i++] = 0x318;
3370		ptr[i++] = dst_addr >> 8;
3371		ptr[i++] = dst_addr >> 40;
3372		ptr[i++] = hang_slow ? 0x3ffc7ff : 0x7c01f;
3373		ptr[i++] = 0;
3374		ptr[i++] = 0x50438;
3375		ptr[i++] = 0x10140000;
3376		i += 9;
3377
3378		/* mmCB_MRT0_EPITCH */
3379		ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3380		ptr[i++] = 0x1e8;
3381		ptr[i++] = hang_slow ? 0xfff : 0x1f;
3382	} else if (version == 10) {
3383		/* 0xA318   CB_COLOR0_BASE
3384		   0xA319   CB_COLOR0_PITCH
3385		   0xA31A   CB_COLOR0_SLICE
3386		   0xA31B   CB_COLOR0_VIEW
3387		   0xA31C   CB_COLOR0_INFO
3388		   0xA31D   CB_COLOR0_ATTRIB
3389		   0xA31E   CB_COLOR0_DCC_CONTROL
3390		   0xA31F   CB_COLOR0_CMASK
3391		   0xA320   CB_COLOR0_CMASK_SLICE
3392		   0xA321   CB_COLOR0_FMASK
3393		   0xA322   CB_COLOR0_FMASK_SLICE
3394		   0xA323   CB_COLOR0_CLEAR_WORD0
3395		   0xA324   CB_COLOR0_CLEAR_WORD1
3396		   0xA325   CB_COLOR0_DCC_BASE */
3397		ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 14);
3398		ptr[i++] = 0x318;
3399		ptr[i++] = dst_addr >> 8;
3400		i += 3;
3401		ptr[i++] = 0x50438;
3402		i += 9;
3403
3404		/* 0xA390   CB_COLOR0_BASE_EXT */
3405		ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3406		ptr[i++] = 0x390;
3407		ptr[i++] = dst_addr >> 40;
3408
3409		/* 0xA398   CB_COLOR0_CMASK_BASE_EXT */
3410		ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3411		ptr[i++] = 0x398;
3412		ptr[i++] = 0;
3413
3414		/* 0xA3A0   CB_COLOR0_FMASK_BASE_EXT */
3415		ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3416		ptr[i++] = 0x3a0;
3417		ptr[i++] = 0;
3418
3419		/* 0xA3A8   CB_COLOR0_DCC_BASE_EXT */
3420		ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3421		ptr[i++] = 0x3a8;
3422		ptr[i++] = 0;
3423
3424		/* 0xA3B0   CB_COLOR0_ATTRIB2 */
3425		ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3426		ptr[i++] = 0x3b0;
3427		ptr[i++] = hang_slow ? 0x3ffc7ff : 0x7c01f;
3428
3429		/* 0xA3B8   CB_COLOR0_ATTRIB3 */
3430		ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3431		ptr[i++] = 0x3b8;
3432		ptr[i++] = 0x9014000;
3433	}
3434
3435	/* 0xA32B   CB_COLOR1_BASE */
3436	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3437	ptr[i++] = 0x32b;
3438	ptr[i++] = 0;
3439
3440	/* 0xA33A   CB_COLOR1_BASE */
3441	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3442	ptr[i++] = 0x33a;
3443	ptr[i++] = 0;
3444
3445	/* SPI_SHADER_COL_FORMAT */
3446	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3447	ptr[i++] = 0x1c5;
3448	ptr[i++] = 9;
3449
3450	/* Setup depth buffer */
3451	if (version == 9) {
3452		/* mmDB_Z_INFO */
3453		ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
3454		ptr[i++] = 0xe;
3455		i += 2;
3456	} else if (version == 10) {
3457		/* mmDB_Z_INFO */
3458		ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
3459		ptr[i++] = 0x10;
3460		i += 2;
3461	}
3462
3463	return i;
3464}
3465
3466static int amdgpu_draw_setup_and_write_drawblt_state(uint32_t *ptr,
3467						     uint32_t version,
3468						     int hang_slow)
3469{
3470	int i = 0;
3471	const uint32_t *cached_cmd_ptr;
3472	uint32_t cached_cmd_size;
3473
3474	/* mmPA_SC_TILE_STEERING_OVERRIDE */
3475	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3476	ptr[i++] = 0xd7;
3477	ptr[i++] = 0;
3478
3479	ptr[i++] = 0xffff1000;
3480	ptr[i++] = 0xc0021000;
3481
3482	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3483	ptr[i++] = 0xd7;
3484	if (version == 9)
3485		ptr[i++] = 1;
3486	else if (version == 10)
3487		ptr[i++] = 0;
3488
3489	/* mmPA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0 */
3490	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 16);
3491	ptr[i++] = 0x2fe;
3492	i += 16;
3493
3494	/* mmPA_SC_CENTROID_PRIORITY_0 */
3495	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
3496	ptr[i++] = 0x2f5;
3497	i += 2;
3498
3499	if (version == 9) {
3500		cached_cmd_ptr = cached_cmd_gfx9;
3501		cached_cmd_size = sizeof(cached_cmd_gfx9);
3502	} else if (version == 10) {
3503		cached_cmd_ptr = cached_cmd_gfx10;
3504		cached_cmd_size = sizeof(cached_cmd_gfx10);
3505	}
3506
3507	memcpy(ptr + i, cached_cmd_ptr, cached_cmd_size);
3508	if (hang_slow)
3509		*(ptr + i + 12) = 0x8000800;
3510	i += cached_cmd_size/sizeof(uint32_t);
3511
3512	if (version == 10) {
3513		/* mmCB_RMI_GL2_CACHE_CONTROL */
3514		ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3515		ptr[i++] = 0x104;
3516		ptr[i++] = 0x40aa0055;
3517		/* mmDB_RMI_L2_CACHE_CONTROL */
3518		ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3519		ptr[i++] = 0x1f;
3520		ptr[i++] = 0x2a0055;
3521	}
3522
3523	return i;
3524}
3525
3526static int amdgpu_draw_vs_RectPosTexFast_write2hw(uint32_t *ptr,
3527						  int ps_type,
3528						  uint64_t shader_addr,
3529						  uint32_t version,
3530						  int hang_slow)
3531{
3532	int i = 0;
3533
3534	/* mmPA_CL_VS_OUT_CNTL */
3535	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3536	ptr[i++] = 0x207;
3537	ptr[i++] = 0;
3538
3539	if (version == 9) {
3540		/* mmSPI_SHADER_PGM_RSRC3_VS */
3541		ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
3542		ptr[i++] = 0x46;
3543		ptr[i++] = 0xffff;
3544	} else if (version == 10) {
3545		/* mmSPI_SHADER_PGM_RSRC3_VS */
3546		ptr[i++] = PACKET3(PKT3_SET_SH_REG_INDEX, 1);
3547		ptr[i++] = 0x30000046;
3548		ptr[i++] = 0xffff;
3549		/* mmSPI_SHADER_PGM_RSRC4_VS */
3550		ptr[i++] = PACKET3(PKT3_SET_SH_REG_INDEX, 1);
3551		ptr[i++] = 0x30000041;
3552		ptr[i++] = 0xffff;
3553	}
3554
3555	/* mmSPI_SHADER_PGM_LO_VS...mmSPI_SHADER_PGM_HI_VS */
3556	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
3557	ptr[i++] = 0x48;
3558	ptr[i++] = shader_addr >> 8;
3559	ptr[i++] = shader_addr >> 40;
3560
3561	/* mmSPI_SHADER_PGM_RSRC1_VS */
3562	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
3563	ptr[i++] = 0x4a;
3564	if (version == 9)
3565		ptr[i++] = 0xc0081;
3566	else if (version == 10)
3567		ptr[i++] = 0xc0041;
3568	/* mmSPI_SHADER_PGM_RSRC2_VS */
3569	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
3570	ptr[i++] = 0x4b;
3571	ptr[i++] = 0x18;
3572
3573	/* mmSPI_VS_OUT_CONFIG */
3574	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3575	ptr[i++] = 0x1b1;
3576	ptr[i++] = 2;
3577
3578	/* mmSPI_SHADER_POS_FORMAT */
3579	ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3580	ptr[i++] = 0x1c3;
3581	ptr[i++] = 4;
3582
3583	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
3584	ptr[i++] = 0x4c;
3585	i += 2;
3586	ptr[i++] = hang_slow ? 0x45000000 : 0x42000000;
3587	ptr[i++] = hang_slow ? 0x45000000 : 0x42000000;
3588
3589	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
3590	ptr[i++] = 0x50;
3591	i += 2;
3592	if (ps_type == PS_CONST) {
3593		i += 2;
3594	} else if (ps_type == PS_TEX) {
3595		ptr[i++] = 0x3f800000;
3596		ptr[i++] = 0x3f800000;
3597	}
3598
3599	ptr[i++] = PACKET3(PKT3_SET_SH_REG, 4);
3600	ptr[i++] = 0x54;
3601	i += 4;
3602
3603	return i;
3604}
3605
3606static int amdgpu_draw_ps_write2hw(uint32_t *ptr,
3607				   int ps_type,
3608				   uint64_t shader_addr,
3609				   uint32_t version)
3610{
3611	int i, j;
3612	const uint32_t *sh_registers;
3613	const uint32_t *context_registers;
3614	uint32_t num_sh_reg, num_context_reg;
3615
3616	if (ps_type == PS_CONST) {
3617		if (version == 9) {
3618			sh_registers = (const uint32_t *)ps_const_sh_registers_gfx9;
3619			num_sh_reg = ps_num_sh_registers_gfx9;
3620		} else if (version == 10) {
3621			sh_registers = (const uint32_t *)ps_const_sh_registers_gfx10;
3622			num_sh_reg = ps_num_sh_registers_gfx10;
3623		}
3624		context_registers = (const uint32_t *)ps_const_context_reg_gfx9;
3625		num_context_reg = ps_num_context_registers_gfx9;
3626	} else if (ps_type == PS_TEX) {
3627		sh_registers = (const uint32_t *)ps_tex_sh_registers_gfx9;
3628		context_registers = (const uint32_t *)ps_tex_context_reg_gfx9;
3629		num_sh_reg = ps_num_sh_registers_gfx9;
3630		num_context_reg = ps_num_context_registers_gfx9;
3631	}
3632
3633	i = 0;
3634
3635	if (version == 9) {
3636		/* 0x2c07   SPI_SHADER_PGM_RSRC3_PS
3637		   0x2c08   SPI_SHADER_PGM_LO_PS
3638		   0x2c09   SPI_SHADER_PGM_HI_PS */
3639		/* multiplicator 9 is from  SPI_SHADER_COL_FORMAT */
3640		shader_addr += 256 * 9;
3641		ptr[i++] = PACKET3(PKT3_SET_SH_REG, 3);
3642		ptr[i++] = 0x7;
3643		ptr[i++] = 0xffff;
3644		ptr[i++] = shader_addr >> 8;
3645		ptr[i++] = shader_addr >> 40;
3646	} else if (version == 10) {
3647		shader_addr += 256 * 9;
3648		/* 0x2c08	 SPI_SHADER_PGM_LO_PS
3649		     0x2c09	 SPI_SHADER_PGM_HI_PS */
3650		ptr[i++] = PACKET3(PKT3_SET_SH_REG, 2);
3651		ptr[i++] = 0x8;
3652		ptr[i++] = shader_addr >> 8;
3653		ptr[i++] = shader_addr >> 40;
3654
3655		/* mmSPI_SHADER_PGM_RSRC3_PS */
3656		ptr[i++] = PACKET3(PKT3_SET_SH_REG_INDEX, 1);
3657		ptr[i++] = 0x30000007;
3658		ptr[i++] = 0xffff;
3659		/* mmSPI_SHADER_PGM_RSRC4_PS */
3660		ptr[i++] = PACKET3(PKT3_SET_SH_REG_INDEX, 1);
3661		ptr[i++] = 0x30000001;
3662		ptr[i++] = 0xffff;
3663	}
3664
3665	for (j = 0; j < num_sh_reg; j++) {
3666		ptr[i++] = PACKET3(PKT3_SET_SH_REG, 1);
3667		ptr[i++] = sh_registers[j * 2] - 0x2c00;
3668		ptr[i++] = sh_registers[j * 2 + 1];
3669	}
3670
3671	for (j = 0; j < num_context_reg; j++) {
3672		if (context_registers[j * 2] != 0xA1C5) {
3673			ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3674			ptr[i++] = context_registers[j * 2] - 0xa000;
3675			ptr[i++] = context_registers[j * 2 + 1];
3676		}
3677
3678		if (context_registers[j * 2] == 0xA1B4) {
3679			ptr[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3680			ptr[i++] = 0x1b3;
3681			ptr[i++] = 2;
3682		}
3683	}
3684
3685	return i;
3686}
3687
3688static int amdgpu_draw_draw(uint32_t *ptr, uint32_t version)
3689{
3690	int i = 0;
3691
3692	if (version == 9) {
3693		/* mmIA_MULTI_VGT_PARAM */
3694		ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3695		ptr[i++] = 0x40000258;
3696		ptr[i++] = 0xd00ff;
3697		/* mmVGT_PRIMITIVE_TYPE */
3698		ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3699		ptr[i++] = 0x10000242;
3700		ptr[i++] = 0x11;
3701	} else if (version == 10) {
3702		/* mmGE_CNTL */
3703		ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3704		ptr[i++] = 0x25b;
3705		ptr[i++] = 0xff;
3706		/* mmVGT_PRIMITIVE_TYPE */
3707		ptr[i++] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3708		ptr[i++] = 0x242;
3709		ptr[i++] = 0x11;
3710	}
3711
3712	ptr[i++] = PACKET3(PACKET3_DRAW_INDEX_AUTO, 1);
3713	ptr[i++] = 3;
3714	ptr[i++] = 2;
3715
3716	return i;
3717}
3718
3719void amdgpu_memset_draw(amdgpu_device_handle device_handle,
3720			amdgpu_bo_handle bo_shader_ps,
3721			amdgpu_bo_handle bo_shader_vs,
3722			uint64_t mc_address_shader_ps,
3723			uint64_t mc_address_shader_vs,
3724			uint32_t ring_id, uint32_t version)
3725{
3726	amdgpu_context_handle context_handle;
3727	amdgpu_bo_handle bo_dst, bo_cmd, resources[4];
3728	volatile unsigned char *ptr_dst;
3729	uint32_t *ptr_cmd;
3730	uint64_t mc_address_dst, mc_address_cmd;
3731	amdgpu_va_handle va_dst, va_cmd;
3732	int i, r;
3733	int bo_dst_size = 16384;
3734	int bo_cmd_size = 4096;
3735	struct amdgpu_cs_request ibs_request = {0};
3736	struct amdgpu_cs_ib_info ib_info = {0};
3737	struct amdgpu_cs_fence fence_status = {0};
3738	uint32_t expired;
3739	amdgpu_bo_list_handle bo_list;
3740
3741	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
3742	CU_ASSERT_EQUAL(r, 0);
3743
3744	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
3745					AMDGPU_GEM_DOMAIN_GTT, 0,
3746					&bo_cmd, (void **)&ptr_cmd,
3747					&mc_address_cmd, &va_cmd);
3748	CU_ASSERT_EQUAL(r, 0);
3749	memset(ptr_cmd, 0, bo_cmd_size);
3750
3751	r = amdgpu_bo_alloc_and_map(device_handle, bo_dst_size, 4096,
3752					AMDGPU_GEM_DOMAIN_VRAM, 0,
3753					&bo_dst, (void **)&ptr_dst,
3754					&mc_address_dst, &va_dst);
3755	CU_ASSERT_EQUAL(r, 0);
3756
3757	i = 0;
3758	i += amdgpu_draw_init(ptr_cmd + i, version);
3759
3760	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, version, 0);
3761
3762	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, version, 0);
3763
3764	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_vs,
3765						    version, 0);
3766
3767	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_CONST, mc_address_shader_ps, version);
3768
3769	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
3770	ptr_cmd[i++] = 0xc;
3771	ptr_cmd[i++] = 0x33333333;
3772	ptr_cmd[i++] = 0x33333333;
3773	ptr_cmd[i++] = 0x33333333;
3774	ptr_cmd[i++] = 0x33333333;
3775
3776	i += amdgpu_draw_draw(ptr_cmd + i, version);
3777
3778	while (i & 7)
3779		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
3780
3781	resources[0] = bo_dst;
3782	resources[1] = bo_shader_ps;
3783	resources[2] = bo_shader_vs;
3784	resources[3] = bo_cmd;
3785	r = amdgpu_bo_list_create(device_handle, 4, resources, NULL, &bo_list);
3786	CU_ASSERT_EQUAL(r, 0);
3787
3788	ib_info.ib_mc_address = mc_address_cmd;
3789	ib_info.size = i;
3790	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
3791	ibs_request.ring = ring_id;
3792	ibs_request.resources = bo_list;
3793	ibs_request.number_of_ibs = 1;
3794	ibs_request.ibs = &ib_info;
3795	ibs_request.fence_info.handle = NULL;
3796
3797	/* submit CS */
3798	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
3799	CU_ASSERT_EQUAL(r, 0);
3800
3801	r = amdgpu_bo_list_destroy(bo_list);
3802	CU_ASSERT_EQUAL(r, 0);
3803
3804	fence_status.ip_type = AMDGPU_HW_IP_GFX;
3805	fence_status.ip_instance = 0;
3806	fence_status.ring = ring_id;
3807	fence_status.context = context_handle;
3808	fence_status.fence = ibs_request.seq_no;
3809
3810	/* wait for IB accomplished */
3811	r = amdgpu_cs_query_fence_status(&fence_status,
3812					 AMDGPU_TIMEOUT_INFINITE,
3813					 0, &expired);
3814	CU_ASSERT_EQUAL(r, 0);
3815	CU_ASSERT_EQUAL(expired, true);
3816
3817	/* verify if memset test result meets with expected */
3818	i = 0;
3819	while(i < bo_dst_size) {
3820		CU_ASSERT_EQUAL(ptr_dst[i++], 0x33);
3821	}
3822
3823	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_dst_size);
3824	CU_ASSERT_EQUAL(r, 0);
3825
3826	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
3827	CU_ASSERT_EQUAL(r, 0);
3828
3829	r = amdgpu_cs_ctx_free(context_handle);
3830	CU_ASSERT_EQUAL(r, 0);
3831}
3832
3833static void amdgpu_memset_draw_test(amdgpu_device_handle device_handle,
3834				    uint32_t ring, int version)
3835{
3836	amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
3837	void *ptr_shader_ps;
3838	void *ptr_shader_vs;
3839	uint64_t mc_address_shader_ps, mc_address_shader_vs;
3840	amdgpu_va_handle va_shader_ps, va_shader_vs;
3841	int r;
3842	int bo_shader_size = 4096;
3843
3844	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
3845					AMDGPU_GEM_DOMAIN_VRAM, 0,
3846					&bo_shader_ps, &ptr_shader_ps,
3847					&mc_address_shader_ps, &va_shader_ps);
3848	CU_ASSERT_EQUAL(r, 0);
3849	memset(ptr_shader_ps, 0, bo_shader_size);
3850
3851	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
3852					AMDGPU_GEM_DOMAIN_VRAM, 0,
3853					&bo_shader_vs, &ptr_shader_vs,
3854					&mc_address_shader_vs, &va_shader_vs);
3855	CU_ASSERT_EQUAL(r, 0);
3856	memset(ptr_shader_vs, 0, bo_shader_size);
3857
3858	r = amdgpu_draw_load_ps_shader(ptr_shader_ps, PS_CONST, version);
3859	CU_ASSERT_EQUAL(r, 0);
3860
3861	r = amdgpu_draw_load_vs_shader(ptr_shader_vs, version);
3862	CU_ASSERT_EQUAL(r, 0);
3863
3864	amdgpu_memset_draw(device_handle, bo_shader_ps, bo_shader_vs,
3865			mc_address_shader_ps, mc_address_shader_vs,
3866			ring, version);
3867
3868	r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size);
3869	CU_ASSERT_EQUAL(r, 0);
3870
3871	r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size);
3872	CU_ASSERT_EQUAL(r, 0);
3873}
3874
3875static void amdgpu_memcpy_draw(amdgpu_device_handle device_handle,
3876			       amdgpu_bo_handle bo_shader_ps,
3877			       amdgpu_bo_handle bo_shader_vs,
3878			       uint64_t mc_address_shader_ps,
3879			       uint64_t mc_address_shader_vs,
3880			       uint32_t ring, int version, int hang)
3881{
3882	amdgpu_context_handle context_handle;
3883	amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5];
3884	volatile unsigned char *ptr_dst;
3885	unsigned char *ptr_src;
3886	uint32_t *ptr_cmd;
3887	uint64_t mc_address_dst, mc_address_src, mc_address_cmd;
3888	amdgpu_va_handle va_dst, va_src, va_cmd;
3889	int i, r;
3890	int bo_size = 16384;
3891	int bo_cmd_size = 4096;
3892	struct amdgpu_cs_request ibs_request = {0};
3893	struct amdgpu_cs_ib_info ib_info= {0};
3894	uint32_t hang_state, hangs;
3895	uint32_t expired;
3896	amdgpu_bo_list_handle bo_list;
3897	struct amdgpu_cs_fence fence_status = {0};
3898
3899	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
3900	CU_ASSERT_EQUAL(r, 0);
3901
3902	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
3903				    AMDGPU_GEM_DOMAIN_GTT, 0,
3904				    &bo_cmd, (void **)&ptr_cmd,
3905				    &mc_address_cmd, &va_cmd);
3906	CU_ASSERT_EQUAL(r, 0);
3907	memset(ptr_cmd, 0, bo_cmd_size);
3908
3909	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
3910					AMDGPU_GEM_DOMAIN_VRAM, 0,
3911					&bo_src, (void **)&ptr_src,
3912					&mc_address_src, &va_src);
3913	CU_ASSERT_EQUAL(r, 0);
3914
3915	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
3916					AMDGPU_GEM_DOMAIN_VRAM, 0,
3917					&bo_dst, (void **)&ptr_dst,
3918					&mc_address_dst, &va_dst);
3919	CU_ASSERT_EQUAL(r, 0);
3920
3921	memset(ptr_src, 0x55, bo_size);
3922
3923	i = 0;
3924	i += amdgpu_draw_init(ptr_cmd + i, version);
3925
3926	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, version, 0);
3927
3928	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, version, 0);
3929
3930	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_vs,
3931						    version, 0);
3932
3933	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps, version);
3934
3935	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8);
3936	if (version == 9) {
3937		ptr_cmd[i++] = 0xc;
3938		ptr_cmd[i++] = mc_address_src >> 8;
3939		ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000;
3940		ptr_cmd[i++] = 0x7c01f;
3941		ptr_cmd[i++] = 0x90500fac;
3942		ptr_cmd[i++] = 0x3e000;
3943		i += 3;
3944	} else if (version == 10) {
3945		ptr_cmd[i++] = 0xc;
3946		ptr_cmd[i++] = mc_address_src >> 8;
3947		ptr_cmd[i++] = mc_address_src >> 40 | 0xc4b00000;
3948		ptr_cmd[i++] = 0x8007c007;
3949		ptr_cmd[i++] = 0x90500fac;
3950		i += 2;
3951		ptr_cmd[i++] = 0x400;
3952		i++;
3953	}
3954
3955	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
3956	ptr_cmd[i++] = 0x14;
3957	ptr_cmd[i++] = 0x92;
3958	i += 3;
3959
3960	ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
3961	ptr_cmd[i++] = 0x191;
3962	ptr_cmd[i++] = 0;
3963
3964	i += amdgpu_draw_draw(ptr_cmd + i, version);
3965
3966	while (i & 7)
3967		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
3968
3969	resources[0] = bo_dst;
3970	resources[1] = bo_src;
3971	resources[2] = bo_shader_ps;
3972	resources[3] = bo_shader_vs;
3973	resources[4] = bo_cmd;
3974	r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list);
3975	CU_ASSERT_EQUAL(r, 0);
3976
3977	ib_info.ib_mc_address = mc_address_cmd;
3978	ib_info.size = i;
3979	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
3980	ibs_request.ring = ring;
3981	ibs_request.resources = bo_list;
3982	ibs_request.number_of_ibs = 1;
3983	ibs_request.ibs = &ib_info;
3984	ibs_request.fence_info.handle = NULL;
3985	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
3986	CU_ASSERT_EQUAL(r, 0);
3987
3988	fence_status.ip_type = AMDGPU_HW_IP_GFX;
3989	fence_status.ip_instance = 0;
3990	fence_status.ring = ring;
3991	fence_status.context = context_handle;
3992	fence_status.fence = ibs_request.seq_no;
3993
3994	/* wait for IB accomplished */
3995	r = amdgpu_cs_query_fence_status(&fence_status,
3996					 AMDGPU_TIMEOUT_INFINITE,
3997					 0, &expired);
3998	if (!hang) {
3999		CU_ASSERT_EQUAL(r, 0);
4000		CU_ASSERT_EQUAL(expired, true);
4001
4002		/* verify if memcpy test result meets with expected */
4003		i = 0;
4004		while(i < bo_size) {
4005			CU_ASSERT_EQUAL(ptr_dst[i], ptr_src[i]);
4006			i++;
4007		}
4008	} else {
4009		r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
4010		CU_ASSERT_EQUAL(r, 0);
4011		CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
4012	}
4013
4014	r = amdgpu_bo_list_destroy(bo_list);
4015	CU_ASSERT_EQUAL(r, 0);
4016
4017	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size);
4018	CU_ASSERT_EQUAL(r, 0);
4019	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size);
4020	CU_ASSERT_EQUAL(r, 0);
4021
4022	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
4023	CU_ASSERT_EQUAL(r, 0);
4024
4025	r = amdgpu_cs_ctx_free(context_handle);
4026	CU_ASSERT_EQUAL(r, 0);
4027}
4028
4029void amdgpu_memcpy_draw_test(amdgpu_device_handle device_handle, uint32_t ring,
4030			     int version, int hang)
4031{
4032	amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
4033	void *ptr_shader_ps;
4034	void *ptr_shader_vs;
4035	uint64_t mc_address_shader_ps, mc_address_shader_vs;
4036	amdgpu_va_handle va_shader_ps, va_shader_vs;
4037	int bo_shader_size = 4096;
4038	enum ps_type ps_type = hang ? PS_HANG : PS_TEX;
4039	int r;
4040
4041	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
4042					AMDGPU_GEM_DOMAIN_VRAM, 0,
4043					&bo_shader_ps, &ptr_shader_ps,
4044					&mc_address_shader_ps, &va_shader_ps);
4045	CU_ASSERT_EQUAL(r, 0);
4046	memset(ptr_shader_ps, 0, bo_shader_size);
4047
4048	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_size, 4096,
4049					AMDGPU_GEM_DOMAIN_VRAM, 0,
4050					&bo_shader_vs, &ptr_shader_vs,
4051					&mc_address_shader_vs, &va_shader_vs);
4052	CU_ASSERT_EQUAL(r, 0);
4053	memset(ptr_shader_vs, 0, bo_shader_size);
4054
4055	r = amdgpu_draw_load_ps_shader(ptr_shader_ps, ps_type, version);
4056	CU_ASSERT_EQUAL(r, 0);
4057
4058	r = amdgpu_draw_load_vs_shader(ptr_shader_vs, version);
4059	CU_ASSERT_EQUAL(r, 0);
4060
4061	amdgpu_memcpy_draw(device_handle, bo_shader_ps, bo_shader_vs,
4062			mc_address_shader_ps, mc_address_shader_vs,
4063			ring, version, hang);
4064
4065	r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_size);
4066	CU_ASSERT_EQUAL(r, 0);
4067
4068	r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_size);
4069	CU_ASSERT_EQUAL(r, 0);
4070}
4071
4072static void amdgpu_draw_test(void)
4073{
4074	int r;
4075	struct drm_amdgpu_info_hw_ip info;
4076	uint32_t ring_id, version;
4077
4078	r = amdgpu_query_hw_ip_info(device_handle, AMDGPU_HW_IP_GFX, 0, &info);
4079	CU_ASSERT_EQUAL(r, 0);
4080	if (!info.available_rings)
4081		printf("SKIP ... as there's no graphics ring\n");
4082
4083	version = info.hw_ip_version_major;
4084	if (version != 9 && version != 10) {
4085		printf("SKIP ... unsupported gfx version %d\n", version);
4086		return;
4087	}
4088
4089	for (ring_id = 0; (1 << ring_id) & info.available_rings; ring_id++) {
4090		amdgpu_memset_draw_test(device_handle, ring_id, version);
4091		amdgpu_memcpy_draw_test(device_handle, ring_id, version, 0);
4092	}
4093}
4094
4095void amdgpu_memcpy_draw_hang_slow_test(amdgpu_device_handle device_handle, uint32_t ring, int version)
4096{
4097	amdgpu_context_handle context_handle;
4098	amdgpu_bo_handle bo_shader_ps, bo_shader_vs;
4099	amdgpu_bo_handle bo_dst, bo_src, bo_cmd, resources[5];
4100	void *ptr_shader_ps;
4101	void *ptr_shader_vs;
4102	volatile unsigned char *ptr_dst;
4103	unsigned char *ptr_src;
4104	uint32_t *ptr_cmd;
4105	uint64_t mc_address_dst, mc_address_src, mc_address_cmd;
4106	uint64_t mc_address_shader_ps, mc_address_shader_vs;
4107	amdgpu_va_handle va_shader_ps, va_shader_vs;
4108	amdgpu_va_handle va_dst, va_src, va_cmd;
4109	struct amdgpu_gpu_info gpu_info = {0};
4110	int i, r;
4111	int bo_size = 0x4000000;
4112	int bo_shader_ps_size = 0x400000;
4113	int bo_shader_vs_size = 4096;
4114	int bo_cmd_size = 4096;
4115	struct amdgpu_cs_request ibs_request = {0};
4116	struct amdgpu_cs_ib_info ib_info= {0};
4117	uint32_t hang_state, hangs, expired;
4118	amdgpu_bo_list_handle bo_list;
4119	struct amdgpu_cs_fence fence_status = {0};
4120
4121	r = amdgpu_query_gpu_info(device_handle, &gpu_info);
4122	CU_ASSERT_EQUAL(r, 0);
4123
4124	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
4125	CU_ASSERT_EQUAL(r, 0);
4126
4127	r = amdgpu_bo_alloc_and_map(device_handle, bo_cmd_size, 4096,
4128				    AMDGPU_GEM_DOMAIN_GTT, 0,
4129				    &bo_cmd, (void **)&ptr_cmd,
4130				    &mc_address_cmd, &va_cmd);
4131	CU_ASSERT_EQUAL(r, 0);
4132	memset(ptr_cmd, 0, bo_cmd_size);
4133
4134	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_ps_size, 4096,
4135					AMDGPU_GEM_DOMAIN_VRAM, 0,
4136					&bo_shader_ps, &ptr_shader_ps,
4137					&mc_address_shader_ps, &va_shader_ps);
4138	CU_ASSERT_EQUAL(r, 0);
4139	memset(ptr_shader_ps, 0, bo_shader_ps_size);
4140
4141	r = amdgpu_bo_alloc_and_map(device_handle, bo_shader_vs_size, 4096,
4142					AMDGPU_GEM_DOMAIN_VRAM, 0,
4143					&bo_shader_vs, &ptr_shader_vs,
4144					&mc_address_shader_vs, &va_shader_vs);
4145	CU_ASSERT_EQUAL(r, 0);
4146	memset(ptr_shader_vs, 0, bo_shader_vs_size);
4147
4148	r = amdgpu_draw_load_ps_shader_hang_slow(ptr_shader_ps, gpu_info.family_id);
4149	CU_ASSERT_EQUAL(r, 0);
4150
4151	r = amdgpu_draw_load_vs_shader(ptr_shader_vs, version);
4152	CU_ASSERT_EQUAL(r, 0);
4153
4154	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
4155					AMDGPU_GEM_DOMAIN_VRAM, 0,
4156					&bo_src, (void **)&ptr_src,
4157					&mc_address_src, &va_src);
4158	CU_ASSERT_EQUAL(r, 0);
4159
4160	r = amdgpu_bo_alloc_and_map(device_handle, bo_size, 4096,
4161					AMDGPU_GEM_DOMAIN_VRAM, 0,
4162					&bo_dst, (void **)&ptr_dst,
4163					&mc_address_dst, &va_dst);
4164	CU_ASSERT_EQUAL(r, 0);
4165
4166	memset(ptr_src, 0x55, bo_size);
4167
4168	i = 0;
4169	i += amdgpu_draw_init(ptr_cmd + i, version);
4170
4171	i += amdgpu_draw_setup_and_write_drawblt_surf_info(ptr_cmd + i, mc_address_dst, version, 1);
4172
4173	i += amdgpu_draw_setup_and_write_drawblt_state(ptr_cmd + i, version, 1);
4174
4175	i += amdgpu_draw_vs_RectPosTexFast_write2hw(ptr_cmd + i, PS_TEX,
4176							mc_address_shader_vs, version, 1);
4177
4178	i += amdgpu_draw_ps_write2hw(ptr_cmd + i, PS_TEX, mc_address_shader_ps, version);
4179
4180	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 8);
4181
4182	if (version == 9) {
4183		ptr_cmd[i++] = 0xc;
4184		ptr_cmd[i++] = mc_address_src >> 8;
4185		ptr_cmd[i++] = mc_address_src >> 40 | 0x10e00000;
4186		ptr_cmd[i++] = 0x1ffcfff;
4187		ptr_cmd[i++] = 0x90500fac;
4188		ptr_cmd[i++] = 0x1ffe000;
4189		i += 3;
4190	} else if (version == 10) {
4191		ptr_cmd[i++] = 0xc;
4192		ptr_cmd[i++] = mc_address_src >> 8;
4193		ptr_cmd[i++] = mc_address_src >> 40 | 0xc4b00000;
4194		ptr_cmd[i++] = 0x81ffc1ff;
4195		ptr_cmd[i++] = 0x90500fac;
4196		i += 4;
4197	}
4198
4199	ptr_cmd[i++] = PACKET3(PKT3_SET_SH_REG, 4);
4200	ptr_cmd[i++] = 0x14;
4201	ptr_cmd[i++] = 0x92;
4202	i += 3;
4203
4204	ptr_cmd[i++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
4205	ptr_cmd[i++] = 0x191;
4206	ptr_cmd[i++] = 0;
4207
4208	i += amdgpu_draw_draw(ptr_cmd + i, version);
4209
4210	while (i & 7)
4211		ptr_cmd[i++] = 0xffff1000; /* type3 nop packet */
4212
4213	resources[0] = bo_dst;
4214	resources[1] = bo_src;
4215	resources[2] = bo_shader_ps;
4216	resources[3] = bo_shader_vs;
4217	resources[4] = bo_cmd;
4218	r = amdgpu_bo_list_create(device_handle, 5, resources, NULL, &bo_list);
4219	CU_ASSERT_EQUAL(r, 0);
4220
4221	ib_info.ib_mc_address = mc_address_cmd;
4222	ib_info.size = i;
4223	ibs_request.ip_type = AMDGPU_HW_IP_GFX;
4224	ibs_request.ring = ring;
4225	ibs_request.resources = bo_list;
4226	ibs_request.number_of_ibs = 1;
4227	ibs_request.ibs = &ib_info;
4228	ibs_request.fence_info.handle = NULL;
4229	r = amdgpu_cs_submit(context_handle, 0, &ibs_request, 1);
4230	CU_ASSERT_EQUAL(r, 0);
4231
4232	fence_status.ip_type = AMDGPU_HW_IP_GFX;
4233	fence_status.ip_instance = 0;
4234	fence_status.ring = ring;
4235	fence_status.context = context_handle;
4236	fence_status.fence = ibs_request.seq_no;
4237
4238	/* wait for IB accomplished */
4239	r = amdgpu_cs_query_fence_status(&fence_status,
4240					 AMDGPU_TIMEOUT_INFINITE,
4241					 0, &expired);
4242
4243	r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
4244	CU_ASSERT_EQUAL(r, 0);
4245	CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
4246
4247	r = amdgpu_bo_list_destroy(bo_list);
4248	CU_ASSERT_EQUAL(r, 0);
4249
4250	r = amdgpu_bo_unmap_and_free(bo_dst, va_dst, mc_address_dst, bo_size);
4251	CU_ASSERT_EQUAL(r, 0);
4252	r = amdgpu_bo_unmap_and_free(bo_src, va_src, mc_address_src, bo_size);
4253	CU_ASSERT_EQUAL(r, 0);
4254
4255	r = amdgpu_bo_unmap_and_free(bo_cmd, va_cmd, mc_address_cmd, bo_cmd_size);
4256	CU_ASSERT_EQUAL(r, 0);
4257
4258	r = amdgpu_bo_unmap_and_free(bo_shader_ps, va_shader_ps, mc_address_shader_ps, bo_shader_ps_size);
4259	CU_ASSERT_EQUAL(r, 0);
4260	r = amdgpu_bo_unmap_and_free(bo_shader_vs, va_shader_vs, mc_address_shader_vs, bo_shader_vs_size);
4261	CU_ASSERT_EQUAL(r, 0);
4262
4263	r = amdgpu_cs_ctx_free(context_handle);
4264	CU_ASSERT_EQUAL(r, 0);
4265}
4266
4267static void amdgpu_gpu_reset_test(void)
4268{
4269	int r;
4270	char debugfs_path[256], tmp[10];
4271	int fd;
4272	struct stat sbuf;
4273	amdgpu_context_handle context_handle;
4274	uint32_t hang_state, hangs;
4275
4276	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
4277	CU_ASSERT_EQUAL(r, 0);
4278
4279	r = fstat(drm_amdgpu[0], &sbuf);
4280	CU_ASSERT_EQUAL(r, 0);
4281
4282	sprintf(debugfs_path, "/sys/kernel/debug/dri/%d/amdgpu_gpu_recover", minor(sbuf.st_rdev));
4283	fd = open(debugfs_path, O_RDONLY);
4284	CU_ASSERT(fd >= 0);
4285
4286	r = read(fd, tmp, sizeof(tmp)/sizeof(char));
4287	CU_ASSERT(r > 0);
4288
4289	r = amdgpu_cs_query_reset_state(context_handle, &hang_state, &hangs);
4290	CU_ASSERT_EQUAL(r, 0);
4291	CU_ASSERT_EQUAL(hang_state, AMDGPU_CTX_UNKNOWN_RESET);
4292
4293	close(fd);
4294	r = amdgpu_cs_ctx_free(context_handle);
4295	CU_ASSERT_EQUAL(r, 0);
4296
4297	amdgpu_compute_dispatch_test();
4298	amdgpu_gfx_dispatch_test();
4299}
4300
4301static void amdgpu_stable_pstate_test(void)
4302{
4303	int r;
4304	amdgpu_context_handle context_handle;
4305	uint32_t current_pstate = 0, new_pstate = 0;
4306
4307	r = amdgpu_cs_ctx_create(device_handle, &context_handle);
4308	CU_ASSERT_EQUAL(r, 0);
4309
4310	r = amdgpu_cs_ctx_stable_pstate(context_handle,
4311					AMDGPU_CTX_OP_GET_STABLE_PSTATE,
4312					0, &current_pstate);
4313	CU_ASSERT_EQUAL(r, 0);
4314	CU_ASSERT_EQUAL(new_pstate, AMDGPU_CTX_STABLE_PSTATE_NONE);
4315
4316	r = amdgpu_cs_ctx_stable_pstate(context_handle,
4317					AMDGPU_CTX_OP_SET_STABLE_PSTATE,
4318					AMDGPU_CTX_STABLE_PSTATE_PEAK, NULL);
4319	CU_ASSERT_EQUAL(r, 0);
4320
4321	r = amdgpu_cs_ctx_stable_pstate(context_handle,
4322					AMDGPU_CTX_OP_GET_STABLE_PSTATE,
4323					0, &new_pstate);
4324	CU_ASSERT_EQUAL(r, 0);
4325	CU_ASSERT_EQUAL(new_pstate, AMDGPU_CTX_STABLE_PSTATE_PEAK);
4326
4327	r = amdgpu_cs_ctx_free(context_handle);
4328	CU_ASSERT_EQUAL(r, 0);
4329}
4330