18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * Copyright (C) 2014-2018 Etnaviv Project
48c2ecf20Sopenharmony_ci */
58c2ecf20Sopenharmony_ci
68c2ecf20Sopenharmony_ci#include <drm/drm_drv.h>
78c2ecf20Sopenharmony_ci
88c2ecf20Sopenharmony_ci#include "etnaviv_cmdbuf.h"
98c2ecf20Sopenharmony_ci#include "etnaviv_gpu.h"
108c2ecf20Sopenharmony_ci#include "etnaviv_gem.h"
118c2ecf20Sopenharmony_ci#include "etnaviv_mmu.h"
128c2ecf20Sopenharmony_ci
138c2ecf20Sopenharmony_ci#include "common.xml.h"
148c2ecf20Sopenharmony_ci#include "state.xml.h"
158c2ecf20Sopenharmony_ci#include "state_blt.xml.h"
168c2ecf20Sopenharmony_ci#include "state_hi.xml.h"
178c2ecf20Sopenharmony_ci#include "state_3d.xml.h"
188c2ecf20Sopenharmony_ci#include "cmdstream.xml.h"
198c2ecf20Sopenharmony_ci
/*
 * Command buffer helpers: each one appends a single front-end (FE) command
 * to an etnaviv_cmdbuf.
 */
238c2ecf20Sopenharmony_ci
248c2ecf20Sopenharmony_ci
258c2ecf20Sopenharmony_cistatic inline void OUT(struct etnaviv_cmdbuf *buffer, u32 data)
268c2ecf20Sopenharmony_ci{
278c2ecf20Sopenharmony_ci	u32 *vaddr = (u32 *)buffer->vaddr;
288c2ecf20Sopenharmony_ci
298c2ecf20Sopenharmony_ci	BUG_ON(buffer->user_size >= buffer->size);
308c2ecf20Sopenharmony_ci
318c2ecf20Sopenharmony_ci	vaddr[buffer->user_size / 4] = data;
328c2ecf20Sopenharmony_ci	buffer->user_size += 4;
338c2ecf20Sopenharmony_ci}
348c2ecf20Sopenharmony_ci
358c2ecf20Sopenharmony_cistatic inline void CMD_LOAD_STATE(struct etnaviv_cmdbuf *buffer,
368c2ecf20Sopenharmony_ci	u32 reg, u32 value)
378c2ecf20Sopenharmony_ci{
388c2ecf20Sopenharmony_ci	u32 index = reg >> VIV_FE_LOAD_STATE_HEADER_OFFSET__SHR;
398c2ecf20Sopenharmony_ci
408c2ecf20Sopenharmony_ci	buffer->user_size = ALIGN(buffer->user_size, 8);
418c2ecf20Sopenharmony_ci
428c2ecf20Sopenharmony_ci	/* write a register via cmd stream */
438c2ecf20Sopenharmony_ci	OUT(buffer, VIV_FE_LOAD_STATE_HEADER_OP_LOAD_STATE |
448c2ecf20Sopenharmony_ci		    VIV_FE_LOAD_STATE_HEADER_COUNT(1) |
458c2ecf20Sopenharmony_ci		    VIV_FE_LOAD_STATE_HEADER_OFFSET(index));
468c2ecf20Sopenharmony_ci	OUT(buffer, value);
478c2ecf20Sopenharmony_ci}
488c2ecf20Sopenharmony_ci
498c2ecf20Sopenharmony_cistatic inline void CMD_END(struct etnaviv_cmdbuf *buffer)
508c2ecf20Sopenharmony_ci{
518c2ecf20Sopenharmony_ci	buffer->user_size = ALIGN(buffer->user_size, 8);
528c2ecf20Sopenharmony_ci
538c2ecf20Sopenharmony_ci	OUT(buffer, VIV_FE_END_HEADER_OP_END);
548c2ecf20Sopenharmony_ci}
558c2ecf20Sopenharmony_ci
568c2ecf20Sopenharmony_cistatic inline void CMD_WAIT(struct etnaviv_cmdbuf *buffer)
578c2ecf20Sopenharmony_ci{
588c2ecf20Sopenharmony_ci	buffer->user_size = ALIGN(buffer->user_size, 8);
598c2ecf20Sopenharmony_ci
608c2ecf20Sopenharmony_ci	OUT(buffer, VIV_FE_WAIT_HEADER_OP_WAIT | 200);
618c2ecf20Sopenharmony_ci}
628c2ecf20Sopenharmony_ci
638c2ecf20Sopenharmony_cistatic inline void CMD_LINK(struct etnaviv_cmdbuf *buffer,
648c2ecf20Sopenharmony_ci	u16 prefetch, u32 address)
658c2ecf20Sopenharmony_ci{
668c2ecf20Sopenharmony_ci	buffer->user_size = ALIGN(buffer->user_size, 8);
678c2ecf20Sopenharmony_ci
688c2ecf20Sopenharmony_ci	OUT(buffer, VIV_FE_LINK_HEADER_OP_LINK |
698c2ecf20Sopenharmony_ci		    VIV_FE_LINK_HEADER_PREFETCH(prefetch));
708c2ecf20Sopenharmony_ci	OUT(buffer, address);
718c2ecf20Sopenharmony_ci}
728c2ecf20Sopenharmony_ci
738c2ecf20Sopenharmony_cistatic inline void CMD_STALL(struct etnaviv_cmdbuf *buffer,
748c2ecf20Sopenharmony_ci	u32 from, u32 to)
758c2ecf20Sopenharmony_ci{
768c2ecf20Sopenharmony_ci	buffer->user_size = ALIGN(buffer->user_size, 8);
778c2ecf20Sopenharmony_ci
788c2ecf20Sopenharmony_ci	OUT(buffer, VIV_FE_STALL_HEADER_OP_STALL);
798c2ecf20Sopenharmony_ci	OUT(buffer, VIV_FE_STALL_TOKEN_FROM(from) | VIV_FE_STALL_TOKEN_TO(to));
808c2ecf20Sopenharmony_ci}
818c2ecf20Sopenharmony_ci
828c2ecf20Sopenharmony_cistatic inline void CMD_SEM(struct etnaviv_cmdbuf *buffer, u32 from, u32 to)
838c2ecf20Sopenharmony_ci{
848c2ecf20Sopenharmony_ci	CMD_LOAD_STATE(buffer, VIVS_GL_SEMAPHORE_TOKEN,
858c2ecf20Sopenharmony_ci		       VIVS_GL_SEMAPHORE_TOKEN_FROM(from) |
868c2ecf20Sopenharmony_ci		       VIVS_GL_SEMAPHORE_TOKEN_TO(to));
878c2ecf20Sopenharmony_ci}
888c2ecf20Sopenharmony_ci
898c2ecf20Sopenharmony_cistatic void etnaviv_cmd_select_pipe(struct etnaviv_gpu *gpu,
908c2ecf20Sopenharmony_ci	struct etnaviv_cmdbuf *buffer, u8 pipe)
918c2ecf20Sopenharmony_ci{
928c2ecf20Sopenharmony_ci	u32 flush = 0;
938c2ecf20Sopenharmony_ci
948c2ecf20Sopenharmony_ci	lockdep_assert_held(&gpu->lock);
958c2ecf20Sopenharmony_ci
968c2ecf20Sopenharmony_ci	/*
978c2ecf20Sopenharmony_ci	 * This assumes that if we're switching to 2D, we're switching
988c2ecf20Sopenharmony_ci	 * away from 3D, and vice versa.  Hence, if we're switching to
998c2ecf20Sopenharmony_ci	 * the 2D core, we need to flush the 3D depth and color caches,
1008c2ecf20Sopenharmony_ci	 * otherwise we need to flush the 2D pixel engine cache.
1018c2ecf20Sopenharmony_ci	 */
1028c2ecf20Sopenharmony_ci	if (gpu->exec_state == ETNA_PIPE_2D)
1038c2ecf20Sopenharmony_ci		flush = VIVS_GL_FLUSH_CACHE_PE2D;
1048c2ecf20Sopenharmony_ci	else if (gpu->exec_state == ETNA_PIPE_3D)
1058c2ecf20Sopenharmony_ci		flush = VIVS_GL_FLUSH_CACHE_DEPTH | VIVS_GL_FLUSH_CACHE_COLOR;
1068c2ecf20Sopenharmony_ci
1078c2ecf20Sopenharmony_ci	CMD_LOAD_STATE(buffer, VIVS_GL_FLUSH_CACHE, flush);
1088c2ecf20Sopenharmony_ci	CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
1098c2ecf20Sopenharmony_ci	CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
1108c2ecf20Sopenharmony_ci
1118c2ecf20Sopenharmony_ci	CMD_LOAD_STATE(buffer, VIVS_GL_PIPE_SELECT,
1128c2ecf20Sopenharmony_ci		       VIVS_GL_PIPE_SELECT_PIPE(pipe));
1138c2ecf20Sopenharmony_ci}
1148c2ecf20Sopenharmony_ci
1158c2ecf20Sopenharmony_cistatic void etnaviv_buffer_dump(struct etnaviv_gpu *gpu,
1168c2ecf20Sopenharmony_ci	struct etnaviv_cmdbuf *buf, u32 off, u32 len)
1178c2ecf20Sopenharmony_ci{
1188c2ecf20Sopenharmony_ci	u32 size = buf->size;
1198c2ecf20Sopenharmony_ci	u32 *ptr = buf->vaddr + off;
1208c2ecf20Sopenharmony_ci
1218c2ecf20Sopenharmony_ci	dev_info(gpu->dev, "virt %p phys 0x%08x free 0x%08x\n",
1228c2ecf20Sopenharmony_ci			ptr, etnaviv_cmdbuf_get_va(buf,
1238c2ecf20Sopenharmony_ci			&gpu->mmu_context->cmdbuf_mapping) +
1248c2ecf20Sopenharmony_ci			off, size - len * 4 - off);
1258c2ecf20Sopenharmony_ci
1268c2ecf20Sopenharmony_ci	print_hex_dump(KERN_INFO, "cmd ", DUMP_PREFIX_OFFSET, 16, 4,
1278c2ecf20Sopenharmony_ci			ptr, len * 4, 0);
1288c2ecf20Sopenharmony_ci}
1298c2ecf20Sopenharmony_ci
1308c2ecf20Sopenharmony_ci/*
1318c2ecf20Sopenharmony_ci * Safely replace the WAIT of a waitlink with a new command and argument.
1328c2ecf20Sopenharmony_ci * The GPU may be executing this WAIT while we're modifying it, so we have
1338c2ecf20Sopenharmony_ci * to write it in a specific order to avoid the GPU branching to somewhere
1348c2ecf20Sopenharmony_ci * else.  'wl_offset' is the offset to the first byte of the WAIT command.
1358c2ecf20Sopenharmony_ci */
1368c2ecf20Sopenharmony_cistatic void etnaviv_buffer_replace_wait(struct etnaviv_cmdbuf *buffer,
1378c2ecf20Sopenharmony_ci	unsigned int wl_offset, u32 cmd, u32 arg)
1388c2ecf20Sopenharmony_ci{
1398c2ecf20Sopenharmony_ci	u32 *lw = buffer->vaddr + wl_offset;
1408c2ecf20Sopenharmony_ci
1418c2ecf20Sopenharmony_ci	lw[1] = arg;
1428c2ecf20Sopenharmony_ci	mb();
1438c2ecf20Sopenharmony_ci	lw[0] = cmd;
1448c2ecf20Sopenharmony_ci	mb();
1458c2ecf20Sopenharmony_ci}
1468c2ecf20Sopenharmony_ci
1478c2ecf20Sopenharmony_ci/*
1488c2ecf20Sopenharmony_ci * Ensure that there is space in the command buffer to contiguously write
1498c2ecf20Sopenharmony_ci * 'cmd_dwords' 64-bit words into the buffer, wrapping if necessary.
1508c2ecf20Sopenharmony_ci */
1518c2ecf20Sopenharmony_cistatic u32 etnaviv_buffer_reserve(struct etnaviv_gpu *gpu,
1528c2ecf20Sopenharmony_ci	struct etnaviv_cmdbuf *buffer, unsigned int cmd_dwords)
1538c2ecf20Sopenharmony_ci{
1548c2ecf20Sopenharmony_ci	if (buffer->user_size + cmd_dwords * sizeof(u64) > buffer->size)
1558c2ecf20Sopenharmony_ci		buffer->user_size = 0;
1568c2ecf20Sopenharmony_ci
1578c2ecf20Sopenharmony_ci	return etnaviv_cmdbuf_get_va(buffer,
1588c2ecf20Sopenharmony_ci				     &gpu->mmu_context->cmdbuf_mapping) +
1598c2ecf20Sopenharmony_ci	       buffer->user_size;
1608c2ecf20Sopenharmony_ci}
1618c2ecf20Sopenharmony_ci
1628c2ecf20Sopenharmony_ciu16 etnaviv_buffer_init(struct etnaviv_gpu *gpu)
1638c2ecf20Sopenharmony_ci{
1648c2ecf20Sopenharmony_ci	struct etnaviv_cmdbuf *buffer = &gpu->buffer;
1658c2ecf20Sopenharmony_ci
1668c2ecf20Sopenharmony_ci	lockdep_assert_held(&gpu->lock);
1678c2ecf20Sopenharmony_ci
1688c2ecf20Sopenharmony_ci	/* initialize buffer */
1698c2ecf20Sopenharmony_ci	buffer->user_size = 0;
1708c2ecf20Sopenharmony_ci
1718c2ecf20Sopenharmony_ci	CMD_WAIT(buffer);
1728c2ecf20Sopenharmony_ci	CMD_LINK(buffer, 2,
1738c2ecf20Sopenharmony_ci		 etnaviv_cmdbuf_get_va(buffer, &gpu->mmu_context->cmdbuf_mapping)
1748c2ecf20Sopenharmony_ci		 + buffer->user_size - 4);
1758c2ecf20Sopenharmony_ci
1768c2ecf20Sopenharmony_ci	return buffer->user_size / 8;
1778c2ecf20Sopenharmony_ci}
1788c2ecf20Sopenharmony_ci
1798c2ecf20Sopenharmony_ciu16 etnaviv_buffer_config_mmuv2(struct etnaviv_gpu *gpu, u32 mtlb_addr, u32 safe_addr)
1808c2ecf20Sopenharmony_ci{
1818c2ecf20Sopenharmony_ci	struct etnaviv_cmdbuf *buffer = &gpu->buffer;
1828c2ecf20Sopenharmony_ci
1838c2ecf20Sopenharmony_ci	lockdep_assert_held(&gpu->lock);
1848c2ecf20Sopenharmony_ci
1858c2ecf20Sopenharmony_ci	buffer->user_size = 0;
1868c2ecf20Sopenharmony_ci
1878c2ecf20Sopenharmony_ci	if (gpu->identity.features & chipFeatures_PIPE_3D) {
1888c2ecf20Sopenharmony_ci		CMD_LOAD_STATE(buffer, VIVS_GL_PIPE_SELECT,
1898c2ecf20Sopenharmony_ci			       VIVS_GL_PIPE_SELECT_PIPE(ETNA_PIPE_3D));
1908c2ecf20Sopenharmony_ci		CMD_LOAD_STATE(buffer, VIVS_MMUv2_CONFIGURATION,
1918c2ecf20Sopenharmony_ci			mtlb_addr | VIVS_MMUv2_CONFIGURATION_MODE_MODE4_K);
1928c2ecf20Sopenharmony_ci		CMD_LOAD_STATE(buffer, VIVS_MMUv2_SAFE_ADDRESS, safe_addr);
1938c2ecf20Sopenharmony_ci		CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
1948c2ecf20Sopenharmony_ci		CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
1958c2ecf20Sopenharmony_ci	}
1968c2ecf20Sopenharmony_ci
1978c2ecf20Sopenharmony_ci	if (gpu->identity.features & chipFeatures_PIPE_2D) {
1988c2ecf20Sopenharmony_ci		CMD_LOAD_STATE(buffer, VIVS_GL_PIPE_SELECT,
1998c2ecf20Sopenharmony_ci			       VIVS_GL_PIPE_SELECT_PIPE(ETNA_PIPE_2D));
2008c2ecf20Sopenharmony_ci		CMD_LOAD_STATE(buffer, VIVS_MMUv2_CONFIGURATION,
2018c2ecf20Sopenharmony_ci			mtlb_addr | VIVS_MMUv2_CONFIGURATION_MODE_MODE4_K);
2028c2ecf20Sopenharmony_ci		CMD_LOAD_STATE(buffer, VIVS_MMUv2_SAFE_ADDRESS, safe_addr);
2038c2ecf20Sopenharmony_ci		CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
2048c2ecf20Sopenharmony_ci		CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
2058c2ecf20Sopenharmony_ci	}
2068c2ecf20Sopenharmony_ci
2078c2ecf20Sopenharmony_ci	CMD_END(buffer);
2088c2ecf20Sopenharmony_ci
2098c2ecf20Sopenharmony_ci	buffer->user_size = ALIGN(buffer->user_size, 8);
2108c2ecf20Sopenharmony_ci
2118c2ecf20Sopenharmony_ci	return buffer->user_size / 8;
2128c2ecf20Sopenharmony_ci}
2138c2ecf20Sopenharmony_ci
2148c2ecf20Sopenharmony_ciu16 etnaviv_buffer_config_pta(struct etnaviv_gpu *gpu, unsigned short id)
2158c2ecf20Sopenharmony_ci{
2168c2ecf20Sopenharmony_ci	struct etnaviv_cmdbuf *buffer = &gpu->buffer;
2178c2ecf20Sopenharmony_ci
2188c2ecf20Sopenharmony_ci	lockdep_assert_held(&gpu->lock);
2198c2ecf20Sopenharmony_ci
2208c2ecf20Sopenharmony_ci	buffer->user_size = 0;
2218c2ecf20Sopenharmony_ci
2228c2ecf20Sopenharmony_ci	CMD_LOAD_STATE(buffer, VIVS_MMUv2_PTA_CONFIG,
2238c2ecf20Sopenharmony_ci		       VIVS_MMUv2_PTA_CONFIG_INDEX(id));
2248c2ecf20Sopenharmony_ci
2258c2ecf20Sopenharmony_ci	CMD_END(buffer);
2268c2ecf20Sopenharmony_ci
2278c2ecf20Sopenharmony_ci	buffer->user_size = ALIGN(buffer->user_size, 8);
2288c2ecf20Sopenharmony_ci
2298c2ecf20Sopenharmony_ci	return buffer->user_size / 8;
2308c2ecf20Sopenharmony_ci}
2318c2ecf20Sopenharmony_ci
2328c2ecf20Sopenharmony_civoid etnaviv_buffer_end(struct etnaviv_gpu *gpu)
2338c2ecf20Sopenharmony_ci{
2348c2ecf20Sopenharmony_ci	struct etnaviv_cmdbuf *buffer = &gpu->buffer;
2358c2ecf20Sopenharmony_ci	unsigned int waitlink_offset = buffer->user_size - 16;
2368c2ecf20Sopenharmony_ci	u32 link_target, flush = 0;
2378c2ecf20Sopenharmony_ci	bool has_blt = !!(gpu->identity.minor_features5 &
2388c2ecf20Sopenharmony_ci			  chipMinorFeatures5_BLT_ENGINE);
2398c2ecf20Sopenharmony_ci
2408c2ecf20Sopenharmony_ci	lockdep_assert_held(&gpu->lock);
2418c2ecf20Sopenharmony_ci
2428c2ecf20Sopenharmony_ci	if (gpu->exec_state == ETNA_PIPE_2D)
2438c2ecf20Sopenharmony_ci		flush = VIVS_GL_FLUSH_CACHE_PE2D;
2448c2ecf20Sopenharmony_ci	else if (gpu->exec_state == ETNA_PIPE_3D)
2458c2ecf20Sopenharmony_ci		flush = VIVS_GL_FLUSH_CACHE_DEPTH |
2468c2ecf20Sopenharmony_ci			VIVS_GL_FLUSH_CACHE_COLOR |
2478c2ecf20Sopenharmony_ci			VIVS_GL_FLUSH_CACHE_TEXTURE |
2488c2ecf20Sopenharmony_ci			VIVS_GL_FLUSH_CACHE_TEXTUREVS |
2498c2ecf20Sopenharmony_ci			VIVS_GL_FLUSH_CACHE_SHADER_L2;
2508c2ecf20Sopenharmony_ci
2518c2ecf20Sopenharmony_ci	if (flush) {
2528c2ecf20Sopenharmony_ci		unsigned int dwords = 7;
2538c2ecf20Sopenharmony_ci
2548c2ecf20Sopenharmony_ci		if (has_blt)
2558c2ecf20Sopenharmony_ci			dwords += 10;
2568c2ecf20Sopenharmony_ci
2578c2ecf20Sopenharmony_ci		link_target = etnaviv_buffer_reserve(gpu, buffer, dwords);
2588c2ecf20Sopenharmony_ci
2598c2ecf20Sopenharmony_ci		CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
2608c2ecf20Sopenharmony_ci		CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
2618c2ecf20Sopenharmony_ci		if (has_blt) {
2628c2ecf20Sopenharmony_ci			CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x1);
2638c2ecf20Sopenharmony_ci			CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_BLT);
2648c2ecf20Sopenharmony_ci			CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_BLT);
2658c2ecf20Sopenharmony_ci			CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x0);
2668c2ecf20Sopenharmony_ci		}
2678c2ecf20Sopenharmony_ci		CMD_LOAD_STATE(buffer, VIVS_GL_FLUSH_CACHE, flush);
2688c2ecf20Sopenharmony_ci		if (gpu->exec_state == ETNA_PIPE_3D) {
2698c2ecf20Sopenharmony_ci			if (has_blt) {
2708c2ecf20Sopenharmony_ci				CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x1);
2718c2ecf20Sopenharmony_ci				CMD_LOAD_STATE(buffer, VIVS_BLT_SET_COMMAND, 0x1);
2728c2ecf20Sopenharmony_ci				CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x0);
2738c2ecf20Sopenharmony_ci			} else {
2748c2ecf20Sopenharmony_ci				CMD_LOAD_STATE(buffer, VIVS_TS_FLUSH_CACHE,
2758c2ecf20Sopenharmony_ci					       VIVS_TS_FLUSH_CACHE_FLUSH);
2768c2ecf20Sopenharmony_ci			}
2778c2ecf20Sopenharmony_ci		}
2788c2ecf20Sopenharmony_ci		CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
2798c2ecf20Sopenharmony_ci		CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
2808c2ecf20Sopenharmony_ci		if (has_blt) {
2818c2ecf20Sopenharmony_ci			CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x1);
2828c2ecf20Sopenharmony_ci			CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_BLT);
2838c2ecf20Sopenharmony_ci			CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_BLT);
2848c2ecf20Sopenharmony_ci			CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x0);
2858c2ecf20Sopenharmony_ci		}
2868c2ecf20Sopenharmony_ci		CMD_END(buffer);
2878c2ecf20Sopenharmony_ci
2888c2ecf20Sopenharmony_ci		etnaviv_buffer_replace_wait(buffer, waitlink_offset,
2898c2ecf20Sopenharmony_ci					    VIV_FE_LINK_HEADER_OP_LINK |
2908c2ecf20Sopenharmony_ci					    VIV_FE_LINK_HEADER_PREFETCH(dwords),
2918c2ecf20Sopenharmony_ci					    link_target);
2928c2ecf20Sopenharmony_ci	} else {
2938c2ecf20Sopenharmony_ci		/* Replace the last link-wait with an "END" command */
2948c2ecf20Sopenharmony_ci		etnaviv_buffer_replace_wait(buffer, waitlink_offset,
2958c2ecf20Sopenharmony_ci					    VIV_FE_END_HEADER_OP_END, 0);
2968c2ecf20Sopenharmony_ci	}
2978c2ecf20Sopenharmony_ci}
2988c2ecf20Sopenharmony_ci
2998c2ecf20Sopenharmony_ci/* Append a 'sync point' to the ring buffer. */
3008c2ecf20Sopenharmony_civoid etnaviv_sync_point_queue(struct etnaviv_gpu *gpu, unsigned int event)
3018c2ecf20Sopenharmony_ci{
3028c2ecf20Sopenharmony_ci	struct etnaviv_cmdbuf *buffer = &gpu->buffer;
3038c2ecf20Sopenharmony_ci	unsigned int waitlink_offset = buffer->user_size - 16;
3048c2ecf20Sopenharmony_ci	u32 dwords, target;
3058c2ecf20Sopenharmony_ci
3068c2ecf20Sopenharmony_ci	lockdep_assert_held(&gpu->lock);
3078c2ecf20Sopenharmony_ci
3088c2ecf20Sopenharmony_ci	/*
3098c2ecf20Sopenharmony_ci	 * We need at most 3 dwords in the return target:
3108c2ecf20Sopenharmony_ci	 * 1 event + 1 end + 1 wait + 1 link.
3118c2ecf20Sopenharmony_ci	 */
3128c2ecf20Sopenharmony_ci	dwords = 4;
3138c2ecf20Sopenharmony_ci	target = etnaviv_buffer_reserve(gpu, buffer, dwords);
3148c2ecf20Sopenharmony_ci
3158c2ecf20Sopenharmony_ci	/* Signal sync point event */
3168c2ecf20Sopenharmony_ci	CMD_LOAD_STATE(buffer, VIVS_GL_EVENT, VIVS_GL_EVENT_EVENT_ID(event) |
3178c2ecf20Sopenharmony_ci		       VIVS_GL_EVENT_FROM_PE);
3188c2ecf20Sopenharmony_ci
3198c2ecf20Sopenharmony_ci	/* Stop the FE to 'pause' the GPU */
3208c2ecf20Sopenharmony_ci	CMD_END(buffer);
3218c2ecf20Sopenharmony_ci
3228c2ecf20Sopenharmony_ci	/* Append waitlink */
3238c2ecf20Sopenharmony_ci	CMD_WAIT(buffer);
3248c2ecf20Sopenharmony_ci	CMD_LINK(buffer, 2,
3258c2ecf20Sopenharmony_ci		 etnaviv_cmdbuf_get_va(buffer, &gpu->mmu_context->cmdbuf_mapping)
3268c2ecf20Sopenharmony_ci		 + buffer->user_size - 4);
3278c2ecf20Sopenharmony_ci
3288c2ecf20Sopenharmony_ci	/*
3298c2ecf20Sopenharmony_ci	 * Kick off the 'sync point' command by replacing the previous
3308c2ecf20Sopenharmony_ci	 * WAIT with a link to the address in the ring buffer.
3318c2ecf20Sopenharmony_ci	 */
3328c2ecf20Sopenharmony_ci	etnaviv_buffer_replace_wait(buffer, waitlink_offset,
3338c2ecf20Sopenharmony_ci				    VIV_FE_LINK_HEADER_OP_LINK |
3348c2ecf20Sopenharmony_ci				    VIV_FE_LINK_HEADER_PREFETCH(dwords),
3358c2ecf20Sopenharmony_ci				    target);
3368c2ecf20Sopenharmony_ci}
3378c2ecf20Sopenharmony_ci
3388c2ecf20Sopenharmony_ci/* Append a command buffer to the ring buffer. */
3398c2ecf20Sopenharmony_civoid etnaviv_buffer_queue(struct etnaviv_gpu *gpu, u32 exec_state,
3408c2ecf20Sopenharmony_ci	struct etnaviv_iommu_context *mmu_context, unsigned int event,
3418c2ecf20Sopenharmony_ci	struct etnaviv_cmdbuf *cmdbuf)
3428c2ecf20Sopenharmony_ci{
3438c2ecf20Sopenharmony_ci	struct etnaviv_cmdbuf *buffer = &gpu->buffer;
3448c2ecf20Sopenharmony_ci	unsigned int waitlink_offset = buffer->user_size - 16;
3458c2ecf20Sopenharmony_ci	u32 return_target, return_dwords;
3468c2ecf20Sopenharmony_ci	u32 link_target, link_dwords;
3478c2ecf20Sopenharmony_ci	bool switch_context = gpu->exec_state != exec_state;
3488c2ecf20Sopenharmony_ci	bool switch_mmu_context = gpu->mmu_context != mmu_context;
3498c2ecf20Sopenharmony_ci	unsigned int new_flush_seq = READ_ONCE(gpu->mmu_context->flush_seq);
3508c2ecf20Sopenharmony_ci	bool need_flush = switch_mmu_context || gpu->flush_seq != new_flush_seq;
3518c2ecf20Sopenharmony_ci	bool has_blt = !!(gpu->identity.minor_features5 &
3528c2ecf20Sopenharmony_ci			  chipMinorFeatures5_BLT_ENGINE);
3538c2ecf20Sopenharmony_ci
3548c2ecf20Sopenharmony_ci	lockdep_assert_held(&gpu->lock);
3558c2ecf20Sopenharmony_ci
3568c2ecf20Sopenharmony_ci	if (drm_debug_enabled(DRM_UT_DRIVER))
3578c2ecf20Sopenharmony_ci		etnaviv_buffer_dump(gpu, buffer, 0, 0x50);
3588c2ecf20Sopenharmony_ci
3598c2ecf20Sopenharmony_ci	link_target = etnaviv_cmdbuf_get_va(cmdbuf,
3608c2ecf20Sopenharmony_ci					    &gpu->mmu_context->cmdbuf_mapping);
3618c2ecf20Sopenharmony_ci	link_dwords = cmdbuf->size / 8;
3628c2ecf20Sopenharmony_ci
3638c2ecf20Sopenharmony_ci	/*
3648c2ecf20Sopenharmony_ci	 * If we need maintenance prior to submitting this buffer, we will
3658c2ecf20Sopenharmony_ci	 * need to append a mmu flush load state, followed by a new
3668c2ecf20Sopenharmony_ci	 * link to this buffer - a total of four additional words.
3678c2ecf20Sopenharmony_ci	 */
3688c2ecf20Sopenharmony_ci	if (need_flush || switch_context) {
3698c2ecf20Sopenharmony_ci		u32 target, extra_dwords;
3708c2ecf20Sopenharmony_ci
3718c2ecf20Sopenharmony_ci		/* link command */
3728c2ecf20Sopenharmony_ci		extra_dwords = 1;
3738c2ecf20Sopenharmony_ci
3748c2ecf20Sopenharmony_ci		/* flush command */
3758c2ecf20Sopenharmony_ci		if (need_flush) {
3768c2ecf20Sopenharmony_ci			if (gpu->mmu_context->global->version == ETNAVIV_IOMMU_V1)
3778c2ecf20Sopenharmony_ci				extra_dwords += 1;
3788c2ecf20Sopenharmony_ci			else
3798c2ecf20Sopenharmony_ci				extra_dwords += 3;
3808c2ecf20Sopenharmony_ci		}
3818c2ecf20Sopenharmony_ci
3828c2ecf20Sopenharmony_ci		/* pipe switch commands */
3838c2ecf20Sopenharmony_ci		if (switch_context)
3848c2ecf20Sopenharmony_ci			extra_dwords += 4;
3858c2ecf20Sopenharmony_ci
3868c2ecf20Sopenharmony_ci		/* PTA load command */
3878c2ecf20Sopenharmony_ci		if (switch_mmu_context && gpu->sec_mode == ETNA_SEC_KERNEL)
3888c2ecf20Sopenharmony_ci			extra_dwords += 1;
3898c2ecf20Sopenharmony_ci
3908c2ecf20Sopenharmony_ci		target = etnaviv_buffer_reserve(gpu, buffer, extra_dwords);
3918c2ecf20Sopenharmony_ci		/*
3928c2ecf20Sopenharmony_ci		 * Switch MMU context if necessary. Must be done after the
3938c2ecf20Sopenharmony_ci		 * link target has been calculated, as the jump forward in the
3948c2ecf20Sopenharmony_ci		 * kernel ring still uses the last active MMU context before
3958c2ecf20Sopenharmony_ci		 * the switch.
3968c2ecf20Sopenharmony_ci		 */
3978c2ecf20Sopenharmony_ci		if (switch_mmu_context) {
3988c2ecf20Sopenharmony_ci			struct etnaviv_iommu_context *old_context = gpu->mmu_context;
3998c2ecf20Sopenharmony_ci
4008c2ecf20Sopenharmony_ci			gpu->mmu_context = etnaviv_iommu_context_get(mmu_context);
4018c2ecf20Sopenharmony_ci			etnaviv_iommu_context_put(old_context);
4028c2ecf20Sopenharmony_ci		}
4038c2ecf20Sopenharmony_ci
4048c2ecf20Sopenharmony_ci		if (need_flush) {
4058c2ecf20Sopenharmony_ci			/* Add the MMU flush */
4068c2ecf20Sopenharmony_ci			if (gpu->mmu_context->global->version == ETNAVIV_IOMMU_V1) {
4078c2ecf20Sopenharmony_ci				CMD_LOAD_STATE(buffer, VIVS_GL_FLUSH_MMU,
4088c2ecf20Sopenharmony_ci					       VIVS_GL_FLUSH_MMU_FLUSH_FEMMU |
4098c2ecf20Sopenharmony_ci					       VIVS_GL_FLUSH_MMU_FLUSH_UNK1 |
4108c2ecf20Sopenharmony_ci					       VIVS_GL_FLUSH_MMU_FLUSH_UNK2 |
4118c2ecf20Sopenharmony_ci					       VIVS_GL_FLUSH_MMU_FLUSH_PEMMU |
4128c2ecf20Sopenharmony_ci					       VIVS_GL_FLUSH_MMU_FLUSH_UNK4);
4138c2ecf20Sopenharmony_ci			} else {
4148c2ecf20Sopenharmony_ci				u32 flush = VIVS_MMUv2_CONFIGURATION_MODE_MASK |
4158c2ecf20Sopenharmony_ci					    VIVS_MMUv2_CONFIGURATION_FLUSH_FLUSH;
4168c2ecf20Sopenharmony_ci
4178c2ecf20Sopenharmony_ci				if (switch_mmu_context &&
4188c2ecf20Sopenharmony_ci				    gpu->sec_mode == ETNA_SEC_KERNEL) {
4198c2ecf20Sopenharmony_ci					unsigned short id =
4208c2ecf20Sopenharmony_ci						etnaviv_iommuv2_get_pta_id(gpu->mmu_context);
4218c2ecf20Sopenharmony_ci					CMD_LOAD_STATE(buffer,
4228c2ecf20Sopenharmony_ci						VIVS_MMUv2_PTA_CONFIG,
4238c2ecf20Sopenharmony_ci						VIVS_MMUv2_PTA_CONFIG_INDEX(id));
4248c2ecf20Sopenharmony_ci				}
4258c2ecf20Sopenharmony_ci
4268c2ecf20Sopenharmony_ci				if (gpu->sec_mode == ETNA_SEC_NONE)
4278c2ecf20Sopenharmony_ci					flush |= etnaviv_iommuv2_get_mtlb_addr(gpu->mmu_context);
4288c2ecf20Sopenharmony_ci
4298c2ecf20Sopenharmony_ci				CMD_LOAD_STATE(buffer, VIVS_MMUv2_CONFIGURATION,
4308c2ecf20Sopenharmony_ci					       flush);
4318c2ecf20Sopenharmony_ci				CMD_SEM(buffer, SYNC_RECIPIENT_FE,
4328c2ecf20Sopenharmony_ci					SYNC_RECIPIENT_PE);
4338c2ecf20Sopenharmony_ci				CMD_STALL(buffer, SYNC_RECIPIENT_FE,
4348c2ecf20Sopenharmony_ci					SYNC_RECIPIENT_PE);
4358c2ecf20Sopenharmony_ci			}
4368c2ecf20Sopenharmony_ci
4378c2ecf20Sopenharmony_ci			gpu->flush_seq = new_flush_seq;
4388c2ecf20Sopenharmony_ci		}
4398c2ecf20Sopenharmony_ci
4408c2ecf20Sopenharmony_ci		if (switch_context) {
4418c2ecf20Sopenharmony_ci			etnaviv_cmd_select_pipe(gpu, buffer, exec_state);
4428c2ecf20Sopenharmony_ci			gpu->exec_state = exec_state;
4438c2ecf20Sopenharmony_ci		}
4448c2ecf20Sopenharmony_ci
4458c2ecf20Sopenharmony_ci		/* And the link to the submitted buffer */
4468c2ecf20Sopenharmony_ci		link_target = etnaviv_cmdbuf_get_va(cmdbuf,
4478c2ecf20Sopenharmony_ci					&gpu->mmu_context->cmdbuf_mapping);
4488c2ecf20Sopenharmony_ci		CMD_LINK(buffer, link_dwords, link_target);
4498c2ecf20Sopenharmony_ci
4508c2ecf20Sopenharmony_ci		/* Update the link target to point to above instructions */
4518c2ecf20Sopenharmony_ci		link_target = target;
4528c2ecf20Sopenharmony_ci		link_dwords = extra_dwords;
4538c2ecf20Sopenharmony_ci	}
4548c2ecf20Sopenharmony_ci
4558c2ecf20Sopenharmony_ci	/*
4568c2ecf20Sopenharmony_ci	 * Append a LINK to the submitted command buffer to return to
4578c2ecf20Sopenharmony_ci	 * the ring buffer.  return_target is the ring target address.
4588c2ecf20Sopenharmony_ci	 * We need at most 7 dwords in the return target: 2 cache flush +
4598c2ecf20Sopenharmony_ci	 * 2 semaphore stall + 1 event + 1 wait + 1 link.
4608c2ecf20Sopenharmony_ci	 */
4618c2ecf20Sopenharmony_ci	return_dwords = 7;
4628c2ecf20Sopenharmony_ci
4638c2ecf20Sopenharmony_ci	/*
4648c2ecf20Sopenharmony_ci	 * When the BLT engine is present we need 6 more dwords in the return
4658c2ecf20Sopenharmony_ci	 * target: 3 enable/flush/disable + 4 enable/semaphore stall/disable,
4668c2ecf20Sopenharmony_ci	 * but we don't need the normal TS flush state.
4678c2ecf20Sopenharmony_ci	 */
4688c2ecf20Sopenharmony_ci	if (has_blt)
4698c2ecf20Sopenharmony_ci		return_dwords += 6;
4708c2ecf20Sopenharmony_ci
4718c2ecf20Sopenharmony_ci	return_target = etnaviv_buffer_reserve(gpu, buffer, return_dwords);
4728c2ecf20Sopenharmony_ci	CMD_LINK(cmdbuf, return_dwords, return_target);
4738c2ecf20Sopenharmony_ci
4748c2ecf20Sopenharmony_ci	/*
4758c2ecf20Sopenharmony_ci	 * Append a cache flush, stall, event, wait and link pointing back to
4768c2ecf20Sopenharmony_ci	 * the wait command to the ring buffer.
4778c2ecf20Sopenharmony_ci	 */
4788c2ecf20Sopenharmony_ci	if (gpu->exec_state == ETNA_PIPE_2D) {
4798c2ecf20Sopenharmony_ci		CMD_LOAD_STATE(buffer, VIVS_GL_FLUSH_CACHE,
4808c2ecf20Sopenharmony_ci				       VIVS_GL_FLUSH_CACHE_PE2D);
4818c2ecf20Sopenharmony_ci	} else {
4828c2ecf20Sopenharmony_ci		CMD_LOAD_STATE(buffer, VIVS_GL_FLUSH_CACHE,
4838c2ecf20Sopenharmony_ci				       VIVS_GL_FLUSH_CACHE_DEPTH |
4848c2ecf20Sopenharmony_ci				       VIVS_GL_FLUSH_CACHE_COLOR);
4858c2ecf20Sopenharmony_ci		if (has_blt) {
4868c2ecf20Sopenharmony_ci			CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x1);
4878c2ecf20Sopenharmony_ci			CMD_LOAD_STATE(buffer, VIVS_BLT_SET_COMMAND, 0x1);
4888c2ecf20Sopenharmony_ci			CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x0);
4898c2ecf20Sopenharmony_ci		} else {
4908c2ecf20Sopenharmony_ci			CMD_LOAD_STATE(buffer, VIVS_TS_FLUSH_CACHE,
4918c2ecf20Sopenharmony_ci					       VIVS_TS_FLUSH_CACHE_FLUSH);
4928c2ecf20Sopenharmony_ci		}
4938c2ecf20Sopenharmony_ci	}
4948c2ecf20Sopenharmony_ci	CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
4958c2ecf20Sopenharmony_ci	CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_PE);
4968c2ecf20Sopenharmony_ci
4978c2ecf20Sopenharmony_ci	if (has_blt) {
4988c2ecf20Sopenharmony_ci		CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x1);
4998c2ecf20Sopenharmony_ci		CMD_SEM(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_BLT);
5008c2ecf20Sopenharmony_ci		CMD_STALL(buffer, SYNC_RECIPIENT_FE, SYNC_RECIPIENT_BLT);
5018c2ecf20Sopenharmony_ci		CMD_LOAD_STATE(buffer, VIVS_BLT_ENABLE, 0x0);
5028c2ecf20Sopenharmony_ci	}
5038c2ecf20Sopenharmony_ci
5048c2ecf20Sopenharmony_ci	CMD_LOAD_STATE(buffer, VIVS_GL_EVENT, VIVS_GL_EVENT_EVENT_ID(event) |
5058c2ecf20Sopenharmony_ci		       VIVS_GL_EVENT_FROM_PE);
5068c2ecf20Sopenharmony_ci	CMD_WAIT(buffer);
5078c2ecf20Sopenharmony_ci	CMD_LINK(buffer, 2,
5088c2ecf20Sopenharmony_ci		 etnaviv_cmdbuf_get_va(buffer, &gpu->mmu_context->cmdbuf_mapping)
5098c2ecf20Sopenharmony_ci		 + buffer->user_size - 4);
5108c2ecf20Sopenharmony_ci
5118c2ecf20Sopenharmony_ci	if (drm_debug_enabled(DRM_UT_DRIVER))
5128c2ecf20Sopenharmony_ci		pr_info("stream link to 0x%08x @ 0x%08x %p\n",
5138c2ecf20Sopenharmony_ci			return_target,
5148c2ecf20Sopenharmony_ci			etnaviv_cmdbuf_get_va(cmdbuf, &gpu->mmu_context->cmdbuf_mapping),
5158c2ecf20Sopenharmony_ci			cmdbuf->vaddr);
5168c2ecf20Sopenharmony_ci
5178c2ecf20Sopenharmony_ci	if (drm_debug_enabled(DRM_UT_DRIVER)) {
5188c2ecf20Sopenharmony_ci		print_hex_dump(KERN_INFO, "cmd ", DUMP_PREFIX_OFFSET, 16, 4,
5198c2ecf20Sopenharmony_ci			       cmdbuf->vaddr, cmdbuf->size, 0);
5208c2ecf20Sopenharmony_ci
5218c2ecf20Sopenharmony_ci		pr_info("link op: %p\n", buffer->vaddr + waitlink_offset);
5228c2ecf20Sopenharmony_ci		pr_info("addr: 0x%08x\n", link_target);
5238c2ecf20Sopenharmony_ci		pr_info("back: 0x%08x\n", return_target);
5248c2ecf20Sopenharmony_ci		pr_info("event: %d\n", event);
5258c2ecf20Sopenharmony_ci	}
5268c2ecf20Sopenharmony_ci
5278c2ecf20Sopenharmony_ci	/*
5288c2ecf20Sopenharmony_ci	 * Kick off the submitted command by replacing the previous
5298c2ecf20Sopenharmony_ci	 * WAIT with a link to the address in the ring buffer.
5308c2ecf20Sopenharmony_ci	 */
5318c2ecf20Sopenharmony_ci	etnaviv_buffer_replace_wait(buffer, waitlink_offset,
5328c2ecf20Sopenharmony_ci				    VIV_FE_LINK_HEADER_OP_LINK |
5338c2ecf20Sopenharmony_ci				    VIV_FE_LINK_HEADER_PREFETCH(link_dwords),
5348c2ecf20Sopenharmony_ci				    link_target);
5358c2ecf20Sopenharmony_ci
5368c2ecf20Sopenharmony_ci	if (drm_debug_enabled(DRM_UT_DRIVER))
5378c2ecf20Sopenharmony_ci		etnaviv_buffer_dump(gpu, buffer, 0, 0x50);
5388c2ecf20Sopenharmony_ci}
539