162306a36Sopenharmony_ci/*
262306a36Sopenharmony_ci * Copyright 2014 Advanced Micro Devices, Inc.
362306a36Sopenharmony_ci *
462306a36Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
562306a36Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
662306a36Sopenharmony_ci * to deal in the Software without restriction, including without limitation
762306a36Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
862306a36Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
962306a36Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
1062306a36Sopenharmony_ci *
1162306a36Sopenharmony_ci * The above copyright notice and this permission notice shall be included in
1262306a36Sopenharmony_ci * all copies or substantial portions of the Software.
1362306a36Sopenharmony_ci *
1462306a36Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1562306a36Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1662306a36Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
1762306a36Sopenharmony_ci * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
1862306a36Sopenharmony_ci * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
1962306a36Sopenharmony_ci * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
2062306a36Sopenharmony_ci * OTHER DEALINGS IN THE SOFTWARE.
2162306a36Sopenharmony_ci *
2262306a36Sopenharmony_ci */
2362306a36Sopenharmony_ci
2462306a36Sopenharmony_ci#include <linux/dma-mapping.h>
2562306a36Sopenharmony_ci
2662306a36Sopenharmony_ci#include "amdgpu.h"
2762306a36Sopenharmony_ci#include "amdgpu_ih.h"
2862306a36Sopenharmony_ci
2962306a36Sopenharmony_ci/**
3062306a36Sopenharmony_ci * amdgpu_ih_ring_init - initialize the IH state
3162306a36Sopenharmony_ci *
3262306a36Sopenharmony_ci * @adev: amdgpu_device pointer
3362306a36Sopenharmony_ci * @ih: ih ring to initialize
3462306a36Sopenharmony_ci * @ring_size: ring size to allocate
3562306a36Sopenharmony_ci * @use_bus_addr: true when we can use dma_alloc_coherent
3662306a36Sopenharmony_ci *
3762306a36Sopenharmony_ci * Initializes the IH state and allocates a buffer
3862306a36Sopenharmony_ci * for the IH ring buffer.
3962306a36Sopenharmony_ci * Returns 0 for success, errors for failure.
4062306a36Sopenharmony_ci */
4162306a36Sopenharmony_ciint amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
4262306a36Sopenharmony_ci			unsigned ring_size, bool use_bus_addr)
4362306a36Sopenharmony_ci{
4462306a36Sopenharmony_ci	u32 rb_bufsz;
4562306a36Sopenharmony_ci	int r;
4662306a36Sopenharmony_ci
4762306a36Sopenharmony_ci	/* Align ring size */
4862306a36Sopenharmony_ci	rb_bufsz = order_base_2(ring_size / 4);
4962306a36Sopenharmony_ci	ring_size = (1 << rb_bufsz) * 4;
5062306a36Sopenharmony_ci	ih->ring_size = ring_size;
5162306a36Sopenharmony_ci	ih->ptr_mask = ih->ring_size - 1;
5262306a36Sopenharmony_ci	ih->rptr = 0;
5362306a36Sopenharmony_ci	ih->use_bus_addr = use_bus_addr;
5462306a36Sopenharmony_ci
5562306a36Sopenharmony_ci	if (use_bus_addr) {
5662306a36Sopenharmony_ci		dma_addr_t dma_addr;
5762306a36Sopenharmony_ci
5862306a36Sopenharmony_ci		if (ih->ring)
5962306a36Sopenharmony_ci			return 0;
6062306a36Sopenharmony_ci
6162306a36Sopenharmony_ci		/* add 8 bytes for the rptr/wptr shadows and
6262306a36Sopenharmony_ci		 * add them to the end of the ring allocation.
6362306a36Sopenharmony_ci		 */
6462306a36Sopenharmony_ci		ih->ring = dma_alloc_coherent(adev->dev, ih->ring_size + 8,
6562306a36Sopenharmony_ci					      &dma_addr, GFP_KERNEL);
6662306a36Sopenharmony_ci		if (ih->ring == NULL)
6762306a36Sopenharmony_ci			return -ENOMEM;
6862306a36Sopenharmony_ci
6962306a36Sopenharmony_ci		ih->gpu_addr = dma_addr;
7062306a36Sopenharmony_ci		ih->wptr_addr = dma_addr + ih->ring_size;
7162306a36Sopenharmony_ci		ih->wptr_cpu = &ih->ring[ih->ring_size / 4];
7262306a36Sopenharmony_ci		ih->rptr_addr = dma_addr + ih->ring_size + 4;
7362306a36Sopenharmony_ci		ih->rptr_cpu = &ih->ring[(ih->ring_size / 4) + 1];
7462306a36Sopenharmony_ci	} else {
7562306a36Sopenharmony_ci		unsigned wptr_offs, rptr_offs;
7662306a36Sopenharmony_ci
7762306a36Sopenharmony_ci		r = amdgpu_device_wb_get(adev, &wptr_offs);
7862306a36Sopenharmony_ci		if (r)
7962306a36Sopenharmony_ci			return r;
8062306a36Sopenharmony_ci
8162306a36Sopenharmony_ci		r = amdgpu_device_wb_get(adev, &rptr_offs);
8262306a36Sopenharmony_ci		if (r) {
8362306a36Sopenharmony_ci			amdgpu_device_wb_free(adev, wptr_offs);
8462306a36Sopenharmony_ci			return r;
8562306a36Sopenharmony_ci		}
8662306a36Sopenharmony_ci
8762306a36Sopenharmony_ci		r = amdgpu_bo_create_kernel(adev, ih->ring_size, PAGE_SIZE,
8862306a36Sopenharmony_ci					    AMDGPU_GEM_DOMAIN_GTT,
8962306a36Sopenharmony_ci					    &ih->ring_obj, &ih->gpu_addr,
9062306a36Sopenharmony_ci					    (void **)&ih->ring);
9162306a36Sopenharmony_ci		if (r) {
9262306a36Sopenharmony_ci			amdgpu_device_wb_free(adev, rptr_offs);
9362306a36Sopenharmony_ci			amdgpu_device_wb_free(adev, wptr_offs);
9462306a36Sopenharmony_ci			return r;
9562306a36Sopenharmony_ci		}
9662306a36Sopenharmony_ci
9762306a36Sopenharmony_ci		ih->wptr_addr = adev->wb.gpu_addr + wptr_offs * 4;
9862306a36Sopenharmony_ci		ih->wptr_cpu = &adev->wb.wb[wptr_offs];
9962306a36Sopenharmony_ci		ih->rptr_addr = adev->wb.gpu_addr + rptr_offs * 4;
10062306a36Sopenharmony_ci		ih->rptr_cpu = &adev->wb.wb[rptr_offs];
10162306a36Sopenharmony_ci	}
10262306a36Sopenharmony_ci
10362306a36Sopenharmony_ci	init_waitqueue_head(&ih->wait_process);
10462306a36Sopenharmony_ci	return 0;
10562306a36Sopenharmony_ci}
10662306a36Sopenharmony_ci
10762306a36Sopenharmony_ci/**
10862306a36Sopenharmony_ci * amdgpu_ih_ring_fini - tear down the IH state
10962306a36Sopenharmony_ci *
11062306a36Sopenharmony_ci * @adev: amdgpu_device pointer
11162306a36Sopenharmony_ci * @ih: ih ring to tear down
11262306a36Sopenharmony_ci *
11362306a36Sopenharmony_ci * Tears down the IH state and frees buffer
11462306a36Sopenharmony_ci * used for the IH ring buffer.
11562306a36Sopenharmony_ci */
11662306a36Sopenharmony_civoid amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
11762306a36Sopenharmony_ci{
11862306a36Sopenharmony_ci
11962306a36Sopenharmony_ci	if (!ih->ring)
12062306a36Sopenharmony_ci		return;
12162306a36Sopenharmony_ci
12262306a36Sopenharmony_ci	if (ih->use_bus_addr) {
12362306a36Sopenharmony_ci
12462306a36Sopenharmony_ci		/* add 8 bytes for the rptr/wptr shadows and
12562306a36Sopenharmony_ci		 * add them to the end of the ring allocation.
12662306a36Sopenharmony_ci		 */
12762306a36Sopenharmony_ci		dma_free_coherent(adev->dev, ih->ring_size + 8,
12862306a36Sopenharmony_ci				  (void *)ih->ring, ih->gpu_addr);
12962306a36Sopenharmony_ci		ih->ring = NULL;
13062306a36Sopenharmony_ci	} else {
13162306a36Sopenharmony_ci		amdgpu_bo_free_kernel(&ih->ring_obj, &ih->gpu_addr,
13262306a36Sopenharmony_ci				      (void **)&ih->ring);
13362306a36Sopenharmony_ci		amdgpu_device_wb_free(adev, (ih->wptr_addr - ih->gpu_addr) / 4);
13462306a36Sopenharmony_ci		amdgpu_device_wb_free(adev, (ih->rptr_addr - ih->gpu_addr) / 4);
13562306a36Sopenharmony_ci	}
13662306a36Sopenharmony_ci}
13762306a36Sopenharmony_ci
13862306a36Sopenharmony_ci/**
13962306a36Sopenharmony_ci * amdgpu_ih_ring_write - write IV to the ring buffer
14062306a36Sopenharmony_ci *
14162306a36Sopenharmony_ci * @adev: amdgpu_device pointer
14262306a36Sopenharmony_ci * @ih: ih ring to write to
14362306a36Sopenharmony_ci * @iv: the iv to write
14462306a36Sopenharmony_ci * @num_dw: size of the iv in dw
14562306a36Sopenharmony_ci *
14662306a36Sopenharmony_ci * Writes an IV to the ring buffer using the CPU and increment the wptr.
14762306a36Sopenharmony_ci * Used for testing and delegating IVs to a software ring.
14862306a36Sopenharmony_ci */
14962306a36Sopenharmony_civoid amdgpu_ih_ring_write(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
15062306a36Sopenharmony_ci			  const uint32_t *iv, unsigned int num_dw)
15162306a36Sopenharmony_ci{
15262306a36Sopenharmony_ci	uint32_t wptr = le32_to_cpu(*ih->wptr_cpu) >> 2;
15362306a36Sopenharmony_ci	unsigned int i;
15462306a36Sopenharmony_ci
15562306a36Sopenharmony_ci	for (i = 0; i < num_dw; ++i)
15662306a36Sopenharmony_ci	        ih->ring[wptr++] = cpu_to_le32(iv[i]);
15762306a36Sopenharmony_ci
15862306a36Sopenharmony_ci	wptr <<= 2;
15962306a36Sopenharmony_ci	wptr &= ih->ptr_mask;
16062306a36Sopenharmony_ci
16162306a36Sopenharmony_ci	/* Only commit the new wptr if we don't overflow */
16262306a36Sopenharmony_ci	if (wptr != READ_ONCE(ih->rptr)) {
16362306a36Sopenharmony_ci		wmb();
16462306a36Sopenharmony_ci		WRITE_ONCE(*ih->wptr_cpu, cpu_to_le32(wptr));
16562306a36Sopenharmony_ci	} else if (adev->irq.retry_cam_enabled) {
16662306a36Sopenharmony_ci		dev_warn_once(adev->dev, "IH soft ring buffer overflow 0x%X, 0x%X\n",
16762306a36Sopenharmony_ci			      wptr, ih->rptr);
16862306a36Sopenharmony_ci	}
16962306a36Sopenharmony_ci}
17062306a36Sopenharmony_ci
17162306a36Sopenharmony_ci/**
17262306a36Sopenharmony_ci * amdgpu_ih_wait_on_checkpoint_process_ts - wait to process IVs up to checkpoint
17362306a36Sopenharmony_ci *
17462306a36Sopenharmony_ci * @adev: amdgpu_device pointer
17562306a36Sopenharmony_ci * @ih: ih ring to process
17662306a36Sopenharmony_ci *
17762306a36Sopenharmony_ci * Used to ensure ring has processed IVs up to the checkpoint write pointer.
17862306a36Sopenharmony_ci */
17962306a36Sopenharmony_ciint amdgpu_ih_wait_on_checkpoint_process_ts(struct amdgpu_device *adev,
18062306a36Sopenharmony_ci					struct amdgpu_ih_ring *ih)
18162306a36Sopenharmony_ci{
18262306a36Sopenharmony_ci	uint32_t checkpoint_wptr;
18362306a36Sopenharmony_ci	uint64_t checkpoint_ts;
18462306a36Sopenharmony_ci	long timeout = HZ;
18562306a36Sopenharmony_ci
18662306a36Sopenharmony_ci	if (!ih->enabled || adev->shutdown)
18762306a36Sopenharmony_ci		return -ENODEV;
18862306a36Sopenharmony_ci
18962306a36Sopenharmony_ci	checkpoint_wptr = amdgpu_ih_get_wptr(adev, ih);
19062306a36Sopenharmony_ci	/* Order wptr with ring data. */
19162306a36Sopenharmony_ci	rmb();
19262306a36Sopenharmony_ci	checkpoint_ts = amdgpu_ih_decode_iv_ts(adev, ih, checkpoint_wptr, -1);
19362306a36Sopenharmony_ci
19462306a36Sopenharmony_ci	return wait_event_interruptible_timeout(ih->wait_process,
19562306a36Sopenharmony_ci		    amdgpu_ih_ts_after(checkpoint_ts, ih->processed_timestamp) ||
19662306a36Sopenharmony_ci		    ih->rptr == amdgpu_ih_get_wptr(adev, ih), timeout);
19762306a36Sopenharmony_ci}
19862306a36Sopenharmony_ci
19962306a36Sopenharmony_ci/**
20062306a36Sopenharmony_ci * amdgpu_ih_process - interrupt handler
20162306a36Sopenharmony_ci *
20262306a36Sopenharmony_ci * @adev: amdgpu_device pointer
20362306a36Sopenharmony_ci * @ih: ih ring to process
20462306a36Sopenharmony_ci *
20562306a36Sopenharmony_ci * Interrupt hander (VI), walk the IH ring.
20662306a36Sopenharmony_ci * Returns irq process return code.
20762306a36Sopenharmony_ci */
20862306a36Sopenharmony_ciint amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
20962306a36Sopenharmony_ci{
21062306a36Sopenharmony_ci	unsigned int count;
21162306a36Sopenharmony_ci	u32 wptr;
21262306a36Sopenharmony_ci
21362306a36Sopenharmony_ci	if (!ih->enabled || adev->shutdown)
21462306a36Sopenharmony_ci		return IRQ_NONE;
21562306a36Sopenharmony_ci
21662306a36Sopenharmony_ci	wptr = amdgpu_ih_get_wptr(adev, ih);
21762306a36Sopenharmony_ci
21862306a36Sopenharmony_cirestart_ih:
21962306a36Sopenharmony_ci	count  = AMDGPU_IH_MAX_NUM_IVS;
22062306a36Sopenharmony_ci	DRM_DEBUG("%s: rptr %d, wptr %d\n", __func__, ih->rptr, wptr);
22162306a36Sopenharmony_ci
22262306a36Sopenharmony_ci	/* Order reading of wptr vs. reading of IH ring data */
22362306a36Sopenharmony_ci	rmb();
22462306a36Sopenharmony_ci
22562306a36Sopenharmony_ci	while (ih->rptr != wptr && --count) {
22662306a36Sopenharmony_ci		amdgpu_irq_dispatch(adev, ih);
22762306a36Sopenharmony_ci		ih->rptr &= ih->ptr_mask;
22862306a36Sopenharmony_ci	}
22962306a36Sopenharmony_ci
23062306a36Sopenharmony_ci	amdgpu_ih_set_rptr(adev, ih);
23162306a36Sopenharmony_ci	wake_up_all(&ih->wait_process);
23262306a36Sopenharmony_ci
23362306a36Sopenharmony_ci	/* make sure wptr hasn't changed while processing */
23462306a36Sopenharmony_ci	wptr = amdgpu_ih_get_wptr(adev, ih);
23562306a36Sopenharmony_ci	if (wptr != ih->rptr)
23662306a36Sopenharmony_ci		goto restart_ih;
23762306a36Sopenharmony_ci
23862306a36Sopenharmony_ci	return IRQ_HANDLED;
23962306a36Sopenharmony_ci}
24062306a36Sopenharmony_ci
24162306a36Sopenharmony_ci/**
24262306a36Sopenharmony_ci * amdgpu_ih_decode_iv_helper - decode an interrupt vector
24362306a36Sopenharmony_ci *
24462306a36Sopenharmony_ci * @adev: amdgpu_device pointer
24562306a36Sopenharmony_ci * @ih: ih ring to process
24662306a36Sopenharmony_ci * @entry: IV entry
24762306a36Sopenharmony_ci *
24862306a36Sopenharmony_ci * Decodes the interrupt vector at the current rptr
24962306a36Sopenharmony_ci * position and also advance the position for Vega10
25062306a36Sopenharmony_ci * and later GPUs.
25162306a36Sopenharmony_ci */
25262306a36Sopenharmony_civoid amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev,
25362306a36Sopenharmony_ci				struct amdgpu_ih_ring *ih,
25462306a36Sopenharmony_ci				struct amdgpu_iv_entry *entry)
25562306a36Sopenharmony_ci{
25662306a36Sopenharmony_ci	/* wptr/rptr are in bytes! */
25762306a36Sopenharmony_ci	u32 ring_index = ih->rptr >> 2;
25862306a36Sopenharmony_ci	uint32_t dw[8];
25962306a36Sopenharmony_ci
26062306a36Sopenharmony_ci	dw[0] = le32_to_cpu(ih->ring[ring_index + 0]);
26162306a36Sopenharmony_ci	dw[1] = le32_to_cpu(ih->ring[ring_index + 1]);
26262306a36Sopenharmony_ci	dw[2] = le32_to_cpu(ih->ring[ring_index + 2]);
26362306a36Sopenharmony_ci	dw[3] = le32_to_cpu(ih->ring[ring_index + 3]);
26462306a36Sopenharmony_ci	dw[4] = le32_to_cpu(ih->ring[ring_index + 4]);
26562306a36Sopenharmony_ci	dw[5] = le32_to_cpu(ih->ring[ring_index + 5]);
26662306a36Sopenharmony_ci	dw[6] = le32_to_cpu(ih->ring[ring_index + 6]);
26762306a36Sopenharmony_ci	dw[7] = le32_to_cpu(ih->ring[ring_index + 7]);
26862306a36Sopenharmony_ci
26962306a36Sopenharmony_ci	entry->client_id = dw[0] & 0xff;
27062306a36Sopenharmony_ci	entry->src_id = (dw[0] >> 8) & 0xff;
27162306a36Sopenharmony_ci	entry->ring_id = (dw[0] >> 16) & 0xff;
27262306a36Sopenharmony_ci	entry->vmid = (dw[0] >> 24) & 0xf;
27362306a36Sopenharmony_ci	entry->vmid_src = (dw[0] >> 31);
27462306a36Sopenharmony_ci	entry->timestamp = dw[1] | ((u64)(dw[2] & 0xffff) << 32);
27562306a36Sopenharmony_ci	entry->timestamp_src = dw[2] >> 31;
27662306a36Sopenharmony_ci	entry->pasid = dw[3] & 0xffff;
27762306a36Sopenharmony_ci	entry->node_id = (dw[3] >> 16) & 0xff;
27862306a36Sopenharmony_ci	entry->src_data[0] = dw[4];
27962306a36Sopenharmony_ci	entry->src_data[1] = dw[5];
28062306a36Sopenharmony_ci	entry->src_data[2] = dw[6];
28162306a36Sopenharmony_ci	entry->src_data[3] = dw[7];
28262306a36Sopenharmony_ci
28362306a36Sopenharmony_ci	/* wptr/rptr are in bytes! */
28462306a36Sopenharmony_ci	ih->rptr += 32;
28562306a36Sopenharmony_ci}
28662306a36Sopenharmony_ci
28762306a36Sopenharmony_ciuint64_t amdgpu_ih_decode_iv_ts_helper(struct amdgpu_ih_ring *ih, u32 rptr,
28862306a36Sopenharmony_ci				       signed int offset)
28962306a36Sopenharmony_ci{
29062306a36Sopenharmony_ci	uint32_t iv_size = 32;
29162306a36Sopenharmony_ci	uint32_t ring_index;
29262306a36Sopenharmony_ci	uint32_t dw1, dw2;
29362306a36Sopenharmony_ci
29462306a36Sopenharmony_ci	rptr += iv_size * offset;
29562306a36Sopenharmony_ci	ring_index = (rptr & ih->ptr_mask) >> 2;
29662306a36Sopenharmony_ci
29762306a36Sopenharmony_ci	dw1 = le32_to_cpu(ih->ring[ring_index + 1]);
29862306a36Sopenharmony_ci	dw2 = le32_to_cpu(ih->ring[ring_index + 2]);
29962306a36Sopenharmony_ci	return dw1 | ((u64)(dw2 & 0xffff) << 32);
30062306a36Sopenharmony_ci}
301