/*
 * Copyright 2010 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */

#include "radeon.h"
#include "radeon_asic.h"
#include "radeon_trace.h"
#include "ni.h"
#include "nid.h"

/*
 * DMA
 * Starting with R600, the GPU has an asynchronous
 * DMA engine. The programming model is very similar
 * to the 3D engine (ring buffer, IBs, etc.), but the
 * DMA controller has its own packet format that is
 * different from the PM4 format used by the 3D engine.
 * It supports copying data, writing embedded data,
 * solid fills, and a number of other things. It also
 * has support for tiling/detiling of buffers.
 * Cayman and newer support two asynchronous DMA engines.
 */

/**
 * cayman_dma_get_rptr - get the current read pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon ring pointer
 *
 * Get the current rptr from the hardware (cayman+).
5162306a36Sopenharmony_ci */ 5262306a36Sopenharmony_ciuint32_t cayman_dma_get_rptr(struct radeon_device *rdev, 5362306a36Sopenharmony_ci struct radeon_ring *ring) 5462306a36Sopenharmony_ci{ 5562306a36Sopenharmony_ci u32 rptr, reg; 5662306a36Sopenharmony_ci 5762306a36Sopenharmony_ci if (rdev->wb.enabled) { 5862306a36Sopenharmony_ci rptr = rdev->wb.wb[ring->rptr_offs/4]; 5962306a36Sopenharmony_ci } else { 6062306a36Sopenharmony_ci if (ring->idx == R600_RING_TYPE_DMA_INDEX) 6162306a36Sopenharmony_ci reg = DMA_RB_RPTR + DMA0_REGISTER_OFFSET; 6262306a36Sopenharmony_ci else 6362306a36Sopenharmony_ci reg = DMA_RB_RPTR + DMA1_REGISTER_OFFSET; 6462306a36Sopenharmony_ci 6562306a36Sopenharmony_ci rptr = RREG32(reg); 6662306a36Sopenharmony_ci } 6762306a36Sopenharmony_ci 6862306a36Sopenharmony_ci return (rptr & 0x3fffc) >> 2; 6962306a36Sopenharmony_ci} 7062306a36Sopenharmony_ci 7162306a36Sopenharmony_ci/** 7262306a36Sopenharmony_ci * cayman_dma_get_wptr - get the current write pointer 7362306a36Sopenharmony_ci * 7462306a36Sopenharmony_ci * @rdev: radeon_device pointer 7562306a36Sopenharmony_ci * @ring: radeon ring pointer 7662306a36Sopenharmony_ci * 7762306a36Sopenharmony_ci * Get the current wptr from the hardware (cayman+). 
7862306a36Sopenharmony_ci */ 7962306a36Sopenharmony_ciuint32_t cayman_dma_get_wptr(struct radeon_device *rdev, 8062306a36Sopenharmony_ci struct radeon_ring *ring) 8162306a36Sopenharmony_ci{ 8262306a36Sopenharmony_ci u32 reg; 8362306a36Sopenharmony_ci 8462306a36Sopenharmony_ci if (ring->idx == R600_RING_TYPE_DMA_INDEX) 8562306a36Sopenharmony_ci reg = DMA_RB_WPTR + DMA0_REGISTER_OFFSET; 8662306a36Sopenharmony_ci else 8762306a36Sopenharmony_ci reg = DMA_RB_WPTR + DMA1_REGISTER_OFFSET; 8862306a36Sopenharmony_ci 8962306a36Sopenharmony_ci return (RREG32(reg) & 0x3fffc) >> 2; 9062306a36Sopenharmony_ci} 9162306a36Sopenharmony_ci 9262306a36Sopenharmony_ci/** 9362306a36Sopenharmony_ci * cayman_dma_set_wptr - commit the write pointer 9462306a36Sopenharmony_ci * 9562306a36Sopenharmony_ci * @rdev: radeon_device pointer 9662306a36Sopenharmony_ci * @ring: radeon ring pointer 9762306a36Sopenharmony_ci * 9862306a36Sopenharmony_ci * Write the wptr back to the hardware (cayman+). 9962306a36Sopenharmony_ci */ 10062306a36Sopenharmony_civoid cayman_dma_set_wptr(struct radeon_device *rdev, 10162306a36Sopenharmony_ci struct radeon_ring *ring) 10262306a36Sopenharmony_ci{ 10362306a36Sopenharmony_ci u32 reg; 10462306a36Sopenharmony_ci 10562306a36Sopenharmony_ci if (ring->idx == R600_RING_TYPE_DMA_INDEX) 10662306a36Sopenharmony_ci reg = DMA_RB_WPTR + DMA0_REGISTER_OFFSET; 10762306a36Sopenharmony_ci else 10862306a36Sopenharmony_ci reg = DMA_RB_WPTR + DMA1_REGISTER_OFFSET; 10962306a36Sopenharmony_ci 11062306a36Sopenharmony_ci WREG32(reg, (ring->wptr << 2) & 0x3fffc); 11162306a36Sopenharmony_ci} 11262306a36Sopenharmony_ci 11362306a36Sopenharmony_ci/** 11462306a36Sopenharmony_ci * cayman_dma_ring_ib_execute - Schedule an IB on the DMA engine 11562306a36Sopenharmony_ci * 11662306a36Sopenharmony_ci * @rdev: radeon_device pointer 11762306a36Sopenharmony_ci * @ib: IB object to schedule 11862306a36Sopenharmony_ci * 11962306a36Sopenharmony_ci * Schedule an IB in the DMA ring (cayman-SI). 
12062306a36Sopenharmony_ci */ 12162306a36Sopenharmony_civoid cayman_dma_ring_ib_execute(struct radeon_device *rdev, 12262306a36Sopenharmony_ci struct radeon_ib *ib) 12362306a36Sopenharmony_ci{ 12462306a36Sopenharmony_ci struct radeon_ring *ring = &rdev->ring[ib->ring]; 12562306a36Sopenharmony_ci unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0; 12662306a36Sopenharmony_ci 12762306a36Sopenharmony_ci if (rdev->wb.enabled) { 12862306a36Sopenharmony_ci u32 next_rptr = ring->wptr + 4; 12962306a36Sopenharmony_ci while ((next_rptr & 7) != 5) 13062306a36Sopenharmony_ci next_rptr++; 13162306a36Sopenharmony_ci next_rptr += 3; 13262306a36Sopenharmony_ci radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 1)); 13362306a36Sopenharmony_ci radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc); 13462306a36Sopenharmony_ci radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xff); 13562306a36Sopenharmony_ci radeon_ring_write(ring, next_rptr); 13662306a36Sopenharmony_ci } 13762306a36Sopenharmony_ci 13862306a36Sopenharmony_ci /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring. 13962306a36Sopenharmony_ci * Pad as necessary with NOPs. 
14062306a36Sopenharmony_ci */ 14162306a36Sopenharmony_ci while ((ring->wptr & 7) != 5) 14262306a36Sopenharmony_ci radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0)); 14362306a36Sopenharmony_ci radeon_ring_write(ring, DMA_IB_PACKET(DMA_PACKET_INDIRECT_BUFFER, vm_id, 0)); 14462306a36Sopenharmony_ci radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFE0)); 14562306a36Sopenharmony_ci radeon_ring_write(ring, (ib->length_dw << 12) | (upper_32_bits(ib->gpu_addr) & 0xFF)); 14662306a36Sopenharmony_ci 14762306a36Sopenharmony_ci} 14862306a36Sopenharmony_ci 14962306a36Sopenharmony_ci/** 15062306a36Sopenharmony_ci * cayman_dma_stop - stop the async dma engines 15162306a36Sopenharmony_ci * 15262306a36Sopenharmony_ci * @rdev: radeon_device pointer 15362306a36Sopenharmony_ci * 15462306a36Sopenharmony_ci * Stop the async dma engines (cayman-SI). 15562306a36Sopenharmony_ci */ 15662306a36Sopenharmony_civoid cayman_dma_stop(struct radeon_device *rdev) 15762306a36Sopenharmony_ci{ 15862306a36Sopenharmony_ci u32 rb_cntl; 15962306a36Sopenharmony_ci 16062306a36Sopenharmony_ci if ((rdev->asic->copy.copy_ring_index == R600_RING_TYPE_DMA_INDEX) || 16162306a36Sopenharmony_ci (rdev->asic->copy.copy_ring_index == CAYMAN_RING_TYPE_DMA1_INDEX)) 16262306a36Sopenharmony_ci radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size); 16362306a36Sopenharmony_ci 16462306a36Sopenharmony_ci /* dma0 */ 16562306a36Sopenharmony_ci rb_cntl = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET); 16662306a36Sopenharmony_ci rb_cntl &= ~DMA_RB_ENABLE; 16762306a36Sopenharmony_ci WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, rb_cntl); 16862306a36Sopenharmony_ci 16962306a36Sopenharmony_ci /* dma1 */ 17062306a36Sopenharmony_ci rb_cntl = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET); 17162306a36Sopenharmony_ci rb_cntl &= ~DMA_RB_ENABLE; 17262306a36Sopenharmony_ci WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, rb_cntl); 17362306a36Sopenharmony_ci 17462306a36Sopenharmony_ci rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false; 
17562306a36Sopenharmony_ci rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false; 17662306a36Sopenharmony_ci} 17762306a36Sopenharmony_ci 17862306a36Sopenharmony_ci/** 17962306a36Sopenharmony_ci * cayman_dma_resume - setup and start the async dma engines 18062306a36Sopenharmony_ci * 18162306a36Sopenharmony_ci * @rdev: radeon_device pointer 18262306a36Sopenharmony_ci * 18362306a36Sopenharmony_ci * Set up the DMA ring buffers and enable them. (cayman-SI). 18462306a36Sopenharmony_ci * Returns 0 for success, error for failure. 18562306a36Sopenharmony_ci */ 18662306a36Sopenharmony_ciint cayman_dma_resume(struct radeon_device *rdev) 18762306a36Sopenharmony_ci{ 18862306a36Sopenharmony_ci struct radeon_ring *ring; 18962306a36Sopenharmony_ci u32 rb_cntl, dma_cntl, ib_cntl; 19062306a36Sopenharmony_ci u32 rb_bufsz; 19162306a36Sopenharmony_ci u32 reg_offset, wb_offset; 19262306a36Sopenharmony_ci int i, r; 19362306a36Sopenharmony_ci 19462306a36Sopenharmony_ci for (i = 0; i < 2; i++) { 19562306a36Sopenharmony_ci if (i == 0) { 19662306a36Sopenharmony_ci ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX]; 19762306a36Sopenharmony_ci reg_offset = DMA0_REGISTER_OFFSET; 19862306a36Sopenharmony_ci wb_offset = R600_WB_DMA_RPTR_OFFSET; 19962306a36Sopenharmony_ci } else { 20062306a36Sopenharmony_ci ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]; 20162306a36Sopenharmony_ci reg_offset = DMA1_REGISTER_OFFSET; 20262306a36Sopenharmony_ci wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET; 20362306a36Sopenharmony_ci } 20462306a36Sopenharmony_ci 20562306a36Sopenharmony_ci WREG32(DMA_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0); 20662306a36Sopenharmony_ci WREG32(DMA_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0); 20762306a36Sopenharmony_ci 20862306a36Sopenharmony_ci /* Set ring buffer size in dwords */ 20962306a36Sopenharmony_ci rb_bufsz = order_base_2(ring->ring_size / 4); 21062306a36Sopenharmony_ci rb_cntl = rb_bufsz << 1; 21162306a36Sopenharmony_ci#ifdef __BIG_ENDIAN 21262306a36Sopenharmony_ci rb_cntl |= 
DMA_RB_SWAP_ENABLE | DMA_RPTR_WRITEBACK_SWAP_ENABLE; 21362306a36Sopenharmony_ci#endif 21462306a36Sopenharmony_ci WREG32(DMA_RB_CNTL + reg_offset, rb_cntl); 21562306a36Sopenharmony_ci 21662306a36Sopenharmony_ci /* Initialize the ring buffer's read and write pointers */ 21762306a36Sopenharmony_ci WREG32(DMA_RB_RPTR + reg_offset, 0); 21862306a36Sopenharmony_ci WREG32(DMA_RB_WPTR + reg_offset, 0); 21962306a36Sopenharmony_ci 22062306a36Sopenharmony_ci /* set the wb address whether it's enabled or not */ 22162306a36Sopenharmony_ci WREG32(DMA_RB_RPTR_ADDR_HI + reg_offset, 22262306a36Sopenharmony_ci upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFF); 22362306a36Sopenharmony_ci WREG32(DMA_RB_RPTR_ADDR_LO + reg_offset, 22462306a36Sopenharmony_ci ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC)); 22562306a36Sopenharmony_ci 22662306a36Sopenharmony_ci if (rdev->wb.enabled) 22762306a36Sopenharmony_ci rb_cntl |= DMA_RPTR_WRITEBACK_ENABLE; 22862306a36Sopenharmony_ci 22962306a36Sopenharmony_ci WREG32(DMA_RB_BASE + reg_offset, ring->gpu_addr >> 8); 23062306a36Sopenharmony_ci 23162306a36Sopenharmony_ci /* enable DMA IBs */ 23262306a36Sopenharmony_ci ib_cntl = DMA_IB_ENABLE | CMD_VMID_FORCE; 23362306a36Sopenharmony_ci#ifdef __BIG_ENDIAN 23462306a36Sopenharmony_ci ib_cntl |= DMA_IB_SWAP_ENABLE; 23562306a36Sopenharmony_ci#endif 23662306a36Sopenharmony_ci WREG32(DMA_IB_CNTL + reg_offset, ib_cntl); 23762306a36Sopenharmony_ci 23862306a36Sopenharmony_ci dma_cntl = RREG32(DMA_CNTL + reg_offset); 23962306a36Sopenharmony_ci dma_cntl &= ~CTXEMPTY_INT_ENABLE; 24062306a36Sopenharmony_ci WREG32(DMA_CNTL + reg_offset, dma_cntl); 24162306a36Sopenharmony_ci 24262306a36Sopenharmony_ci ring->wptr = 0; 24362306a36Sopenharmony_ci WREG32(DMA_RB_WPTR + reg_offset, ring->wptr << 2); 24462306a36Sopenharmony_ci 24562306a36Sopenharmony_ci WREG32(DMA_RB_CNTL + reg_offset, rb_cntl | DMA_RB_ENABLE); 24662306a36Sopenharmony_ci 24762306a36Sopenharmony_ci ring->ready = true; 24862306a36Sopenharmony_ci 
24962306a36Sopenharmony_ci r = radeon_ring_test(rdev, ring->idx, ring); 25062306a36Sopenharmony_ci if (r) { 25162306a36Sopenharmony_ci ring->ready = false; 25262306a36Sopenharmony_ci return r; 25362306a36Sopenharmony_ci } 25462306a36Sopenharmony_ci } 25562306a36Sopenharmony_ci 25662306a36Sopenharmony_ci if ((rdev->asic->copy.copy_ring_index == R600_RING_TYPE_DMA_INDEX) || 25762306a36Sopenharmony_ci (rdev->asic->copy.copy_ring_index == CAYMAN_RING_TYPE_DMA1_INDEX)) 25862306a36Sopenharmony_ci radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size); 25962306a36Sopenharmony_ci 26062306a36Sopenharmony_ci return 0; 26162306a36Sopenharmony_ci} 26262306a36Sopenharmony_ci 26362306a36Sopenharmony_ci/** 26462306a36Sopenharmony_ci * cayman_dma_fini - tear down the async dma engines 26562306a36Sopenharmony_ci * 26662306a36Sopenharmony_ci * @rdev: radeon_device pointer 26762306a36Sopenharmony_ci * 26862306a36Sopenharmony_ci * Stop the async dma engines and free the rings (cayman-SI). 26962306a36Sopenharmony_ci */ 27062306a36Sopenharmony_civoid cayman_dma_fini(struct radeon_device *rdev) 27162306a36Sopenharmony_ci{ 27262306a36Sopenharmony_ci cayman_dma_stop(rdev); 27362306a36Sopenharmony_ci radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]); 27462306a36Sopenharmony_ci radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]); 27562306a36Sopenharmony_ci} 27662306a36Sopenharmony_ci 27762306a36Sopenharmony_ci/** 27862306a36Sopenharmony_ci * cayman_dma_is_lockup - Check if the DMA engine is locked up 27962306a36Sopenharmony_ci * 28062306a36Sopenharmony_ci * @rdev: radeon_device pointer 28162306a36Sopenharmony_ci * @ring: radeon_ring structure holding ring information 28262306a36Sopenharmony_ci * 28362306a36Sopenharmony_ci * Check if the async DMA engine is locked up. 28462306a36Sopenharmony_ci * Returns true if the engine appears to be locked up, false if not. 
28562306a36Sopenharmony_ci */ 28662306a36Sopenharmony_cibool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring) 28762306a36Sopenharmony_ci{ 28862306a36Sopenharmony_ci u32 reset_mask = cayman_gpu_check_soft_reset(rdev); 28962306a36Sopenharmony_ci u32 mask; 29062306a36Sopenharmony_ci 29162306a36Sopenharmony_ci if (ring->idx == R600_RING_TYPE_DMA_INDEX) 29262306a36Sopenharmony_ci mask = RADEON_RESET_DMA; 29362306a36Sopenharmony_ci else 29462306a36Sopenharmony_ci mask = RADEON_RESET_DMA1; 29562306a36Sopenharmony_ci 29662306a36Sopenharmony_ci if (!(reset_mask & mask)) { 29762306a36Sopenharmony_ci radeon_ring_lockup_update(rdev, ring); 29862306a36Sopenharmony_ci return false; 29962306a36Sopenharmony_ci } 30062306a36Sopenharmony_ci return radeon_ring_test_lockup(rdev, ring); 30162306a36Sopenharmony_ci} 30262306a36Sopenharmony_ci 30362306a36Sopenharmony_ci/** 30462306a36Sopenharmony_ci * cayman_dma_vm_copy_pages - update PTEs by copying them from the GART 30562306a36Sopenharmony_ci * 30662306a36Sopenharmony_ci * @rdev: radeon_device pointer 30762306a36Sopenharmony_ci * @ib: indirect buffer to fill with commands 30862306a36Sopenharmony_ci * @pe: addr of the page entry 30962306a36Sopenharmony_ci * @src: src addr where to copy from 31062306a36Sopenharmony_ci * @count: number of page entries to update 31162306a36Sopenharmony_ci * 31262306a36Sopenharmony_ci * Update PTEs by copying them from the GART using the DMA (cayman/TN). 
31362306a36Sopenharmony_ci */ 31462306a36Sopenharmony_civoid cayman_dma_vm_copy_pages(struct radeon_device *rdev, 31562306a36Sopenharmony_ci struct radeon_ib *ib, 31662306a36Sopenharmony_ci uint64_t pe, uint64_t src, 31762306a36Sopenharmony_ci unsigned count) 31862306a36Sopenharmony_ci{ 31962306a36Sopenharmony_ci unsigned ndw; 32062306a36Sopenharmony_ci 32162306a36Sopenharmony_ci while (count) { 32262306a36Sopenharmony_ci ndw = count * 2; 32362306a36Sopenharmony_ci if (ndw > 0xFFFFE) 32462306a36Sopenharmony_ci ndw = 0xFFFFE; 32562306a36Sopenharmony_ci 32662306a36Sopenharmony_ci ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_COPY, 32762306a36Sopenharmony_ci 0, 0, ndw); 32862306a36Sopenharmony_ci ib->ptr[ib->length_dw++] = lower_32_bits(pe); 32962306a36Sopenharmony_ci ib->ptr[ib->length_dw++] = lower_32_bits(src); 33062306a36Sopenharmony_ci ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; 33162306a36Sopenharmony_ci ib->ptr[ib->length_dw++] = upper_32_bits(src) & 0xff; 33262306a36Sopenharmony_ci 33362306a36Sopenharmony_ci pe += ndw * 4; 33462306a36Sopenharmony_ci src += ndw * 4; 33562306a36Sopenharmony_ci count -= ndw / 2; 33662306a36Sopenharmony_ci } 33762306a36Sopenharmony_ci} 33862306a36Sopenharmony_ci 33962306a36Sopenharmony_ci/** 34062306a36Sopenharmony_ci * cayman_dma_vm_write_pages - update PTEs by writing them manually 34162306a36Sopenharmony_ci * 34262306a36Sopenharmony_ci * @rdev: radeon_device pointer 34362306a36Sopenharmony_ci * @ib: indirect buffer to fill with commands 34462306a36Sopenharmony_ci * @pe: addr of the page entry 34562306a36Sopenharmony_ci * @addr: dst addr to write into pe 34662306a36Sopenharmony_ci * @count: number of page entries to update 34762306a36Sopenharmony_ci * @incr: increase next addr by incr bytes 34862306a36Sopenharmony_ci * @flags: hw access flags 34962306a36Sopenharmony_ci * 35062306a36Sopenharmony_ci * Update PTEs by writing them manually using the DMA (cayman/TN). 
35162306a36Sopenharmony_ci */ 35262306a36Sopenharmony_civoid cayman_dma_vm_write_pages(struct radeon_device *rdev, 35362306a36Sopenharmony_ci struct radeon_ib *ib, 35462306a36Sopenharmony_ci uint64_t pe, 35562306a36Sopenharmony_ci uint64_t addr, unsigned count, 35662306a36Sopenharmony_ci uint32_t incr, uint32_t flags) 35762306a36Sopenharmony_ci{ 35862306a36Sopenharmony_ci uint64_t value; 35962306a36Sopenharmony_ci unsigned ndw; 36062306a36Sopenharmony_ci 36162306a36Sopenharmony_ci while (count) { 36262306a36Sopenharmony_ci ndw = count * 2; 36362306a36Sopenharmony_ci if (ndw > 0xFFFFE) 36462306a36Sopenharmony_ci ndw = 0xFFFFE; 36562306a36Sopenharmony_ci 36662306a36Sopenharmony_ci /* for non-physically contiguous pages (system) */ 36762306a36Sopenharmony_ci ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_WRITE, 36862306a36Sopenharmony_ci 0, 0, ndw); 36962306a36Sopenharmony_ci ib->ptr[ib->length_dw++] = pe; 37062306a36Sopenharmony_ci ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; 37162306a36Sopenharmony_ci for (; ndw > 0; ndw -= 2, --count, pe += 8) { 37262306a36Sopenharmony_ci if (flags & R600_PTE_SYSTEM) { 37362306a36Sopenharmony_ci value = radeon_vm_map_gart(rdev, addr); 37462306a36Sopenharmony_ci } else if (flags & R600_PTE_VALID) { 37562306a36Sopenharmony_ci value = addr; 37662306a36Sopenharmony_ci } else { 37762306a36Sopenharmony_ci value = 0; 37862306a36Sopenharmony_ci } 37962306a36Sopenharmony_ci addr += incr; 38062306a36Sopenharmony_ci value |= flags; 38162306a36Sopenharmony_ci ib->ptr[ib->length_dw++] = value; 38262306a36Sopenharmony_ci ib->ptr[ib->length_dw++] = upper_32_bits(value); 38362306a36Sopenharmony_ci } 38462306a36Sopenharmony_ci } 38562306a36Sopenharmony_ci} 38662306a36Sopenharmony_ci 38762306a36Sopenharmony_ci/** 38862306a36Sopenharmony_ci * cayman_dma_vm_set_pages - update the page tables using the DMA 38962306a36Sopenharmony_ci * 39062306a36Sopenharmony_ci * @rdev: radeon_device pointer 39162306a36Sopenharmony_ci * @ib: indirect buffer 
to fill with commands 39262306a36Sopenharmony_ci * @pe: addr of the page entry 39362306a36Sopenharmony_ci * @addr: dst addr to write into pe 39462306a36Sopenharmony_ci * @count: number of page entries to update 39562306a36Sopenharmony_ci * @incr: increase next addr by incr bytes 39662306a36Sopenharmony_ci * @flags: hw access flags 39762306a36Sopenharmony_ci * 39862306a36Sopenharmony_ci * Update the page tables using the DMA (cayman/TN). 39962306a36Sopenharmony_ci */ 40062306a36Sopenharmony_civoid cayman_dma_vm_set_pages(struct radeon_device *rdev, 40162306a36Sopenharmony_ci struct radeon_ib *ib, 40262306a36Sopenharmony_ci uint64_t pe, 40362306a36Sopenharmony_ci uint64_t addr, unsigned count, 40462306a36Sopenharmony_ci uint32_t incr, uint32_t flags) 40562306a36Sopenharmony_ci{ 40662306a36Sopenharmony_ci uint64_t value; 40762306a36Sopenharmony_ci unsigned ndw; 40862306a36Sopenharmony_ci 40962306a36Sopenharmony_ci while (count) { 41062306a36Sopenharmony_ci ndw = count * 2; 41162306a36Sopenharmony_ci if (ndw > 0xFFFFE) 41262306a36Sopenharmony_ci ndw = 0xFFFFE; 41362306a36Sopenharmony_ci 41462306a36Sopenharmony_ci if (flags & R600_PTE_VALID) 41562306a36Sopenharmony_ci value = addr; 41662306a36Sopenharmony_ci else 41762306a36Sopenharmony_ci value = 0; 41862306a36Sopenharmony_ci 41962306a36Sopenharmony_ci /* for physically contiguous pages (vram) */ 42062306a36Sopenharmony_ci ib->ptr[ib->length_dw++] = DMA_PTE_PDE_PACKET(ndw); 42162306a36Sopenharmony_ci ib->ptr[ib->length_dw++] = pe; /* dst addr */ 42262306a36Sopenharmony_ci ib->ptr[ib->length_dw++] = upper_32_bits(pe) & 0xff; 42362306a36Sopenharmony_ci ib->ptr[ib->length_dw++] = flags; /* mask */ 42462306a36Sopenharmony_ci ib->ptr[ib->length_dw++] = 0; 42562306a36Sopenharmony_ci ib->ptr[ib->length_dw++] = value; /* value */ 42662306a36Sopenharmony_ci ib->ptr[ib->length_dw++] = upper_32_bits(value); 42762306a36Sopenharmony_ci ib->ptr[ib->length_dw++] = incr; /* increment size */ 42862306a36Sopenharmony_ci 
ib->ptr[ib->length_dw++] = 0; 42962306a36Sopenharmony_ci 43062306a36Sopenharmony_ci pe += ndw * 4; 43162306a36Sopenharmony_ci addr += (ndw / 2) * incr; 43262306a36Sopenharmony_ci count -= ndw / 2; 43362306a36Sopenharmony_ci } 43462306a36Sopenharmony_ci} 43562306a36Sopenharmony_ci 43662306a36Sopenharmony_ci/** 43762306a36Sopenharmony_ci * cayman_dma_vm_pad_ib - pad the IB to the required number of dw 43862306a36Sopenharmony_ci * 43962306a36Sopenharmony_ci * @ib: indirect buffer to fill with padding 44062306a36Sopenharmony_ci * 44162306a36Sopenharmony_ci */ 44262306a36Sopenharmony_civoid cayman_dma_vm_pad_ib(struct radeon_ib *ib) 44362306a36Sopenharmony_ci{ 44462306a36Sopenharmony_ci while (ib->length_dw & 0x7) 44562306a36Sopenharmony_ci ib->ptr[ib->length_dw++] = DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0); 44662306a36Sopenharmony_ci} 44762306a36Sopenharmony_ci 44862306a36Sopenharmony_civoid cayman_dma_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring, 44962306a36Sopenharmony_ci unsigned vm_id, uint64_t pd_addr) 45062306a36Sopenharmony_ci{ 45162306a36Sopenharmony_ci radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0)); 45262306a36Sopenharmony_ci radeon_ring_write(ring, (0xf << 16) | ((VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2)); 45362306a36Sopenharmony_ci radeon_ring_write(ring, pd_addr >> 12); 45462306a36Sopenharmony_ci 45562306a36Sopenharmony_ci /* flush hdp cache */ 45662306a36Sopenharmony_ci radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0)); 45762306a36Sopenharmony_ci radeon_ring_write(ring, (0xf << 16) | (HDP_MEM_COHERENCY_FLUSH_CNTL >> 2)); 45862306a36Sopenharmony_ci radeon_ring_write(ring, 1); 45962306a36Sopenharmony_ci 46062306a36Sopenharmony_ci /* bits 0-7 are the VM contexts0-7 */ 46162306a36Sopenharmony_ci radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_SRBM_WRITE, 0, 0, 0)); 46262306a36Sopenharmony_ci radeon_ring_write(ring, (0xf << 16) | (VM_INVALIDATE_REQUEST >> 2)); 46362306a36Sopenharmony_ci 
radeon_ring_write(ring, 1 << vm_id); 46462306a36Sopenharmony_ci 46562306a36Sopenharmony_ci /* wait for invalidate to complete */ 46662306a36Sopenharmony_ci radeon_ring_write(ring, DMA_SRBM_READ_PACKET); 46762306a36Sopenharmony_ci radeon_ring_write(ring, (0xff << 20) | (VM_INVALIDATE_REQUEST >> 2)); 46862306a36Sopenharmony_ci radeon_ring_write(ring, 0); /* mask */ 46962306a36Sopenharmony_ci radeon_ring_write(ring, 0); /* value */ 47062306a36Sopenharmony_ci} 47162306a36Sopenharmony_ci 472