162306a36Sopenharmony_ci/* SPDX-License-Identifier: GPL-2.0 */ 262306a36Sopenharmony_ci#include <linux/prefetch.h> 362306a36Sopenharmony_ci 462306a36Sopenharmony_ci/** 562306a36Sopenharmony_ci * iommu_fill_pdir - Insert coalesced scatter/gather chunks into the I/O Pdir. 662306a36Sopenharmony_ci * @ioc: The I/O Controller. 762306a36Sopenharmony_ci * @startsg: The scatter/gather list of coalesced chunks. 862306a36Sopenharmony_ci * @nents: The number of entries in the scatter/gather list. 962306a36Sopenharmony_ci * @hint: The DMA Hint. 1062306a36Sopenharmony_ci * 1162306a36Sopenharmony_ci * This function inserts the coalesced scatter/gather list chunks into the 1262306a36Sopenharmony_ci * I/O Controller's I/O Pdir. 1362306a36Sopenharmony_ci */ 1462306a36Sopenharmony_cistatic inline unsigned int 1562306a36Sopenharmony_ciiommu_fill_pdir(struct ioc *ioc, struct scatterlist *startsg, int nents, 1662306a36Sopenharmony_ci unsigned long hint, 1762306a36Sopenharmony_ci void (*iommu_io_pdir_entry)(__le64 *, space_t, unsigned long, 1862306a36Sopenharmony_ci unsigned long)) 1962306a36Sopenharmony_ci{ 2062306a36Sopenharmony_ci struct scatterlist *dma_sg = startsg; /* pointer to current DMA */ 2162306a36Sopenharmony_ci unsigned int n_mappings = 0; 2262306a36Sopenharmony_ci unsigned long dma_offset = 0, dma_len = 0; 2362306a36Sopenharmony_ci __le64 *pdirp = NULL; 2462306a36Sopenharmony_ci 2562306a36Sopenharmony_ci /* Horrible hack. For efficiency's sake, dma_sg starts one 2662306a36Sopenharmony_ci * entry below the true start (it is immediately incremented 2762306a36Sopenharmony_ci * in the loop) */ 2862306a36Sopenharmony_ci dma_sg--; 2962306a36Sopenharmony_ci 3062306a36Sopenharmony_ci while (nents-- > 0) { 3162306a36Sopenharmony_ci unsigned long vaddr; 3262306a36Sopenharmony_ci long size; 3362306a36Sopenharmony_ci 3462306a36Sopenharmony_ci DBG_RUN_SG(" %d : %08lx %p/%05x\n", nents, 3562306a36Sopenharmony_ci (unsigned long)sg_dma_address(startsg), 3662306a36Sopenharmony_ci sg_virt(startsg), startsg->length 3762306a36Sopenharmony_ci ); 3862306a36Sopenharmony_ci 3962306a36Sopenharmony_ci 4062306a36Sopenharmony_ci /* 4162306a36Sopenharmony_ci ** Look for the start of a new DMA stream 4262306a36Sopenharmony_ci */ 4362306a36Sopenharmony_ci 4462306a36Sopenharmony_ci if (sg_dma_address(startsg) & PIDE_FLAG) { 4562306a36Sopenharmony_ci u32 pide = sg_dma_address(startsg) & ~PIDE_FLAG; 4662306a36Sopenharmony_ci 4762306a36Sopenharmony_ci BUG_ON(pdirp && (dma_len != sg_dma_len(dma_sg))); 4862306a36Sopenharmony_ci 4962306a36Sopenharmony_ci dma_sg++; 5062306a36Sopenharmony_ci 5162306a36Sopenharmony_ci dma_len = sg_dma_len(startsg); 5262306a36Sopenharmony_ci sg_dma_len(startsg) = 0; 5362306a36Sopenharmony_ci dma_offset = (unsigned long) pide & ~IOVP_MASK; 5462306a36Sopenharmony_ci n_mappings++; 5562306a36Sopenharmony_ci#if defined(ZX1_SUPPORT) 5662306a36Sopenharmony_ci /* Pluto IOMMU IO Virt Address is not zero based */ 5762306a36Sopenharmony_ci sg_dma_address(dma_sg) = pide | ioc->ibase; 5862306a36Sopenharmony_ci#else 5962306a36Sopenharmony_ci /* SBA, ccio, and dino are zero based. 6062306a36Sopenharmony_ci * Trying to save a few CPU cycles for most users. 6162306a36Sopenharmony_ci */ 6262306a36Sopenharmony_ci sg_dma_address(dma_sg) = pide; 6362306a36Sopenharmony_ci#endif 6462306a36Sopenharmony_ci pdirp = &(ioc->pdir_base[pide >> IOVP_SHIFT]); 6562306a36Sopenharmony_ci prefetchw(pdirp); 6662306a36Sopenharmony_ci } 6762306a36Sopenharmony_ci 6862306a36Sopenharmony_ci BUG_ON(pdirp == NULL); 6962306a36Sopenharmony_ci 7062306a36Sopenharmony_ci vaddr = (unsigned long)sg_virt(startsg); 7162306a36Sopenharmony_ci sg_dma_len(dma_sg) += startsg->length; 7262306a36Sopenharmony_ci size = startsg->length + dma_offset; 7362306a36Sopenharmony_ci dma_offset = 0; 7462306a36Sopenharmony_ci#ifdef IOMMU_MAP_STATS 7562306a36Sopenharmony_ci ioc->msg_pages += startsg->length >> IOVP_SHIFT; 7662306a36Sopenharmony_ci#endif 7762306a36Sopenharmony_ci do { 7862306a36Sopenharmony_ci iommu_io_pdir_entry(pdirp, KERNEL_SPACE, 7962306a36Sopenharmony_ci vaddr, hint); 8062306a36Sopenharmony_ci vaddr += IOVP_SIZE; 8162306a36Sopenharmony_ci size -= IOVP_SIZE; 8262306a36Sopenharmony_ci pdirp++; 8362306a36Sopenharmony_ci } while(unlikely(size > 0)); 8462306a36Sopenharmony_ci startsg++; 8562306a36Sopenharmony_ci } 8662306a36Sopenharmony_ci return(n_mappings); 8762306a36Sopenharmony_ci} 8862306a36Sopenharmony_ci 8962306a36Sopenharmony_ci 9062306a36Sopenharmony_ci/* 9162306a36Sopenharmony_ci** First pass is to walk the SG list and determine where the breaks are 9262306a36Sopenharmony_ci** in the DMA stream. Allocates PDIR entries but does not fill them. 9362306a36Sopenharmony_ci** Returns the number of DMA chunks. 9462306a36Sopenharmony_ci** 9562306a36Sopenharmony_ci** Doing the fill separate from the coalescing/allocation keeps the 9662306a36Sopenharmony_ci** code simpler. Future enhancement could make one pass through 9762306a36Sopenharmony_ci** the sglist do both. 9862306a36Sopenharmony_ci*/ 9962306a36Sopenharmony_ci 10062306a36Sopenharmony_cistatic inline unsigned int 10162306a36Sopenharmony_ciiommu_coalesce_chunks(struct ioc *ioc, struct device *dev, 10262306a36Sopenharmony_ci struct scatterlist *startsg, int nents, 10362306a36Sopenharmony_ci int (*iommu_alloc_range)(struct ioc *, struct device *, size_t)) 10462306a36Sopenharmony_ci{ 10562306a36Sopenharmony_ci struct scatterlist *contig_sg; /* contig chunk head */ 10662306a36Sopenharmony_ci unsigned long dma_offset, dma_len; /* start/len of DMA stream */ 10762306a36Sopenharmony_ci unsigned int n_mappings = 0; 10862306a36Sopenharmony_ci unsigned int max_seg_size = min(dma_get_max_seg_size(dev), 10962306a36Sopenharmony_ci (unsigned)DMA_CHUNK_SIZE); 11062306a36Sopenharmony_ci unsigned int max_seg_boundary = dma_get_seg_boundary(dev) + 1; 11162306a36Sopenharmony_ci if (max_seg_boundary) /* check if the addition above didn't overflow */ 11262306a36Sopenharmony_ci max_seg_size = min(max_seg_size, max_seg_boundary); 11362306a36Sopenharmony_ci 11462306a36Sopenharmony_ci while (nents > 0) { 11562306a36Sopenharmony_ci 11662306a36Sopenharmony_ci /* 11762306a36Sopenharmony_ci ** Prepare for first/next DMA stream 11862306a36Sopenharmony_ci */ 11962306a36Sopenharmony_ci contig_sg = startsg; 12062306a36Sopenharmony_ci dma_len = startsg->length; 12162306a36Sopenharmony_ci dma_offset = startsg->offset; 12262306a36Sopenharmony_ci 12362306a36Sopenharmony_ci /* PARANOID: clear entries */ 12462306a36Sopenharmony_ci sg_dma_address(startsg) = 0; 12562306a36Sopenharmony_ci sg_dma_len(startsg) = 0; 12662306a36Sopenharmony_ci 12762306a36Sopenharmony_ci /* 12862306a36Sopenharmony_ci ** This loop terminates one iteration "early" since 12962306a36Sopenharmony_ci ** it's always looking one "ahead". 13062306a36Sopenharmony_ci */ 13162306a36Sopenharmony_ci while(--nents > 0) { 13262306a36Sopenharmony_ci unsigned long prev_end, sg_start; 13362306a36Sopenharmony_ci 13462306a36Sopenharmony_ci prev_end = (unsigned long)sg_virt(startsg) + 13562306a36Sopenharmony_ci startsg->length; 13662306a36Sopenharmony_ci 13762306a36Sopenharmony_ci startsg++; 13862306a36Sopenharmony_ci sg_start = (unsigned long)sg_virt(startsg); 13962306a36Sopenharmony_ci 14062306a36Sopenharmony_ci /* PARANOID: clear entries */ 14162306a36Sopenharmony_ci sg_dma_address(startsg) = 0; 14262306a36Sopenharmony_ci sg_dma_len(startsg) = 0; 14362306a36Sopenharmony_ci 14462306a36Sopenharmony_ci /* 14562306a36Sopenharmony_ci ** First make sure current dma stream won't 14662306a36Sopenharmony_ci ** exceed max_seg_size if we coalesce the 14762306a36Sopenharmony_ci ** next entry. 14862306a36Sopenharmony_ci */ 14962306a36Sopenharmony_ci if (unlikely(ALIGN(dma_len + dma_offset + startsg->length, IOVP_SIZE) > 15062306a36Sopenharmony_ci max_seg_size)) 15162306a36Sopenharmony_ci break; 15262306a36Sopenharmony_ci 15362306a36Sopenharmony_ci /* 15462306a36Sopenharmony_ci * Next see if we can append the next chunk (i.e. 15562306a36Sopenharmony_ci * it must end on one page and begin on another, or 15662306a36Sopenharmony_ci * it must start on the same address as the previous 15762306a36Sopenharmony_ci * entry ended. 15862306a36Sopenharmony_ci */ 15962306a36Sopenharmony_ci if (unlikely((prev_end != sg_start) || 16062306a36Sopenharmony_ci ((prev_end | sg_start) & ~PAGE_MASK))) 16162306a36Sopenharmony_ci break; 16262306a36Sopenharmony_ci 16362306a36Sopenharmony_ci dma_len += startsg->length; 16462306a36Sopenharmony_ci } 16562306a36Sopenharmony_ci 16662306a36Sopenharmony_ci /* 16762306a36Sopenharmony_ci ** End of DMA Stream 16862306a36Sopenharmony_ci ** Terminate last VCONTIG block. 16962306a36Sopenharmony_ci ** Allocate space for DMA stream. 17062306a36Sopenharmony_ci */ 17162306a36Sopenharmony_ci sg_dma_len(contig_sg) = dma_len; 17262306a36Sopenharmony_ci dma_len = ALIGN(dma_len + dma_offset, IOVP_SIZE); 17362306a36Sopenharmony_ci sg_dma_address(contig_sg) = 17462306a36Sopenharmony_ci PIDE_FLAG 17562306a36Sopenharmony_ci | (iommu_alloc_range(ioc, dev, dma_len) << IOVP_SHIFT) 17662306a36Sopenharmony_ci | dma_offset; 17762306a36Sopenharmony_ci n_mappings++; 17862306a36Sopenharmony_ci } 17962306a36Sopenharmony_ci 18062306a36Sopenharmony_ci return n_mappings; 18162306a36Sopenharmony_ci} 18262306a36Sopenharmony_ci 183