162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Copyright 2017 IBM Corp. 462306a36Sopenharmony_ci */ 562306a36Sopenharmony_ci 662306a36Sopenharmony_ci#include <linux/hugetlb.h> 762306a36Sopenharmony_ci#include <linux/sched/mm.h> 862306a36Sopenharmony_ci#include <asm/opal-api.h> 962306a36Sopenharmony_ci#include <asm/pnv-pci.h> 1062306a36Sopenharmony_ci#include <misc/cxllib.h> 1162306a36Sopenharmony_ci 1262306a36Sopenharmony_ci#include "cxl.h" 1362306a36Sopenharmony_ci 1462306a36Sopenharmony_ci#define CXL_INVALID_DRA ~0ull 1562306a36Sopenharmony_ci#define CXL_DUMMY_READ_SIZE 128 1662306a36Sopenharmony_ci#define CXL_DUMMY_READ_ALIGN 8 1762306a36Sopenharmony_ci#define CXL_CAPI_WINDOW_START 0x2000000000000ull 1862306a36Sopenharmony_ci#define CXL_CAPI_WINDOW_LOG_SIZE 48 1962306a36Sopenharmony_ci#define CXL_XSL_CONFIG_CURRENT_VERSION CXL_XSL_CONFIG_VERSION1 2062306a36Sopenharmony_ci 2162306a36Sopenharmony_ci 2262306a36Sopenharmony_cibool cxllib_slot_is_supported(struct pci_dev *dev, unsigned long flags) 2362306a36Sopenharmony_ci{ 2462306a36Sopenharmony_ci int rc; 2562306a36Sopenharmony_ci u32 phb_index; 2662306a36Sopenharmony_ci u64 chip_id, capp_unit_id; 2762306a36Sopenharmony_ci 2862306a36Sopenharmony_ci /* No flags currently supported */ 2962306a36Sopenharmony_ci if (flags) 3062306a36Sopenharmony_ci return false; 3162306a36Sopenharmony_ci 3262306a36Sopenharmony_ci if (!cpu_has_feature(CPU_FTR_HVMODE)) 3362306a36Sopenharmony_ci return false; 3462306a36Sopenharmony_ci 3562306a36Sopenharmony_ci if (!cxl_is_power9()) 3662306a36Sopenharmony_ci return false; 3762306a36Sopenharmony_ci 3862306a36Sopenharmony_ci if (cxl_slot_is_switched(dev)) 3962306a36Sopenharmony_ci return false; 4062306a36Sopenharmony_ci 4162306a36Sopenharmony_ci /* on p9, some pci slots are not connected to a CAPP unit */ 4262306a36Sopenharmony_ci rc = cxl_calc_capp_routing(dev, &chip_id, &phb_index, &capp_unit_id); 4362306a36Sopenharmony_ci if (rc) 4462306a36Sopenharmony_ci return false; 4562306a36Sopenharmony_ci 4662306a36Sopenharmony_ci return true; 4762306a36Sopenharmony_ci} 4862306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(cxllib_slot_is_supported); 4962306a36Sopenharmony_ci 5062306a36Sopenharmony_cistatic DEFINE_MUTEX(dra_mutex); 5162306a36Sopenharmony_cistatic u64 dummy_read_addr = CXL_INVALID_DRA; 5262306a36Sopenharmony_ci 5362306a36Sopenharmony_cistatic int allocate_dummy_read_buf(void) 5462306a36Sopenharmony_ci{ 5562306a36Sopenharmony_ci u64 buf, vaddr; 5662306a36Sopenharmony_ci size_t buf_size; 5762306a36Sopenharmony_ci 5862306a36Sopenharmony_ci /* 5962306a36Sopenharmony_ci * Dummy read buffer is 128-byte long, aligned on a 6062306a36Sopenharmony_ci * 256-byte boundary and we need the physical address. 6162306a36Sopenharmony_ci */ 6262306a36Sopenharmony_ci buf_size = CXL_DUMMY_READ_SIZE + (1ull << CXL_DUMMY_READ_ALIGN); 6362306a36Sopenharmony_ci buf = (u64) kzalloc(buf_size, GFP_KERNEL); 6462306a36Sopenharmony_ci if (!buf) 6562306a36Sopenharmony_ci return -ENOMEM; 6662306a36Sopenharmony_ci 6762306a36Sopenharmony_ci vaddr = (buf + (1ull << CXL_DUMMY_READ_ALIGN) - 1) & 6862306a36Sopenharmony_ci (~0ull << CXL_DUMMY_READ_ALIGN); 6962306a36Sopenharmony_ci 7062306a36Sopenharmony_ci WARN((vaddr + CXL_DUMMY_READ_SIZE) > (buf + buf_size), 7162306a36Sopenharmony_ci "Dummy read buffer alignment issue"); 7262306a36Sopenharmony_ci dummy_read_addr = virt_to_phys((void *) vaddr); 7362306a36Sopenharmony_ci return 0; 7462306a36Sopenharmony_ci} 7562306a36Sopenharmony_ci 7662306a36Sopenharmony_ciint cxllib_get_xsl_config(struct pci_dev *dev, struct cxllib_xsl_config *cfg) 7762306a36Sopenharmony_ci{ 7862306a36Sopenharmony_ci int rc; 7962306a36Sopenharmony_ci u32 phb_index; 8062306a36Sopenharmony_ci u64 chip_id, capp_unit_id; 8162306a36Sopenharmony_ci 8262306a36Sopenharmony_ci if (!cpu_has_feature(CPU_FTR_HVMODE)) 8362306a36Sopenharmony_ci return -EINVAL; 8462306a36Sopenharmony_ci 8562306a36Sopenharmony_ci mutex_lock(&dra_mutex); 8662306a36Sopenharmony_ci if (dummy_read_addr == CXL_INVALID_DRA) { 8762306a36Sopenharmony_ci rc = allocate_dummy_read_buf(); 8862306a36Sopenharmony_ci if (rc) { 8962306a36Sopenharmony_ci mutex_unlock(&dra_mutex); 9062306a36Sopenharmony_ci return rc; 9162306a36Sopenharmony_ci } 9262306a36Sopenharmony_ci } 9362306a36Sopenharmony_ci mutex_unlock(&dra_mutex); 9462306a36Sopenharmony_ci 9562306a36Sopenharmony_ci rc = cxl_calc_capp_routing(dev, &chip_id, &phb_index, &capp_unit_id); 9662306a36Sopenharmony_ci if (rc) 9762306a36Sopenharmony_ci return rc; 9862306a36Sopenharmony_ci 9962306a36Sopenharmony_ci rc = cxl_get_xsl9_dsnctl(dev, capp_unit_id, &cfg->dsnctl); 10062306a36Sopenharmony_ci if (rc) 10162306a36Sopenharmony_ci return rc; 10262306a36Sopenharmony_ci 10362306a36Sopenharmony_ci cfg->version = CXL_XSL_CONFIG_CURRENT_VERSION; 10462306a36Sopenharmony_ci cfg->log_bar_size = CXL_CAPI_WINDOW_LOG_SIZE; 10562306a36Sopenharmony_ci cfg->bar_addr = CXL_CAPI_WINDOW_START; 10662306a36Sopenharmony_ci cfg->dra = dummy_read_addr; 10762306a36Sopenharmony_ci return 0; 10862306a36Sopenharmony_ci} 10962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(cxllib_get_xsl_config); 11062306a36Sopenharmony_ci 11162306a36Sopenharmony_ciint cxllib_switch_phb_mode(struct pci_dev *dev, enum cxllib_mode mode, 11262306a36Sopenharmony_ci unsigned long flags) 11362306a36Sopenharmony_ci{ 11462306a36Sopenharmony_ci int rc = 0; 11562306a36Sopenharmony_ci 11662306a36Sopenharmony_ci if (!cpu_has_feature(CPU_FTR_HVMODE)) 11762306a36Sopenharmony_ci return -EINVAL; 11862306a36Sopenharmony_ci 11962306a36Sopenharmony_ci switch (mode) { 12062306a36Sopenharmony_ci case CXL_MODE_PCI: 12162306a36Sopenharmony_ci /* 12262306a36Sopenharmony_ci * We currently don't support going back to PCI mode 12362306a36Sopenharmony_ci * However, we'll turn the invalidations off, so that 12462306a36Sopenharmony_ci * the firmware doesn't have to ack them and can do 12562306a36Sopenharmony_ci * things like reset, etc.. with no worries. 12662306a36Sopenharmony_ci * So always return EPERM (can't go back to PCI) or 12762306a36Sopenharmony_ci * EBUSY if we couldn't even turn off snooping 12862306a36Sopenharmony_ci */ 12962306a36Sopenharmony_ci rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_OFF); 13062306a36Sopenharmony_ci if (rc) 13162306a36Sopenharmony_ci rc = -EBUSY; 13262306a36Sopenharmony_ci else 13362306a36Sopenharmony_ci rc = -EPERM; 13462306a36Sopenharmony_ci break; 13562306a36Sopenharmony_ci case CXL_MODE_CXL: 13662306a36Sopenharmony_ci /* DMA only supported on TVT1 for the time being */ 13762306a36Sopenharmony_ci if (flags != CXL_MODE_DMA_TVT1) 13862306a36Sopenharmony_ci return -EINVAL; 13962306a36Sopenharmony_ci rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_DMA_TVT1); 14062306a36Sopenharmony_ci if (rc) 14162306a36Sopenharmony_ci return rc; 14262306a36Sopenharmony_ci rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_ON); 14362306a36Sopenharmony_ci break; 14462306a36Sopenharmony_ci default: 14562306a36Sopenharmony_ci rc = -EINVAL; 14662306a36Sopenharmony_ci } 14762306a36Sopenharmony_ci return rc; 14862306a36Sopenharmony_ci} 14962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(cxllib_switch_phb_mode); 15062306a36Sopenharmony_ci 15162306a36Sopenharmony_ci/* 15262306a36Sopenharmony_ci * When switching the PHB to capi mode, the TVT#1 entry for 15362306a36Sopenharmony_ci * the Partitionable Endpoint is set in bypass mode, like 15462306a36Sopenharmony_ci * in PCI mode. 15562306a36Sopenharmony_ci * Configure the device dma to use TVT#1, which is done 15662306a36Sopenharmony_ci * by calling dma_set_mask() with a mask large enough. 15762306a36Sopenharmony_ci */ 15862306a36Sopenharmony_ciint cxllib_set_device_dma(struct pci_dev *dev, unsigned long flags) 15962306a36Sopenharmony_ci{ 16062306a36Sopenharmony_ci int rc; 16162306a36Sopenharmony_ci 16262306a36Sopenharmony_ci if (flags) 16362306a36Sopenharmony_ci return -EINVAL; 16462306a36Sopenharmony_ci 16562306a36Sopenharmony_ci rc = dma_set_mask(&dev->dev, DMA_BIT_MASK(64)); 16662306a36Sopenharmony_ci return rc; 16762306a36Sopenharmony_ci} 16862306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(cxllib_set_device_dma); 16962306a36Sopenharmony_ci 17062306a36Sopenharmony_ciint cxllib_get_PE_attributes(struct task_struct *task, 17162306a36Sopenharmony_ci unsigned long translation_mode, 17262306a36Sopenharmony_ci struct cxllib_pe_attributes *attr) 17362306a36Sopenharmony_ci{ 17462306a36Sopenharmony_ci if (translation_mode != CXL_TRANSLATED_MODE && 17562306a36Sopenharmony_ci translation_mode != CXL_REAL_MODE) 17662306a36Sopenharmony_ci return -EINVAL; 17762306a36Sopenharmony_ci 17862306a36Sopenharmony_ci attr->sr = cxl_calculate_sr(false, 17962306a36Sopenharmony_ci task == NULL, 18062306a36Sopenharmony_ci translation_mode == CXL_REAL_MODE, 18162306a36Sopenharmony_ci true); 18262306a36Sopenharmony_ci attr->lpid = mfspr(SPRN_LPID); 18362306a36Sopenharmony_ci if (task) { 18462306a36Sopenharmony_ci struct mm_struct *mm = get_task_mm(task); 18562306a36Sopenharmony_ci if (mm == NULL) 18662306a36Sopenharmony_ci return -EINVAL; 18762306a36Sopenharmony_ci /* 18862306a36Sopenharmony_ci * Caller is keeping a reference on mm_users for as long 18962306a36Sopenharmony_ci * as XSL uses the memory context 19062306a36Sopenharmony_ci */ 19162306a36Sopenharmony_ci attr->pid = mm->context.id; 19262306a36Sopenharmony_ci mmput(mm); 19362306a36Sopenharmony_ci attr->tid = task->thread.tidr; 19462306a36Sopenharmony_ci } else { 19562306a36Sopenharmony_ci attr->pid = 0; 19662306a36Sopenharmony_ci attr->tid = 0; 19762306a36Sopenharmony_ci } 19862306a36Sopenharmony_ci return 0; 19962306a36Sopenharmony_ci} 20062306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(cxllib_get_PE_attributes); 20162306a36Sopenharmony_ci 20262306a36Sopenharmony_cistatic int get_vma_info(struct mm_struct *mm, u64 addr, 20362306a36Sopenharmony_ci u64 *vma_start, u64 *vma_end, 20462306a36Sopenharmony_ci unsigned long *page_size) 20562306a36Sopenharmony_ci{ 20662306a36Sopenharmony_ci struct vm_area_struct *vma = NULL; 20762306a36Sopenharmony_ci int rc = 0; 20862306a36Sopenharmony_ci 20962306a36Sopenharmony_ci mmap_read_lock(mm); 21062306a36Sopenharmony_ci 21162306a36Sopenharmony_ci vma = find_vma(mm, addr); 21262306a36Sopenharmony_ci if (!vma) { 21362306a36Sopenharmony_ci rc = -EFAULT; 21462306a36Sopenharmony_ci goto out; 21562306a36Sopenharmony_ci } 21662306a36Sopenharmony_ci *page_size = vma_kernel_pagesize(vma); 21762306a36Sopenharmony_ci *vma_start = vma->vm_start; 21862306a36Sopenharmony_ci *vma_end = vma->vm_end; 21962306a36Sopenharmony_ciout: 22062306a36Sopenharmony_ci mmap_read_unlock(mm); 22162306a36Sopenharmony_ci return rc; 22262306a36Sopenharmony_ci} 22362306a36Sopenharmony_ci 22462306a36Sopenharmony_ciint cxllib_handle_fault(struct mm_struct *mm, u64 addr, u64 size, u64 flags) 22562306a36Sopenharmony_ci{ 22662306a36Sopenharmony_ci int rc; 22762306a36Sopenharmony_ci u64 dar, vma_start, vma_end; 22862306a36Sopenharmony_ci unsigned long page_size; 22962306a36Sopenharmony_ci 23062306a36Sopenharmony_ci if (mm == NULL) 23162306a36Sopenharmony_ci return -EFAULT; 23262306a36Sopenharmony_ci 23362306a36Sopenharmony_ci /* 23462306a36Sopenharmony_ci * The buffer we have to process can extend over several pages 23562306a36Sopenharmony_ci * and may also cover several VMAs. 23662306a36Sopenharmony_ci * We iterate over all the pages. The page size could vary 23762306a36Sopenharmony_ci * between VMAs. 23862306a36Sopenharmony_ci */ 23962306a36Sopenharmony_ci rc = get_vma_info(mm, addr, &vma_start, &vma_end, &page_size); 24062306a36Sopenharmony_ci if (rc) 24162306a36Sopenharmony_ci return rc; 24262306a36Sopenharmony_ci 24362306a36Sopenharmony_ci for (dar = (addr & ~(page_size - 1)); dar < (addr + size); 24462306a36Sopenharmony_ci dar += page_size) { 24562306a36Sopenharmony_ci if (dar < vma_start || dar >= vma_end) { 24662306a36Sopenharmony_ci /* 24762306a36Sopenharmony_ci * We don't hold mm->mmap_lock while iterating, since 24862306a36Sopenharmony_ci * the lock is required by one of the lower-level page 24962306a36Sopenharmony_ci * fault processing functions and it could 25062306a36Sopenharmony_ci * create a deadlock. 25162306a36Sopenharmony_ci * 25262306a36Sopenharmony_ci * It means the VMAs can be altered between 2 25362306a36Sopenharmony_ci * loop iterations and we could theoretically 25462306a36Sopenharmony_ci * miss a page (however unlikely). But that's 25562306a36Sopenharmony_ci * not really a problem, as the driver will 25662306a36Sopenharmony_ci * retry access, get another page fault on the 25762306a36Sopenharmony_ci * missing page and call us again. 25862306a36Sopenharmony_ci */ 25962306a36Sopenharmony_ci rc = get_vma_info(mm, dar, &vma_start, &vma_end, 26062306a36Sopenharmony_ci &page_size); 26162306a36Sopenharmony_ci if (rc) 26262306a36Sopenharmony_ci return rc; 26362306a36Sopenharmony_ci } 26462306a36Sopenharmony_ci 26562306a36Sopenharmony_ci rc = cxl_handle_mm_fault(mm, flags, dar); 26662306a36Sopenharmony_ci if (rc) 26762306a36Sopenharmony_ci return -EFAULT; 26862306a36Sopenharmony_ci } 26962306a36Sopenharmony_ci return 0; 27062306a36Sopenharmony_ci} 27162306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(cxllib_handle_fault); 272