162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Copyright 2017 IBM Corp.
462306a36Sopenharmony_ci */
562306a36Sopenharmony_ci
662306a36Sopenharmony_ci#include <linux/hugetlb.h>
762306a36Sopenharmony_ci#include <linux/sched/mm.h>
862306a36Sopenharmony_ci#include <asm/opal-api.h>
962306a36Sopenharmony_ci#include <asm/pnv-pci.h>
1062306a36Sopenharmony_ci#include <misc/cxllib.h>
1162306a36Sopenharmony_ci
1262306a36Sopenharmony_ci#include "cxl.h"
1362306a36Sopenharmony_ci
/* Sentinel held by dummy_read_addr until the dummy read buffer is set up */
#define CXL_INVALID_DRA			(~0ull)
/* Usable length, in bytes, of the dummy read buffer */
#define CXL_DUMMY_READ_SIZE		128
/* log2 of the dummy read buffer alignment (1 << 8 = 256 bytes) */
#define CXL_DUMMY_READ_ALIGN		8
/* Values reported as bar_addr and log_bar_size by cxllib_get_xsl_config() */
#define CXL_CAPI_WINDOW_START		0x2000000000000ull
#define CXL_CAPI_WINDOW_LOG_SIZE	48
/* Version reported in the XSL configuration handed back to callers */
#define CXL_XSL_CONFIG_CURRENT_VERSION	CXL_XSL_CONFIG_VERSION1
2062306a36Sopenharmony_ci
2162306a36Sopenharmony_ci
2262306a36Sopenharmony_cibool cxllib_slot_is_supported(struct pci_dev *dev, unsigned long flags)
2362306a36Sopenharmony_ci{
2462306a36Sopenharmony_ci	int rc;
2562306a36Sopenharmony_ci	u32 phb_index;
2662306a36Sopenharmony_ci	u64 chip_id, capp_unit_id;
2762306a36Sopenharmony_ci
2862306a36Sopenharmony_ci	/* No flags currently supported */
2962306a36Sopenharmony_ci	if (flags)
3062306a36Sopenharmony_ci		return false;
3162306a36Sopenharmony_ci
3262306a36Sopenharmony_ci	if (!cpu_has_feature(CPU_FTR_HVMODE))
3362306a36Sopenharmony_ci		return false;
3462306a36Sopenharmony_ci
3562306a36Sopenharmony_ci	if (!cxl_is_power9())
3662306a36Sopenharmony_ci		return false;
3762306a36Sopenharmony_ci
3862306a36Sopenharmony_ci	if (cxl_slot_is_switched(dev))
3962306a36Sopenharmony_ci		return false;
4062306a36Sopenharmony_ci
4162306a36Sopenharmony_ci	/* on p9, some pci slots are not connected to a CAPP unit */
4262306a36Sopenharmony_ci	rc = cxl_calc_capp_routing(dev, &chip_id, &phb_index, &capp_unit_id);
4362306a36Sopenharmony_ci	if (rc)
4462306a36Sopenharmony_ci		return false;
4562306a36Sopenharmony_ci
4662306a36Sopenharmony_ci	return true;
4762306a36Sopenharmony_ci}
4862306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(cxllib_slot_is_supported);
4962306a36Sopenharmony_ci
5062306a36Sopenharmony_cistatic DEFINE_MUTEX(dra_mutex);
5162306a36Sopenharmony_cistatic u64 dummy_read_addr = CXL_INVALID_DRA;
5262306a36Sopenharmony_ci
5362306a36Sopenharmony_cistatic int allocate_dummy_read_buf(void)
5462306a36Sopenharmony_ci{
5562306a36Sopenharmony_ci	u64 buf, vaddr;
5662306a36Sopenharmony_ci	size_t buf_size;
5762306a36Sopenharmony_ci
5862306a36Sopenharmony_ci	/*
5962306a36Sopenharmony_ci	 * Dummy read buffer is 128-byte long, aligned on a
6062306a36Sopenharmony_ci	 * 256-byte boundary and we need the physical address.
6162306a36Sopenharmony_ci	 */
6262306a36Sopenharmony_ci	buf_size = CXL_DUMMY_READ_SIZE + (1ull << CXL_DUMMY_READ_ALIGN);
6362306a36Sopenharmony_ci	buf = (u64) kzalloc(buf_size, GFP_KERNEL);
6462306a36Sopenharmony_ci	if (!buf)
6562306a36Sopenharmony_ci		return -ENOMEM;
6662306a36Sopenharmony_ci
6762306a36Sopenharmony_ci	vaddr = (buf + (1ull << CXL_DUMMY_READ_ALIGN) - 1) &
6862306a36Sopenharmony_ci					(~0ull << CXL_DUMMY_READ_ALIGN);
6962306a36Sopenharmony_ci
7062306a36Sopenharmony_ci	WARN((vaddr + CXL_DUMMY_READ_SIZE) > (buf + buf_size),
7162306a36Sopenharmony_ci		"Dummy read buffer alignment issue");
7262306a36Sopenharmony_ci	dummy_read_addr = virt_to_phys((void *) vaddr);
7362306a36Sopenharmony_ci	return 0;
7462306a36Sopenharmony_ci}
7562306a36Sopenharmony_ci
7662306a36Sopenharmony_ciint cxllib_get_xsl_config(struct pci_dev *dev, struct cxllib_xsl_config *cfg)
7762306a36Sopenharmony_ci{
7862306a36Sopenharmony_ci	int rc;
7962306a36Sopenharmony_ci	u32 phb_index;
8062306a36Sopenharmony_ci	u64 chip_id, capp_unit_id;
8162306a36Sopenharmony_ci
8262306a36Sopenharmony_ci	if (!cpu_has_feature(CPU_FTR_HVMODE))
8362306a36Sopenharmony_ci		return -EINVAL;
8462306a36Sopenharmony_ci
8562306a36Sopenharmony_ci	mutex_lock(&dra_mutex);
8662306a36Sopenharmony_ci	if (dummy_read_addr == CXL_INVALID_DRA) {
8762306a36Sopenharmony_ci		rc = allocate_dummy_read_buf();
8862306a36Sopenharmony_ci		if (rc) {
8962306a36Sopenharmony_ci			mutex_unlock(&dra_mutex);
9062306a36Sopenharmony_ci			return rc;
9162306a36Sopenharmony_ci		}
9262306a36Sopenharmony_ci	}
9362306a36Sopenharmony_ci	mutex_unlock(&dra_mutex);
9462306a36Sopenharmony_ci
9562306a36Sopenharmony_ci	rc = cxl_calc_capp_routing(dev, &chip_id, &phb_index, &capp_unit_id);
9662306a36Sopenharmony_ci	if (rc)
9762306a36Sopenharmony_ci		return rc;
9862306a36Sopenharmony_ci
9962306a36Sopenharmony_ci	rc = cxl_get_xsl9_dsnctl(dev, capp_unit_id, &cfg->dsnctl);
10062306a36Sopenharmony_ci	if (rc)
10162306a36Sopenharmony_ci		return rc;
10262306a36Sopenharmony_ci
10362306a36Sopenharmony_ci	cfg->version  = CXL_XSL_CONFIG_CURRENT_VERSION;
10462306a36Sopenharmony_ci	cfg->log_bar_size = CXL_CAPI_WINDOW_LOG_SIZE;
10562306a36Sopenharmony_ci	cfg->bar_addr = CXL_CAPI_WINDOW_START;
10662306a36Sopenharmony_ci	cfg->dra = dummy_read_addr;
10762306a36Sopenharmony_ci	return 0;
10862306a36Sopenharmony_ci}
10962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(cxllib_get_xsl_config);
11062306a36Sopenharmony_ci
11162306a36Sopenharmony_ciint cxllib_switch_phb_mode(struct pci_dev *dev, enum cxllib_mode mode,
11262306a36Sopenharmony_ci			unsigned long flags)
11362306a36Sopenharmony_ci{
11462306a36Sopenharmony_ci	int rc = 0;
11562306a36Sopenharmony_ci
11662306a36Sopenharmony_ci	if (!cpu_has_feature(CPU_FTR_HVMODE))
11762306a36Sopenharmony_ci		return -EINVAL;
11862306a36Sopenharmony_ci
11962306a36Sopenharmony_ci	switch (mode) {
12062306a36Sopenharmony_ci	case CXL_MODE_PCI:
12162306a36Sopenharmony_ci		/*
12262306a36Sopenharmony_ci		 * We currently don't support going back to PCI mode
12362306a36Sopenharmony_ci		 * However, we'll turn the invalidations off, so that
12462306a36Sopenharmony_ci		 * the firmware doesn't have to ack them and can do
12562306a36Sopenharmony_ci		 * things like reset, etc.. with no worries.
12662306a36Sopenharmony_ci		 * So always return EPERM (can't go back to PCI) or
12762306a36Sopenharmony_ci		 * EBUSY if we couldn't even turn off snooping
12862306a36Sopenharmony_ci		 */
12962306a36Sopenharmony_ci		rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_OFF);
13062306a36Sopenharmony_ci		if (rc)
13162306a36Sopenharmony_ci			rc = -EBUSY;
13262306a36Sopenharmony_ci		else
13362306a36Sopenharmony_ci			rc = -EPERM;
13462306a36Sopenharmony_ci		break;
13562306a36Sopenharmony_ci	case CXL_MODE_CXL:
13662306a36Sopenharmony_ci		/* DMA only supported on TVT1 for the time being */
13762306a36Sopenharmony_ci		if (flags != CXL_MODE_DMA_TVT1)
13862306a36Sopenharmony_ci			return -EINVAL;
13962306a36Sopenharmony_ci		rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_DMA_TVT1);
14062306a36Sopenharmony_ci		if (rc)
14162306a36Sopenharmony_ci			return rc;
14262306a36Sopenharmony_ci		rc = pnv_phb_to_cxl_mode(dev, OPAL_PHB_CAPI_MODE_SNOOP_ON);
14362306a36Sopenharmony_ci		break;
14462306a36Sopenharmony_ci	default:
14562306a36Sopenharmony_ci		rc = -EINVAL;
14662306a36Sopenharmony_ci	}
14762306a36Sopenharmony_ci	return rc;
14862306a36Sopenharmony_ci}
14962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(cxllib_switch_phb_mode);
15062306a36Sopenharmony_ci
15162306a36Sopenharmony_ci/*
15262306a36Sopenharmony_ci * When switching the PHB to capi mode, the TVT#1 entry for
15362306a36Sopenharmony_ci * the Partitionable Endpoint is set in bypass mode, like
15462306a36Sopenharmony_ci * in PCI mode.
15562306a36Sopenharmony_ci * Configure the device dma to use TVT#1, which is done
15662306a36Sopenharmony_ci * by calling dma_set_mask() with a mask large enough.
15762306a36Sopenharmony_ci */
15862306a36Sopenharmony_ciint cxllib_set_device_dma(struct pci_dev *dev, unsigned long flags)
15962306a36Sopenharmony_ci{
16062306a36Sopenharmony_ci	int rc;
16162306a36Sopenharmony_ci
16262306a36Sopenharmony_ci	if (flags)
16362306a36Sopenharmony_ci		return -EINVAL;
16462306a36Sopenharmony_ci
16562306a36Sopenharmony_ci	rc = dma_set_mask(&dev->dev, DMA_BIT_MASK(64));
16662306a36Sopenharmony_ci	return rc;
16762306a36Sopenharmony_ci}
16862306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(cxllib_set_device_dma);
16962306a36Sopenharmony_ci
17062306a36Sopenharmony_ciint cxllib_get_PE_attributes(struct task_struct *task,
17162306a36Sopenharmony_ci			     unsigned long translation_mode,
17262306a36Sopenharmony_ci			     struct cxllib_pe_attributes *attr)
17362306a36Sopenharmony_ci{
17462306a36Sopenharmony_ci	if (translation_mode != CXL_TRANSLATED_MODE &&
17562306a36Sopenharmony_ci		translation_mode != CXL_REAL_MODE)
17662306a36Sopenharmony_ci		return -EINVAL;
17762306a36Sopenharmony_ci
17862306a36Sopenharmony_ci	attr->sr = cxl_calculate_sr(false,
17962306a36Sopenharmony_ci				task == NULL,
18062306a36Sopenharmony_ci				translation_mode == CXL_REAL_MODE,
18162306a36Sopenharmony_ci				true);
18262306a36Sopenharmony_ci	attr->lpid = mfspr(SPRN_LPID);
18362306a36Sopenharmony_ci	if (task) {
18462306a36Sopenharmony_ci		struct mm_struct *mm = get_task_mm(task);
18562306a36Sopenharmony_ci		if (mm == NULL)
18662306a36Sopenharmony_ci			return -EINVAL;
18762306a36Sopenharmony_ci		/*
18862306a36Sopenharmony_ci		 * Caller is keeping a reference on mm_users for as long
18962306a36Sopenharmony_ci		 * as XSL uses the memory context
19062306a36Sopenharmony_ci		 */
19162306a36Sopenharmony_ci		attr->pid = mm->context.id;
19262306a36Sopenharmony_ci		mmput(mm);
19362306a36Sopenharmony_ci		attr->tid = task->thread.tidr;
19462306a36Sopenharmony_ci	} else {
19562306a36Sopenharmony_ci		attr->pid = 0;
19662306a36Sopenharmony_ci		attr->tid = 0;
19762306a36Sopenharmony_ci	}
19862306a36Sopenharmony_ci	return 0;
19962306a36Sopenharmony_ci}
20062306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(cxllib_get_PE_attributes);
20162306a36Sopenharmony_ci
20262306a36Sopenharmony_cistatic int get_vma_info(struct mm_struct *mm, u64 addr,
20362306a36Sopenharmony_ci			u64 *vma_start, u64 *vma_end,
20462306a36Sopenharmony_ci			unsigned long *page_size)
20562306a36Sopenharmony_ci{
20662306a36Sopenharmony_ci	struct vm_area_struct *vma = NULL;
20762306a36Sopenharmony_ci	int rc = 0;
20862306a36Sopenharmony_ci
20962306a36Sopenharmony_ci	mmap_read_lock(mm);
21062306a36Sopenharmony_ci
21162306a36Sopenharmony_ci	vma = find_vma(mm, addr);
21262306a36Sopenharmony_ci	if (!vma) {
21362306a36Sopenharmony_ci		rc = -EFAULT;
21462306a36Sopenharmony_ci		goto out;
21562306a36Sopenharmony_ci	}
21662306a36Sopenharmony_ci	*page_size = vma_kernel_pagesize(vma);
21762306a36Sopenharmony_ci	*vma_start = vma->vm_start;
21862306a36Sopenharmony_ci	*vma_end = vma->vm_end;
21962306a36Sopenharmony_ciout:
22062306a36Sopenharmony_ci	mmap_read_unlock(mm);
22162306a36Sopenharmony_ci	return rc;
22262306a36Sopenharmony_ci}
22362306a36Sopenharmony_ci
22462306a36Sopenharmony_ciint cxllib_handle_fault(struct mm_struct *mm, u64 addr, u64 size, u64 flags)
22562306a36Sopenharmony_ci{
22662306a36Sopenharmony_ci	int rc;
22762306a36Sopenharmony_ci	u64 dar, vma_start, vma_end;
22862306a36Sopenharmony_ci	unsigned long page_size;
22962306a36Sopenharmony_ci
23062306a36Sopenharmony_ci	if (mm == NULL)
23162306a36Sopenharmony_ci		return -EFAULT;
23262306a36Sopenharmony_ci
23362306a36Sopenharmony_ci	/*
23462306a36Sopenharmony_ci	 * The buffer we have to process can extend over several pages
23562306a36Sopenharmony_ci	 * and may also cover several VMAs.
23662306a36Sopenharmony_ci	 * We iterate over all the pages. The page size could vary
23762306a36Sopenharmony_ci	 * between VMAs.
23862306a36Sopenharmony_ci	 */
23962306a36Sopenharmony_ci	rc = get_vma_info(mm, addr, &vma_start, &vma_end, &page_size);
24062306a36Sopenharmony_ci	if (rc)
24162306a36Sopenharmony_ci		return rc;
24262306a36Sopenharmony_ci
24362306a36Sopenharmony_ci	for (dar = (addr & ~(page_size - 1)); dar < (addr + size);
24462306a36Sopenharmony_ci	     dar += page_size) {
24562306a36Sopenharmony_ci		if (dar < vma_start || dar >= vma_end) {
24662306a36Sopenharmony_ci			/*
24762306a36Sopenharmony_ci			 * We don't hold mm->mmap_lock while iterating, since
24862306a36Sopenharmony_ci			 * the lock is required by one of the lower-level page
24962306a36Sopenharmony_ci			 * fault processing functions and it could
25062306a36Sopenharmony_ci			 * create a deadlock.
25162306a36Sopenharmony_ci			 *
25262306a36Sopenharmony_ci			 * It means the VMAs can be altered between 2
25362306a36Sopenharmony_ci			 * loop iterations and we could theoretically
25462306a36Sopenharmony_ci			 * miss a page (however unlikely). But that's
25562306a36Sopenharmony_ci			 * not really a problem, as the driver will
25662306a36Sopenharmony_ci			 * retry access, get another page fault on the
25762306a36Sopenharmony_ci			 * missing page and call us again.
25862306a36Sopenharmony_ci			 */
25962306a36Sopenharmony_ci			rc = get_vma_info(mm, dar, &vma_start, &vma_end,
26062306a36Sopenharmony_ci					&page_size);
26162306a36Sopenharmony_ci			if (rc)
26262306a36Sopenharmony_ci				return rc;
26362306a36Sopenharmony_ci		}
26462306a36Sopenharmony_ci
26562306a36Sopenharmony_ci		rc = cxl_handle_mm_fault(mm, flags, dar);
26662306a36Sopenharmony_ci		if (rc)
26762306a36Sopenharmony_ci			return -EFAULT;
26862306a36Sopenharmony_ci	}
26962306a36Sopenharmony_ci	return 0;
27062306a36Sopenharmony_ci}
27162306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(cxllib_handle_fault);
272