// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2023 Advanced Micro Devices, Inc. */
362306a36Sopenharmony_ci
462306a36Sopenharmony_ci#include <linux/interval_tree.h>
562306a36Sopenharmony_ci#include <linux/vfio.h>
662306a36Sopenharmony_ci
762306a36Sopenharmony_ci#include <linux/pds/pds_common.h>
862306a36Sopenharmony_ci#include <linux/pds/pds_core_if.h>
962306a36Sopenharmony_ci#include <linux/pds/pds_adminq.h>
1062306a36Sopenharmony_ci
1162306a36Sopenharmony_ci#include "vfio_dev.h"
1262306a36Sopenharmony_ci#include "cmds.h"
1362306a36Sopenharmony_ci#include "dirty.h"
1462306a36Sopenharmony_ci
/*
 * Values for the read_seq argument of pds_vfio_dirty_seq_ack():
 * WRITE_ACK selects the "write_ack" operation, READ_SEQ the "read_seq" one.
 */
#define WRITE_ACK false
#define READ_SEQ true
1762306a36Sopenharmony_ci
1862306a36Sopenharmony_cibool pds_vfio_dirty_is_enabled(struct pds_vfio_pci_device *pds_vfio)
1962306a36Sopenharmony_ci{
2062306a36Sopenharmony_ci	return pds_vfio->dirty.is_enabled;
2162306a36Sopenharmony_ci}
2262306a36Sopenharmony_ci
2362306a36Sopenharmony_civoid pds_vfio_dirty_set_enabled(struct pds_vfio_pci_device *pds_vfio)
2462306a36Sopenharmony_ci{
2562306a36Sopenharmony_ci	pds_vfio->dirty.is_enabled = true;
2662306a36Sopenharmony_ci}
2762306a36Sopenharmony_ci
2862306a36Sopenharmony_civoid pds_vfio_dirty_set_disabled(struct pds_vfio_pci_device *pds_vfio)
2962306a36Sopenharmony_ci{
3062306a36Sopenharmony_ci	pds_vfio->dirty.is_enabled = false;
3162306a36Sopenharmony_ci}
3262306a36Sopenharmony_ci
3362306a36Sopenharmony_cistatic void
3462306a36Sopenharmony_cipds_vfio_print_guest_region_info(struct pds_vfio_pci_device *pds_vfio,
3562306a36Sopenharmony_ci				 u8 max_regions)
3662306a36Sopenharmony_ci{
3762306a36Sopenharmony_ci	int len = max_regions * sizeof(struct pds_lm_dirty_region_info);
3862306a36Sopenharmony_ci	struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev;
3962306a36Sopenharmony_ci	struct device *pdsc_dev = &pci_physfn(pdev)->dev;
4062306a36Sopenharmony_ci	struct pds_lm_dirty_region_info *region_info;
4162306a36Sopenharmony_ci	dma_addr_t regions_dma;
4262306a36Sopenharmony_ci	u8 num_regions;
4362306a36Sopenharmony_ci	int err;
4462306a36Sopenharmony_ci
4562306a36Sopenharmony_ci	region_info = kcalloc(max_regions,
4662306a36Sopenharmony_ci			      sizeof(struct pds_lm_dirty_region_info),
4762306a36Sopenharmony_ci			      GFP_KERNEL);
4862306a36Sopenharmony_ci	if (!region_info)
4962306a36Sopenharmony_ci		return;
5062306a36Sopenharmony_ci
5162306a36Sopenharmony_ci	regions_dma =
5262306a36Sopenharmony_ci		dma_map_single(pdsc_dev, region_info, len, DMA_FROM_DEVICE);
5362306a36Sopenharmony_ci	if (dma_mapping_error(pdsc_dev, regions_dma))
5462306a36Sopenharmony_ci		goto out_free_region_info;
5562306a36Sopenharmony_ci
5662306a36Sopenharmony_ci	err = pds_vfio_dirty_status_cmd(pds_vfio, regions_dma, &max_regions,
5762306a36Sopenharmony_ci					&num_regions);
5862306a36Sopenharmony_ci	dma_unmap_single(pdsc_dev, regions_dma, len, DMA_FROM_DEVICE);
5962306a36Sopenharmony_ci	if (err)
6062306a36Sopenharmony_ci		goto out_free_region_info;
6162306a36Sopenharmony_ci
6262306a36Sopenharmony_ci	for (unsigned int i = 0; i < num_regions; i++)
6362306a36Sopenharmony_ci		dev_dbg(&pdev->dev,
6462306a36Sopenharmony_ci			"region_info[%d]: dma_base 0x%llx page_count %u page_size_log2 %u\n",
6562306a36Sopenharmony_ci			i, le64_to_cpu(region_info[i].dma_base),
6662306a36Sopenharmony_ci			le32_to_cpu(region_info[i].page_count),
6762306a36Sopenharmony_ci			region_info[i].page_size_log2);
6862306a36Sopenharmony_ci
6962306a36Sopenharmony_ciout_free_region_info:
7062306a36Sopenharmony_ci	kfree(region_info);
7162306a36Sopenharmony_ci}
7262306a36Sopenharmony_ci
7362306a36Sopenharmony_cistatic int pds_vfio_dirty_alloc_bitmaps(struct pds_vfio_dirty *dirty,
7462306a36Sopenharmony_ci					unsigned long bytes)
7562306a36Sopenharmony_ci{
7662306a36Sopenharmony_ci	unsigned long *host_seq_bmp, *host_ack_bmp;
7762306a36Sopenharmony_ci
7862306a36Sopenharmony_ci	host_seq_bmp = vzalloc(bytes);
7962306a36Sopenharmony_ci	if (!host_seq_bmp)
8062306a36Sopenharmony_ci		return -ENOMEM;
8162306a36Sopenharmony_ci
8262306a36Sopenharmony_ci	host_ack_bmp = vzalloc(bytes);
8362306a36Sopenharmony_ci	if (!host_ack_bmp) {
8462306a36Sopenharmony_ci		bitmap_free(host_seq_bmp);
8562306a36Sopenharmony_ci		return -ENOMEM;
8662306a36Sopenharmony_ci	}
8762306a36Sopenharmony_ci
8862306a36Sopenharmony_ci	dirty->host_seq.bmp = host_seq_bmp;
8962306a36Sopenharmony_ci	dirty->host_ack.bmp = host_ack_bmp;
9062306a36Sopenharmony_ci
9162306a36Sopenharmony_ci	return 0;
9262306a36Sopenharmony_ci}
9362306a36Sopenharmony_ci
9462306a36Sopenharmony_cistatic void pds_vfio_dirty_free_bitmaps(struct pds_vfio_dirty *dirty)
9562306a36Sopenharmony_ci{
9662306a36Sopenharmony_ci	vfree(dirty->host_seq.bmp);
9762306a36Sopenharmony_ci	vfree(dirty->host_ack.bmp);
9862306a36Sopenharmony_ci	dirty->host_seq.bmp = NULL;
9962306a36Sopenharmony_ci	dirty->host_ack.bmp = NULL;
10062306a36Sopenharmony_ci}
10162306a36Sopenharmony_ci
10262306a36Sopenharmony_cistatic void __pds_vfio_dirty_free_sgl(struct pds_vfio_pci_device *pds_vfio,
10362306a36Sopenharmony_ci				      struct pds_vfio_bmp_info *bmp_info)
10462306a36Sopenharmony_ci{
10562306a36Sopenharmony_ci	struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev;
10662306a36Sopenharmony_ci	struct device *pdsc_dev = &pci_physfn(pdev)->dev;
10762306a36Sopenharmony_ci
10862306a36Sopenharmony_ci	dma_unmap_single(pdsc_dev, bmp_info->sgl_addr,
10962306a36Sopenharmony_ci			 bmp_info->num_sge * sizeof(struct pds_lm_sg_elem),
11062306a36Sopenharmony_ci			 DMA_BIDIRECTIONAL);
11162306a36Sopenharmony_ci	kfree(bmp_info->sgl);
11262306a36Sopenharmony_ci
11362306a36Sopenharmony_ci	bmp_info->num_sge = 0;
11462306a36Sopenharmony_ci	bmp_info->sgl = NULL;
11562306a36Sopenharmony_ci	bmp_info->sgl_addr = 0;
11662306a36Sopenharmony_ci}
11762306a36Sopenharmony_ci
11862306a36Sopenharmony_cistatic void pds_vfio_dirty_free_sgl(struct pds_vfio_pci_device *pds_vfio)
11962306a36Sopenharmony_ci{
12062306a36Sopenharmony_ci	if (pds_vfio->dirty.host_seq.sgl)
12162306a36Sopenharmony_ci		__pds_vfio_dirty_free_sgl(pds_vfio, &pds_vfio->dirty.host_seq);
12262306a36Sopenharmony_ci	if (pds_vfio->dirty.host_ack.sgl)
12362306a36Sopenharmony_ci		__pds_vfio_dirty_free_sgl(pds_vfio, &pds_vfio->dirty.host_ack);
12462306a36Sopenharmony_ci}
12562306a36Sopenharmony_ci
12662306a36Sopenharmony_cistatic int __pds_vfio_dirty_alloc_sgl(struct pds_vfio_pci_device *pds_vfio,
12762306a36Sopenharmony_ci				      struct pds_vfio_bmp_info *bmp_info,
12862306a36Sopenharmony_ci				      u32 page_count)
12962306a36Sopenharmony_ci{
13062306a36Sopenharmony_ci	struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev;
13162306a36Sopenharmony_ci	struct device *pdsc_dev = &pci_physfn(pdev)->dev;
13262306a36Sopenharmony_ci	struct pds_lm_sg_elem *sgl;
13362306a36Sopenharmony_ci	dma_addr_t sgl_addr;
13462306a36Sopenharmony_ci	size_t sgl_size;
13562306a36Sopenharmony_ci	u32 max_sge;
13662306a36Sopenharmony_ci
13762306a36Sopenharmony_ci	max_sge = DIV_ROUND_UP(page_count, PAGE_SIZE * 8);
13862306a36Sopenharmony_ci	sgl_size = max_sge * sizeof(struct pds_lm_sg_elem);
13962306a36Sopenharmony_ci
14062306a36Sopenharmony_ci	sgl = kzalloc(sgl_size, GFP_KERNEL);
14162306a36Sopenharmony_ci	if (!sgl)
14262306a36Sopenharmony_ci		return -ENOMEM;
14362306a36Sopenharmony_ci
14462306a36Sopenharmony_ci	sgl_addr = dma_map_single(pdsc_dev, sgl, sgl_size, DMA_BIDIRECTIONAL);
14562306a36Sopenharmony_ci	if (dma_mapping_error(pdsc_dev, sgl_addr)) {
14662306a36Sopenharmony_ci		kfree(sgl);
14762306a36Sopenharmony_ci		return -EIO;
14862306a36Sopenharmony_ci	}
14962306a36Sopenharmony_ci
15062306a36Sopenharmony_ci	bmp_info->sgl = sgl;
15162306a36Sopenharmony_ci	bmp_info->num_sge = max_sge;
15262306a36Sopenharmony_ci	bmp_info->sgl_addr = sgl_addr;
15362306a36Sopenharmony_ci
15462306a36Sopenharmony_ci	return 0;
15562306a36Sopenharmony_ci}
15662306a36Sopenharmony_ci
15762306a36Sopenharmony_cistatic int pds_vfio_dirty_alloc_sgl(struct pds_vfio_pci_device *pds_vfio,
15862306a36Sopenharmony_ci				    u32 page_count)
15962306a36Sopenharmony_ci{
16062306a36Sopenharmony_ci	struct pds_vfio_dirty *dirty = &pds_vfio->dirty;
16162306a36Sopenharmony_ci	int err;
16262306a36Sopenharmony_ci
16362306a36Sopenharmony_ci	err = __pds_vfio_dirty_alloc_sgl(pds_vfio, &dirty->host_seq,
16462306a36Sopenharmony_ci					 page_count);
16562306a36Sopenharmony_ci	if (err)
16662306a36Sopenharmony_ci		return err;
16762306a36Sopenharmony_ci
16862306a36Sopenharmony_ci	err = __pds_vfio_dirty_alloc_sgl(pds_vfio, &dirty->host_ack,
16962306a36Sopenharmony_ci					 page_count);
17062306a36Sopenharmony_ci	if (err) {
17162306a36Sopenharmony_ci		__pds_vfio_dirty_free_sgl(pds_vfio, &dirty->host_seq);
17262306a36Sopenharmony_ci		return err;
17362306a36Sopenharmony_ci	}
17462306a36Sopenharmony_ci
17562306a36Sopenharmony_ci	return 0;
17662306a36Sopenharmony_ci}
17762306a36Sopenharmony_ci
17862306a36Sopenharmony_cistatic int pds_vfio_dirty_enable(struct pds_vfio_pci_device *pds_vfio,
17962306a36Sopenharmony_ci				 struct rb_root_cached *ranges, u32 nnodes,
18062306a36Sopenharmony_ci				 u64 *page_size)
18162306a36Sopenharmony_ci{
18262306a36Sopenharmony_ci	struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev;
18362306a36Sopenharmony_ci	struct device *pdsc_dev = &pci_physfn(pdev)->dev;
18462306a36Sopenharmony_ci	struct pds_vfio_dirty *dirty = &pds_vfio->dirty;
18562306a36Sopenharmony_ci	u64 region_start, region_size, region_page_size;
18662306a36Sopenharmony_ci	struct pds_lm_dirty_region_info *region_info;
18762306a36Sopenharmony_ci	struct interval_tree_node *node = NULL;
18862306a36Sopenharmony_ci	u8 max_regions = 0, num_regions;
18962306a36Sopenharmony_ci	dma_addr_t regions_dma = 0;
19062306a36Sopenharmony_ci	u32 num_ranges = nnodes;
19162306a36Sopenharmony_ci	u32 page_count;
19262306a36Sopenharmony_ci	u16 len;
19362306a36Sopenharmony_ci	int err;
19462306a36Sopenharmony_ci
19562306a36Sopenharmony_ci	dev_dbg(&pdev->dev, "vf%u: Start dirty page tracking\n",
19662306a36Sopenharmony_ci		pds_vfio->vf_id);
19762306a36Sopenharmony_ci
19862306a36Sopenharmony_ci	if (pds_vfio_dirty_is_enabled(pds_vfio))
19962306a36Sopenharmony_ci		return -EINVAL;
20062306a36Sopenharmony_ci
20162306a36Sopenharmony_ci	/* find if dirty tracking is disabled, i.e. num_regions == 0 */
20262306a36Sopenharmony_ci	err = pds_vfio_dirty_status_cmd(pds_vfio, 0, &max_regions,
20362306a36Sopenharmony_ci					&num_regions);
20462306a36Sopenharmony_ci	if (err < 0) {
20562306a36Sopenharmony_ci		dev_err(&pdev->dev, "Failed to get dirty status, err %pe\n",
20662306a36Sopenharmony_ci			ERR_PTR(err));
20762306a36Sopenharmony_ci		return err;
20862306a36Sopenharmony_ci	} else if (num_regions) {
20962306a36Sopenharmony_ci		dev_err(&pdev->dev,
21062306a36Sopenharmony_ci			"Dirty tracking already enabled for %d regions\n",
21162306a36Sopenharmony_ci			num_regions);
21262306a36Sopenharmony_ci		return -EEXIST;
21362306a36Sopenharmony_ci	} else if (!max_regions) {
21462306a36Sopenharmony_ci		dev_err(&pdev->dev,
21562306a36Sopenharmony_ci			"Device doesn't support dirty tracking, max_regions %d\n",
21662306a36Sopenharmony_ci			max_regions);
21762306a36Sopenharmony_ci		return -EOPNOTSUPP;
21862306a36Sopenharmony_ci	}
21962306a36Sopenharmony_ci
22062306a36Sopenharmony_ci	/*
22162306a36Sopenharmony_ci	 * Only support 1 region for now. If there are any large gaps in the
22262306a36Sopenharmony_ci	 * VM's address regions, then this would be a waste of memory as we are
22362306a36Sopenharmony_ci	 * generating 2 bitmaps (ack/seq) from the min address to the max
22462306a36Sopenharmony_ci	 * address of the VM's address regions. In the future, if we support
22562306a36Sopenharmony_ci	 * more than one region in the device/driver we can split the bitmaps
22662306a36Sopenharmony_ci	 * on the largest address region gaps. We can do this split up to the
22762306a36Sopenharmony_ci	 * max_regions times returned from the dirty_status command.
22862306a36Sopenharmony_ci	 */
22962306a36Sopenharmony_ci	max_regions = 1;
23062306a36Sopenharmony_ci	if (num_ranges > max_regions) {
23162306a36Sopenharmony_ci		vfio_combine_iova_ranges(ranges, nnodes, max_regions);
23262306a36Sopenharmony_ci		num_ranges = max_regions;
23362306a36Sopenharmony_ci	}
23462306a36Sopenharmony_ci
23562306a36Sopenharmony_ci	node = interval_tree_iter_first(ranges, 0, ULONG_MAX);
23662306a36Sopenharmony_ci	if (!node)
23762306a36Sopenharmony_ci		return -EINVAL;
23862306a36Sopenharmony_ci
23962306a36Sopenharmony_ci	region_size = node->last - node->start + 1;
24062306a36Sopenharmony_ci	region_start = node->start;
24162306a36Sopenharmony_ci	region_page_size = *page_size;
24262306a36Sopenharmony_ci
24362306a36Sopenharmony_ci	len = sizeof(*region_info);
24462306a36Sopenharmony_ci	region_info = kzalloc(len, GFP_KERNEL);
24562306a36Sopenharmony_ci	if (!region_info)
24662306a36Sopenharmony_ci		return -ENOMEM;
24762306a36Sopenharmony_ci
24862306a36Sopenharmony_ci	page_count = DIV_ROUND_UP(region_size, region_page_size);
24962306a36Sopenharmony_ci
25062306a36Sopenharmony_ci	region_info->dma_base = cpu_to_le64(region_start);
25162306a36Sopenharmony_ci	region_info->page_count = cpu_to_le32(page_count);
25262306a36Sopenharmony_ci	region_info->page_size_log2 = ilog2(region_page_size);
25362306a36Sopenharmony_ci
25462306a36Sopenharmony_ci	regions_dma = dma_map_single(pdsc_dev, (void *)region_info, len,
25562306a36Sopenharmony_ci				     DMA_BIDIRECTIONAL);
25662306a36Sopenharmony_ci	if (dma_mapping_error(pdsc_dev, regions_dma)) {
25762306a36Sopenharmony_ci		err = -ENOMEM;
25862306a36Sopenharmony_ci		goto out_free_region_info;
25962306a36Sopenharmony_ci	}
26062306a36Sopenharmony_ci
26162306a36Sopenharmony_ci	err = pds_vfio_dirty_enable_cmd(pds_vfio, regions_dma, max_regions);
26262306a36Sopenharmony_ci	dma_unmap_single(pdsc_dev, regions_dma, len, DMA_BIDIRECTIONAL);
26362306a36Sopenharmony_ci	if (err)
26462306a36Sopenharmony_ci		goto out_free_region_info;
26562306a36Sopenharmony_ci
26662306a36Sopenharmony_ci	/*
26762306a36Sopenharmony_ci	 * page_count might be adjusted by the device,
26862306a36Sopenharmony_ci	 * update it before freeing region_info DMA
26962306a36Sopenharmony_ci	 */
27062306a36Sopenharmony_ci	page_count = le32_to_cpu(region_info->page_count);
27162306a36Sopenharmony_ci
27262306a36Sopenharmony_ci	dev_dbg(&pdev->dev,
27362306a36Sopenharmony_ci		"region_info: regions_dma 0x%llx dma_base 0x%llx page_count %u page_size_log2 %u\n",
27462306a36Sopenharmony_ci		regions_dma, region_start, page_count,
27562306a36Sopenharmony_ci		(u8)ilog2(region_page_size));
27662306a36Sopenharmony_ci
27762306a36Sopenharmony_ci	err = pds_vfio_dirty_alloc_bitmaps(dirty, page_count / BITS_PER_BYTE);
27862306a36Sopenharmony_ci	if (err) {
27962306a36Sopenharmony_ci		dev_err(&pdev->dev, "Failed to alloc dirty bitmaps: %pe\n",
28062306a36Sopenharmony_ci			ERR_PTR(err));
28162306a36Sopenharmony_ci		goto out_free_region_info;
28262306a36Sopenharmony_ci	}
28362306a36Sopenharmony_ci
28462306a36Sopenharmony_ci	err = pds_vfio_dirty_alloc_sgl(pds_vfio, page_count);
28562306a36Sopenharmony_ci	if (err) {
28662306a36Sopenharmony_ci		dev_err(&pdev->dev, "Failed to alloc dirty sg lists: %pe\n",
28762306a36Sopenharmony_ci			ERR_PTR(err));
28862306a36Sopenharmony_ci		goto out_free_bitmaps;
28962306a36Sopenharmony_ci	}
29062306a36Sopenharmony_ci
29162306a36Sopenharmony_ci	dirty->region_start = region_start;
29262306a36Sopenharmony_ci	dirty->region_size = region_size;
29362306a36Sopenharmony_ci	dirty->region_page_size = region_page_size;
29462306a36Sopenharmony_ci	pds_vfio_dirty_set_enabled(pds_vfio);
29562306a36Sopenharmony_ci
29662306a36Sopenharmony_ci	pds_vfio_print_guest_region_info(pds_vfio, max_regions);
29762306a36Sopenharmony_ci
29862306a36Sopenharmony_ci	kfree(region_info);
29962306a36Sopenharmony_ci
30062306a36Sopenharmony_ci	return 0;
30162306a36Sopenharmony_ci
30262306a36Sopenharmony_ciout_free_bitmaps:
30362306a36Sopenharmony_ci	pds_vfio_dirty_free_bitmaps(dirty);
30462306a36Sopenharmony_ciout_free_region_info:
30562306a36Sopenharmony_ci	kfree(region_info);
30662306a36Sopenharmony_ci	return err;
30762306a36Sopenharmony_ci}
30862306a36Sopenharmony_ci
30962306a36Sopenharmony_civoid pds_vfio_dirty_disable(struct pds_vfio_pci_device *pds_vfio, bool send_cmd)
31062306a36Sopenharmony_ci{
31162306a36Sopenharmony_ci	if (pds_vfio_dirty_is_enabled(pds_vfio)) {
31262306a36Sopenharmony_ci		pds_vfio_dirty_set_disabled(pds_vfio);
31362306a36Sopenharmony_ci		if (send_cmd)
31462306a36Sopenharmony_ci			pds_vfio_dirty_disable_cmd(pds_vfio);
31562306a36Sopenharmony_ci		pds_vfio_dirty_free_sgl(pds_vfio);
31662306a36Sopenharmony_ci		pds_vfio_dirty_free_bitmaps(&pds_vfio->dirty);
31762306a36Sopenharmony_ci	}
31862306a36Sopenharmony_ci
31962306a36Sopenharmony_ci	if (send_cmd)
32062306a36Sopenharmony_ci		pds_vfio_send_host_vf_lm_status_cmd(pds_vfio, PDS_LM_STA_NONE);
32162306a36Sopenharmony_ci}
32262306a36Sopenharmony_ci
32362306a36Sopenharmony_cistatic int pds_vfio_dirty_seq_ack(struct pds_vfio_pci_device *pds_vfio,
32462306a36Sopenharmony_ci				  struct pds_vfio_bmp_info *bmp_info,
32562306a36Sopenharmony_ci				  u32 offset, u32 bmp_bytes, bool read_seq)
32662306a36Sopenharmony_ci{
32762306a36Sopenharmony_ci	const char *bmp_type_str = read_seq ? "read_seq" : "write_ack";
32862306a36Sopenharmony_ci	u8 dma_dir = read_seq ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
32962306a36Sopenharmony_ci	struct pci_dev *pdev = pds_vfio->vfio_coredev.pdev;
33062306a36Sopenharmony_ci	struct device *pdsc_dev = &pci_physfn(pdev)->dev;
33162306a36Sopenharmony_ci	unsigned long long npages;
33262306a36Sopenharmony_ci	struct sg_table sg_table;
33362306a36Sopenharmony_ci	struct scatterlist *sg;
33462306a36Sopenharmony_ci	struct page **pages;
33562306a36Sopenharmony_ci	u32 page_offset;
33662306a36Sopenharmony_ci	const void *bmp;
33762306a36Sopenharmony_ci	size_t size;
33862306a36Sopenharmony_ci	u16 num_sge;
33962306a36Sopenharmony_ci	int err;
34062306a36Sopenharmony_ci	int i;
34162306a36Sopenharmony_ci
34262306a36Sopenharmony_ci	bmp = (void *)((u64)bmp_info->bmp + offset);
34362306a36Sopenharmony_ci	page_offset = offset_in_page(bmp);
34462306a36Sopenharmony_ci	bmp -= page_offset;
34562306a36Sopenharmony_ci
34662306a36Sopenharmony_ci	/*
34762306a36Sopenharmony_ci	 * Start and end of bitmap section to seq/ack might not be page
34862306a36Sopenharmony_ci	 * aligned, so use the page_offset to account for that so there
34962306a36Sopenharmony_ci	 * will be enough pages to represent the bmp_bytes
35062306a36Sopenharmony_ci	 */
35162306a36Sopenharmony_ci	npages = DIV_ROUND_UP_ULL(bmp_bytes + page_offset, PAGE_SIZE);
35262306a36Sopenharmony_ci	pages = kmalloc_array(npages, sizeof(*pages), GFP_KERNEL);
35362306a36Sopenharmony_ci	if (!pages)
35462306a36Sopenharmony_ci		return -ENOMEM;
35562306a36Sopenharmony_ci
35662306a36Sopenharmony_ci	for (unsigned long long i = 0; i < npages; i++) {
35762306a36Sopenharmony_ci		struct page *page = vmalloc_to_page(bmp);
35862306a36Sopenharmony_ci
35962306a36Sopenharmony_ci		if (!page) {
36062306a36Sopenharmony_ci			err = -EFAULT;
36162306a36Sopenharmony_ci			goto out_free_pages;
36262306a36Sopenharmony_ci		}
36362306a36Sopenharmony_ci
36462306a36Sopenharmony_ci		pages[i] = page;
36562306a36Sopenharmony_ci		bmp += PAGE_SIZE;
36662306a36Sopenharmony_ci	}
36762306a36Sopenharmony_ci
36862306a36Sopenharmony_ci	err = sg_alloc_table_from_pages(&sg_table, pages, npages, page_offset,
36962306a36Sopenharmony_ci					bmp_bytes, GFP_KERNEL);
37062306a36Sopenharmony_ci	if (err)
37162306a36Sopenharmony_ci		goto out_free_pages;
37262306a36Sopenharmony_ci
37362306a36Sopenharmony_ci	err = dma_map_sgtable(pdsc_dev, &sg_table, dma_dir, 0);
37462306a36Sopenharmony_ci	if (err)
37562306a36Sopenharmony_ci		goto out_free_sg_table;
37662306a36Sopenharmony_ci
37762306a36Sopenharmony_ci	for_each_sgtable_dma_sg(&sg_table, sg, i) {
37862306a36Sopenharmony_ci		struct pds_lm_sg_elem *sg_elem = &bmp_info->sgl[i];
37962306a36Sopenharmony_ci
38062306a36Sopenharmony_ci		sg_elem->addr = cpu_to_le64(sg_dma_address(sg));
38162306a36Sopenharmony_ci		sg_elem->len = cpu_to_le32(sg_dma_len(sg));
38262306a36Sopenharmony_ci	}
38362306a36Sopenharmony_ci
38462306a36Sopenharmony_ci	num_sge = sg_table.nents;
38562306a36Sopenharmony_ci	size = num_sge * sizeof(struct pds_lm_sg_elem);
38662306a36Sopenharmony_ci	dma_sync_single_for_device(pdsc_dev, bmp_info->sgl_addr, size, dma_dir);
38762306a36Sopenharmony_ci	err = pds_vfio_dirty_seq_ack_cmd(pds_vfio, bmp_info->sgl_addr, num_sge,
38862306a36Sopenharmony_ci					 offset, bmp_bytes, read_seq);
38962306a36Sopenharmony_ci	if (err)
39062306a36Sopenharmony_ci		dev_err(&pdev->dev,
39162306a36Sopenharmony_ci			"Dirty bitmap %s failed offset %u bmp_bytes %u num_sge %u DMA 0x%llx: %pe\n",
39262306a36Sopenharmony_ci			bmp_type_str, offset, bmp_bytes,
39362306a36Sopenharmony_ci			num_sge, bmp_info->sgl_addr, ERR_PTR(err));
39462306a36Sopenharmony_ci	dma_sync_single_for_cpu(pdsc_dev, bmp_info->sgl_addr, size, dma_dir);
39562306a36Sopenharmony_ci
39662306a36Sopenharmony_ci	dma_unmap_sgtable(pdsc_dev, &sg_table, dma_dir, 0);
39762306a36Sopenharmony_ciout_free_sg_table:
39862306a36Sopenharmony_ci	sg_free_table(&sg_table);
39962306a36Sopenharmony_ciout_free_pages:
40062306a36Sopenharmony_ci	kfree(pages);
40162306a36Sopenharmony_ci
40262306a36Sopenharmony_ci	return err;
40362306a36Sopenharmony_ci}
40462306a36Sopenharmony_ci
40562306a36Sopenharmony_cistatic int pds_vfio_dirty_write_ack(struct pds_vfio_pci_device *pds_vfio,
40662306a36Sopenharmony_ci				    u32 offset, u32 len)
40762306a36Sopenharmony_ci{
40862306a36Sopenharmony_ci	return pds_vfio_dirty_seq_ack(pds_vfio, &pds_vfio->dirty.host_ack,
40962306a36Sopenharmony_ci				      offset, len, WRITE_ACK);
41062306a36Sopenharmony_ci}
41162306a36Sopenharmony_ci
41262306a36Sopenharmony_cistatic int pds_vfio_dirty_read_seq(struct pds_vfio_pci_device *pds_vfio,
41362306a36Sopenharmony_ci				   u32 offset, u32 len)
41462306a36Sopenharmony_ci{
41562306a36Sopenharmony_ci	return pds_vfio_dirty_seq_ack(pds_vfio, &pds_vfio->dirty.host_seq,
41662306a36Sopenharmony_ci				      offset, len, READ_SEQ);
41762306a36Sopenharmony_ci}
41862306a36Sopenharmony_ci
41962306a36Sopenharmony_cistatic int pds_vfio_dirty_process_bitmaps(struct pds_vfio_pci_device *pds_vfio,
42062306a36Sopenharmony_ci					  struct iova_bitmap *dirty_bitmap,
42162306a36Sopenharmony_ci					  u32 bmp_offset, u32 len_bytes)
42262306a36Sopenharmony_ci{
42362306a36Sopenharmony_ci	u64 page_size = pds_vfio->dirty.region_page_size;
42462306a36Sopenharmony_ci	u64 region_start = pds_vfio->dirty.region_start;
42562306a36Sopenharmony_ci	u32 bmp_offset_bit;
42662306a36Sopenharmony_ci	__le64 *seq, *ack;
42762306a36Sopenharmony_ci	int dword_count;
42862306a36Sopenharmony_ci
42962306a36Sopenharmony_ci	dword_count = len_bytes / sizeof(u64);
43062306a36Sopenharmony_ci	seq = (__le64 *)((u64)pds_vfio->dirty.host_seq.bmp + bmp_offset);
43162306a36Sopenharmony_ci	ack = (__le64 *)((u64)pds_vfio->dirty.host_ack.bmp + bmp_offset);
43262306a36Sopenharmony_ci	bmp_offset_bit = bmp_offset * 8;
43362306a36Sopenharmony_ci
43462306a36Sopenharmony_ci	for (int i = 0; i < dword_count; i++) {
43562306a36Sopenharmony_ci		u64 xor = le64_to_cpu(seq[i]) ^ le64_to_cpu(ack[i]);
43662306a36Sopenharmony_ci
43762306a36Sopenharmony_ci		/* prepare for next write_ack call */
43862306a36Sopenharmony_ci		ack[i] = seq[i];
43962306a36Sopenharmony_ci
44062306a36Sopenharmony_ci		for (u8 bit_i = 0; bit_i < BITS_PER_TYPE(u64); ++bit_i) {
44162306a36Sopenharmony_ci			if (xor & BIT(bit_i)) {
44262306a36Sopenharmony_ci				u64 abs_bit_i = bmp_offset_bit +
44362306a36Sopenharmony_ci						i * BITS_PER_TYPE(u64) + bit_i;
44462306a36Sopenharmony_ci				u64 addr = abs_bit_i * page_size + region_start;
44562306a36Sopenharmony_ci
44662306a36Sopenharmony_ci				iova_bitmap_set(dirty_bitmap, addr, page_size);
44762306a36Sopenharmony_ci			}
44862306a36Sopenharmony_ci		}
44962306a36Sopenharmony_ci	}
45062306a36Sopenharmony_ci
45162306a36Sopenharmony_ci	return 0;
45262306a36Sopenharmony_ci}
45362306a36Sopenharmony_ci
45462306a36Sopenharmony_cistatic int pds_vfio_dirty_sync(struct pds_vfio_pci_device *pds_vfio,
45562306a36Sopenharmony_ci			       struct iova_bitmap *dirty_bitmap,
45662306a36Sopenharmony_ci			       unsigned long iova, unsigned long length)
45762306a36Sopenharmony_ci{
45862306a36Sopenharmony_ci	struct device *dev = &pds_vfio->vfio_coredev.pdev->dev;
45962306a36Sopenharmony_ci	struct pds_vfio_dirty *dirty = &pds_vfio->dirty;
46062306a36Sopenharmony_ci	u64 bmp_offset, bmp_bytes;
46162306a36Sopenharmony_ci	u64 bitmap_size, pages;
46262306a36Sopenharmony_ci	int err;
46362306a36Sopenharmony_ci
46462306a36Sopenharmony_ci	dev_dbg(dev, "vf%u: Get dirty page bitmap\n", pds_vfio->vf_id);
46562306a36Sopenharmony_ci
46662306a36Sopenharmony_ci	if (!pds_vfio_dirty_is_enabled(pds_vfio)) {
46762306a36Sopenharmony_ci		dev_err(dev, "vf%u: Sync failed, dirty tracking is disabled\n",
46862306a36Sopenharmony_ci			pds_vfio->vf_id);
46962306a36Sopenharmony_ci		return -EINVAL;
47062306a36Sopenharmony_ci	}
47162306a36Sopenharmony_ci
47262306a36Sopenharmony_ci	pages = DIV_ROUND_UP(length, pds_vfio->dirty.region_page_size);
47362306a36Sopenharmony_ci	bitmap_size =
47462306a36Sopenharmony_ci		round_up(pages, sizeof(u64) * BITS_PER_BYTE) / BITS_PER_BYTE;
47562306a36Sopenharmony_ci
47662306a36Sopenharmony_ci	dev_dbg(dev,
47762306a36Sopenharmony_ci		"vf%u: iova 0x%lx length %lu page_size %llu pages %llu bitmap_size %llu\n",
47862306a36Sopenharmony_ci		pds_vfio->vf_id, iova, length, pds_vfio->dirty.region_page_size,
47962306a36Sopenharmony_ci		pages, bitmap_size);
48062306a36Sopenharmony_ci
48162306a36Sopenharmony_ci	if (!length || ((iova - dirty->region_start + length) > dirty->region_size)) {
48262306a36Sopenharmony_ci		dev_err(dev, "Invalid iova 0x%lx and/or length 0x%lx to sync\n",
48362306a36Sopenharmony_ci			iova, length);
48462306a36Sopenharmony_ci		return -EINVAL;
48562306a36Sopenharmony_ci	}
48662306a36Sopenharmony_ci
48762306a36Sopenharmony_ci	/* bitmap is modified in 64 bit chunks */
48862306a36Sopenharmony_ci	bmp_bytes = ALIGN(DIV_ROUND_UP(length / dirty->region_page_size,
48962306a36Sopenharmony_ci				       sizeof(u64)),
49062306a36Sopenharmony_ci			  sizeof(u64));
49162306a36Sopenharmony_ci	if (bmp_bytes != bitmap_size) {
49262306a36Sopenharmony_ci		dev_err(dev,
49362306a36Sopenharmony_ci			"Calculated bitmap bytes %llu not equal to bitmap size %llu\n",
49462306a36Sopenharmony_ci			bmp_bytes, bitmap_size);
49562306a36Sopenharmony_ci		return -EINVAL;
49662306a36Sopenharmony_ci	}
49762306a36Sopenharmony_ci
49862306a36Sopenharmony_ci	bmp_offset = DIV_ROUND_UP((iova - dirty->region_start) /
49962306a36Sopenharmony_ci				  dirty->region_page_size, sizeof(u64));
50062306a36Sopenharmony_ci
50162306a36Sopenharmony_ci	dev_dbg(dev,
50262306a36Sopenharmony_ci		"Syncing dirty bitmap, iova 0x%lx length 0x%lx, bmp_offset %llu bmp_bytes %llu\n",
50362306a36Sopenharmony_ci		iova, length, bmp_offset, bmp_bytes);
50462306a36Sopenharmony_ci
50562306a36Sopenharmony_ci	err = pds_vfio_dirty_read_seq(pds_vfio, bmp_offset, bmp_bytes);
50662306a36Sopenharmony_ci	if (err)
50762306a36Sopenharmony_ci		return err;
50862306a36Sopenharmony_ci
50962306a36Sopenharmony_ci	err = pds_vfio_dirty_process_bitmaps(pds_vfio, dirty_bitmap, bmp_offset,
51062306a36Sopenharmony_ci					     bmp_bytes);
51162306a36Sopenharmony_ci	if (err)
51262306a36Sopenharmony_ci		return err;
51362306a36Sopenharmony_ci
51462306a36Sopenharmony_ci	err = pds_vfio_dirty_write_ack(pds_vfio, bmp_offset, bmp_bytes);
51562306a36Sopenharmony_ci	if (err)
51662306a36Sopenharmony_ci		return err;
51762306a36Sopenharmony_ci
51862306a36Sopenharmony_ci	return 0;
51962306a36Sopenharmony_ci}
52062306a36Sopenharmony_ci
52162306a36Sopenharmony_ciint pds_vfio_dma_logging_report(struct vfio_device *vdev, unsigned long iova,
52262306a36Sopenharmony_ci				unsigned long length, struct iova_bitmap *dirty)
52362306a36Sopenharmony_ci{
52462306a36Sopenharmony_ci	struct pds_vfio_pci_device *pds_vfio =
52562306a36Sopenharmony_ci		container_of(vdev, struct pds_vfio_pci_device,
52662306a36Sopenharmony_ci			     vfio_coredev.vdev);
52762306a36Sopenharmony_ci	int err;
52862306a36Sopenharmony_ci
52962306a36Sopenharmony_ci	mutex_lock(&pds_vfio->state_mutex);
53062306a36Sopenharmony_ci	err = pds_vfio_dirty_sync(pds_vfio, dirty, iova, length);
53162306a36Sopenharmony_ci	pds_vfio_state_mutex_unlock(pds_vfio);
53262306a36Sopenharmony_ci
53362306a36Sopenharmony_ci	return err;
53462306a36Sopenharmony_ci}
53562306a36Sopenharmony_ci
53662306a36Sopenharmony_ciint pds_vfio_dma_logging_start(struct vfio_device *vdev,
53762306a36Sopenharmony_ci			       struct rb_root_cached *ranges, u32 nnodes,
53862306a36Sopenharmony_ci			       u64 *page_size)
53962306a36Sopenharmony_ci{
54062306a36Sopenharmony_ci	struct pds_vfio_pci_device *pds_vfio =
54162306a36Sopenharmony_ci		container_of(vdev, struct pds_vfio_pci_device,
54262306a36Sopenharmony_ci			     vfio_coredev.vdev);
54362306a36Sopenharmony_ci	int err;
54462306a36Sopenharmony_ci
54562306a36Sopenharmony_ci	mutex_lock(&pds_vfio->state_mutex);
54662306a36Sopenharmony_ci	pds_vfio_send_host_vf_lm_status_cmd(pds_vfio, PDS_LM_STA_IN_PROGRESS);
54762306a36Sopenharmony_ci	err = pds_vfio_dirty_enable(pds_vfio, ranges, nnodes, page_size);
54862306a36Sopenharmony_ci	pds_vfio_state_mutex_unlock(pds_vfio);
54962306a36Sopenharmony_ci
55062306a36Sopenharmony_ci	return err;
55162306a36Sopenharmony_ci}
55262306a36Sopenharmony_ci
55362306a36Sopenharmony_ciint pds_vfio_dma_logging_stop(struct vfio_device *vdev)
55462306a36Sopenharmony_ci{
55562306a36Sopenharmony_ci	struct pds_vfio_pci_device *pds_vfio =
55662306a36Sopenharmony_ci		container_of(vdev, struct pds_vfio_pci_device,
55762306a36Sopenharmony_ci			     vfio_coredev.vdev);
55862306a36Sopenharmony_ci
55962306a36Sopenharmony_ci	mutex_lock(&pds_vfio->state_mutex);
56062306a36Sopenharmony_ci	pds_vfio_dirty_disable(pds_vfio, true);
56162306a36Sopenharmony_ci	pds_vfio_state_mutex_unlock(pds_vfio);
56262306a36Sopenharmony_ci
56362306a36Sopenharmony_ci	return 0;
56462306a36Sopenharmony_ci}
565