// SPDX-License-Identifier: GPL-2.0
/*
 * NVM Express device driver
 * Copyright (c) 2011-2014, Intel Corporation.
 */

#include <linux/acpi.h>
#include <linux/aer.h>
#include <linux/async.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/blk-mq-pci.h>
#include <linux/dmi.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/once.h>
#include <linux/pci.h>
#include <linux/suspend.h>
#include <linux/t10-pi.h>
#include <linux/types.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/io-64-nonatomic-hi-lo.h>
#include <linux/sed-opal.h>
#include <linux/pci-p2pdma.h>

#include "trace.h"
#include "nvme.h"

#define SQ_SIZE(q)	((q)->q_depth << (q)->sqes)
#define CQ_SIZE(q)	((q)->q_depth * sizeof(struct nvme_completion))

#define SGES_PER_PAGE	(NVME_CTRL_PAGE_SIZE / sizeof(struct nvme_sgl_desc))

/*
 * These can be higher, but we need to ensure that any command doesn't
 * require an sg allocation that needs more than a page of data.
 */
#define NVME_MAX_KB_SZ	4096
#define NVME_MAX_SEGS	127
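
/*
 * A rough sizing note (assuming the default 4 KiB NVME_CTRL_PAGE_SIZE):
 * NVME_MAX_KB_SZ caps a single transfer at 4 MiB, and NVME_MAX_SEGS caps
 * the scatter/gather list at 127 entries so that the per-request iod
 * allocation (scatterlist plus PRP/SGL page-pointer array) stays close
 * to a single page.
 */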

static int use_threaded_interrupts;
module_param(use_threaded_interrupts, int, 0);

static bool use_cmb_sqes = true;
module_param(use_cmb_sqes, bool, 0444);
MODULE_PARM_DESC(use_cmb_sqes, "use controller's memory buffer for I/O SQes");

static unsigned int max_host_mem_size_mb = 128;
module_param(max_host_mem_size_mb, uint, 0444);
MODULE_PARM_DESC(max_host_mem_size_mb,
	"Maximum Host Memory Buffer (HMB) size per controller (in MiB)");

static unsigned int sgl_threshold = SZ_32K;
module_param(sgl_threshold, uint, 0644);
MODULE_PARM_DESC(sgl_threshold,
		"Use SGLs when the average request segment size is greater than "
		"or equal to this size. Use 0 to disable SGLs.");

static int io_queue_depth_set(const char *val, const struct kernel_param *kp);
static const struct kernel_param_ops io_queue_depth_ops = {
	.set = io_queue_depth_set,
	.get = param_get_uint,
};

static unsigned int io_queue_depth = 1024;
module_param_cb(io_queue_depth, &io_queue_depth_ops, &io_queue_depth, 0644);
MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should be >= 2");

static int io_queue_count_set(const char *val, const struct kernel_param *kp)
{
	unsigned int n;
	int ret;

	ret = kstrtouint(val, 10, &n);
	if (ret != 0 || n > num_possible_cpus())
		return -EINVAL;
	return param_set_uint(val, kp);
}

static const struct kernel_param_ops io_queue_count_ops = {
	.set = io_queue_count_set,
	.get = param_get_uint,
};

static unsigned int write_queues;
module_param_cb(write_queues, &io_queue_count_ops, &write_queues, 0644);
MODULE_PARM_DESC(write_queues,
	"Number of queues to use for writes. If not set, reads and writes "
	"will share a queue set.");

static unsigned int poll_queues;
module_param_cb(poll_queues, &io_queue_count_ops, &poll_queues, 0644);
MODULE_PARM_DESC(poll_queues, "Number of queues to use for polled IO.");

static bool noacpi;
module_param(noacpi, bool, 0444);
MODULE_PARM_DESC(noacpi, "disable acpi bios quirks");

struct nvme_dev;
struct nvme_queue;

static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown);
static bool __nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode);

/*
 * Represents an NVM Express device.  Each nvme_dev is a PCI function.
 */
struct nvme_dev {
	struct nvme_queue *queues;
	struct blk_mq_tag_set tagset;
	struct blk_mq_tag_set admin_tagset;
	u32 __iomem *dbs;
	struct device *dev;
	struct dma_pool *prp_page_pool;
	struct dma_pool *prp_small_pool;
	unsigned online_queues;
	unsigned max_qid;
	unsigned io_queues[HCTX_MAX_TYPES];
	unsigned int num_vecs;
	u32 q_depth;
	int io_sqes;
	u32 db_stride;
	void __iomem *bar;
	unsigned long bar_mapped_size;
	struct work_struct remove_work;
	struct mutex shutdown_lock;
	bool subsystem;
	u64 cmb_size;
	bool cmb_use_sqes;
	u32 cmbsz;
	u32 cmbloc;
	struct nvme_ctrl ctrl;
	u32 last_ps;

	mempool_t *iod_mempool;

	/* shadow doorbell buffer support: */
	__le32 *dbbuf_dbs;
	dma_addr_t dbbuf_dbs_dma_addr;
	__le32 *dbbuf_eis;
	dma_addr_t dbbuf_eis_dma_addr;

	/* host memory buffer support: */
	u64 host_mem_size;
	u32 nr_host_mem_descs;
	dma_addr_t host_mem_descs_dma;
	struct nvme_host_mem_buf_desc *host_mem_descs;
	void **host_mem_desc_bufs;
	unsigned int nr_allocated_queues;
	unsigned int nr_write_queues;
	unsigned int nr_poll_queues;
};

static int io_queue_depth_set(const char *val, const struct kernel_param *kp)
{
	int ret;
	u32 n;

	ret = kstrtou32(val, 10, &n);
	if (ret != 0 || n < 2)
		return -EINVAL;

	return param_set_uint(val, kp);
}

static inline unsigned int sq_idx(unsigned int qid, u32 stride)
{
	return qid * 2 * stride;
}

static inline unsigned int cq_idx(unsigned int qid, u32 stride)
{
	return (qid * 2 + 1) * stride;
}
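
/*
 * Example: with a doorbell stride of 1, the admin queue (qid 0) uses
 * dbs[0] for its SQ tail doorbell and dbs[1] for its CQ head doorbell,
 * while qid 1 uses dbs[2] and dbs[3].  The same indexing is reused for
 * the shadow doorbell buffers below.
 */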

static inline struct nvme_dev *to_nvme_dev(struct nvme_ctrl *ctrl)
{
	return container_of(ctrl, struct nvme_dev, ctrl);
}

/*
 * An NVM Express queue.  Each device has at least two (one for admin
 * commands and one for I/O commands).
 */
struct nvme_queue {
	struct nvme_dev *dev;
	spinlock_t sq_lock;
	void *sq_cmds;
	 /* only used for poll queues: */
	spinlock_t cq_poll_lock ____cacheline_aligned_in_smp;
	struct nvme_completion *cqes;
	dma_addr_t sq_dma_addr;
	dma_addr_t cq_dma_addr;
	u32 __iomem *q_db;
	u32 q_depth;
	u16 cq_vector;
	u16 sq_tail;
	u16 last_sq_tail;
	u16 cq_head;
	u16 qid;
	u8 cq_phase;
	u8 sqes;
	unsigned long flags;
#define NVMEQ_ENABLED		0
#define NVMEQ_SQ_CMB		1
#define NVMEQ_DELETE_ERROR	2
#define NVMEQ_POLLED		3
	__le32 *dbbuf_sq_db;
	__le32 *dbbuf_cq_db;
	__le32 *dbbuf_sq_ei;
	__le32 *dbbuf_cq_ei;
	struct completion delete_done;
};

/*
 * The nvme_iod describes the data in an I/O.
 *
 * The sg pointer contains the list of PRP/SGL chunk allocations in addition
 * to the actual struct scatterlist.
 */
struct nvme_iod {
	struct nvme_request req;
	struct nvme_command cmd;
	struct nvme_queue *nvmeq;
	bool use_sgl;
	int aborted;
	int npages;		/* In the PRP list. 0 means small pool in use */
	int nents;		/* Used in scatterlist */
	dma_addr_t first_dma;
	unsigned int dma_len;	/* length of single DMA segment mapping */
	dma_addr_t meta_dma;
	struct scatterlist *sg;
};

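/*
 * Each allocated queue needs two shadow doorbell slots (SQ tail and CQ
 * head), each a 4-byte value scaled by the doorbell stride, hence the
 * 8 * db_stride bytes per queue below.
 */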
static inline unsigned int nvme_dbbuf_size(struct nvme_dev *dev)
{
	return dev->nr_allocated_queues * 8 * dev->db_stride;
}

static int nvme_dbbuf_dma_alloc(struct nvme_dev *dev)
{
	unsigned int mem_size = nvme_dbbuf_size(dev);

	if (dev->dbbuf_dbs)
		return 0;

	dev->dbbuf_dbs = dma_alloc_coherent(dev->dev, mem_size,
					    &dev->dbbuf_dbs_dma_addr,
					    GFP_KERNEL);
	if (!dev->dbbuf_dbs)
		return -ENOMEM;
	dev->dbbuf_eis = dma_alloc_coherent(dev->dev, mem_size,
					    &dev->dbbuf_eis_dma_addr,
					    GFP_KERNEL);
	if (!dev->dbbuf_eis) {
		dma_free_coherent(dev->dev, mem_size,
				  dev->dbbuf_dbs, dev->dbbuf_dbs_dma_addr);
		dev->dbbuf_dbs = NULL;
		return -ENOMEM;
	}

	return 0;
}

static void nvme_dbbuf_dma_free(struct nvme_dev *dev)
{
	unsigned int mem_size = nvme_dbbuf_size(dev);

	if (dev->dbbuf_dbs) {
		dma_free_coherent(dev->dev, mem_size,
				  dev->dbbuf_dbs, dev->dbbuf_dbs_dma_addr);
		dev->dbbuf_dbs = NULL;
	}
	if (dev->dbbuf_eis) {
		dma_free_coherent(dev->dev, mem_size,
				  dev->dbbuf_eis, dev->dbbuf_eis_dma_addr);
		dev->dbbuf_eis = NULL;
	}
}

static void nvme_dbbuf_init(struct nvme_dev *dev,
			    struct nvme_queue *nvmeq, int qid)
{
	if (!dev->dbbuf_dbs || !qid)
		return;

	nvmeq->dbbuf_sq_db = &dev->dbbuf_dbs[sq_idx(qid, dev->db_stride)];
	nvmeq->dbbuf_cq_db = &dev->dbbuf_dbs[cq_idx(qid, dev->db_stride)];
	nvmeq->dbbuf_sq_ei = &dev->dbbuf_eis[sq_idx(qid, dev->db_stride)];
	nvmeq->dbbuf_cq_ei = &dev->dbbuf_eis[cq_idx(qid, dev->db_stride)];
}

static void nvme_dbbuf_free(struct nvme_queue *nvmeq)
{
	if (!nvmeq->qid)
		return;

	nvmeq->dbbuf_sq_db = NULL;
	nvmeq->dbbuf_cq_db = NULL;
	nvmeq->dbbuf_sq_ei = NULL;
	nvmeq->dbbuf_cq_ei = NULL;
}

static void nvme_dbbuf_set(struct nvme_dev *dev)
{
	struct nvme_command c;
	unsigned int i;

	if (!dev->dbbuf_dbs)
		return;

	memset(&c, 0, sizeof(c));
	c.dbbuf.opcode = nvme_admin_dbbuf;
	c.dbbuf.prp1 = cpu_to_le64(dev->dbbuf_dbs_dma_addr);
	c.dbbuf.prp2 = cpu_to_le64(dev->dbbuf_eis_dma_addr);

	if (nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0)) {
		dev_warn(dev->ctrl.device, "unable to set dbbuf\n");
		/* Free memory and continue on */
		nvme_dbbuf_dma_free(dev);

		for (i = 1; i <= dev->online_queues; i++)
			nvme_dbbuf_free(&dev->queues[i]);
	}
}

static inline int nvme_dbbuf_need_event(u16 event_idx, u16 new_idx, u16 old)
{
	return (u16)(new_idx - event_idx - 1) < (u16)(new_idx - old);
}
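
/*
 * A worked example of the wrap-safe comparison above: with old == 10,
 * new_idx == 12 and event_idx == 11, (u16)(12 - 11 - 1) == 0 is less
 * than (u16)(12 - 10) == 2, so the controller's event index lies inside
 * the window we just advanced over and an MMIO doorbell write is
 * required.  The u16 casts keep the test correct across wrap-around.
 */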

/* Update dbbuf and return true if an MMIO is required */
static bool nvme_dbbuf_update_and_check_event(u16 value, __le32 *dbbuf_db,
					      volatile __le32 *dbbuf_ei)
{
	if (dbbuf_db) {
		u16 old_value, event_idx;

		/*
		 * Ensure that the queue is written before updating
		 * the doorbell in memory
		 */
		wmb();

		old_value = le32_to_cpu(*dbbuf_db);
		*dbbuf_db = cpu_to_le32(value);

		/*
		 * Ensure that the doorbell is updated before reading the event
		 * index from memory.  The controller needs to provide similar
		 * ordering to ensure the event index is updated before reading
		 * the doorbell.
		 */
		mb();

		event_idx = le32_to_cpu(*dbbuf_ei);
		if (!nvme_dbbuf_need_event(event_idx, value, old_value))
			return false;
	}

	return true;
}

/*
 * Will slightly overestimate the number of pages needed.  This is OK
 * as it only leads to a small amount of wasted memory for the lifetime of
 * the I/O.
 */
static int nvme_pci_npages_prp(void)
{
	unsigned max_bytes = (NVME_MAX_KB_SZ * 1024) + NVME_CTRL_PAGE_SIZE;
	unsigned nprps = DIV_ROUND_UP(max_bytes, NVME_CTRL_PAGE_SIZE);
	return DIV_ROUND_UP(8 * nprps, NVME_CTRL_PAGE_SIZE - 8);
}
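
/*
 * For example, with the default 4 KiB controller page: a maximal 4 MiB
 * transfer needs at most 1025 PRP entries (8 bytes each), and each PRP
 * list page holds 511 entries plus a chain pointer, so three list pages
 * suffice.
 */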

/*
 * Calculates the number of pages needed for the SGL segments. For example a 4k
 * page can accommodate 256 SGL descriptors.
 */
static int nvme_pci_npages_sgl(void)
{
	return DIV_ROUND_UP(NVME_MAX_SEGS * sizeof(struct nvme_sgl_desc),
			NVME_CTRL_PAGE_SIZE);
}
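
/*
 * With NVME_MAX_SEGS == 127 and 16-byte descriptors this is 2032 bytes,
 * i.e. a single 4 KiB page.
 */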

static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
				unsigned int hctx_idx)
{
	struct nvme_dev *dev = data;
	struct nvme_queue *nvmeq = &dev->queues[0];

	WARN_ON(hctx_idx != 0);
	WARN_ON(dev->admin_tagset.tags[0] != hctx->tags);

	hctx->driver_data = nvmeq;
	return 0;
}

static int nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
			  unsigned int hctx_idx)
{
	struct nvme_dev *dev = data;
	struct nvme_queue *nvmeq = &dev->queues[hctx_idx + 1];

	WARN_ON(dev->tagset.tags[hctx_idx] != hctx->tags);
	hctx->driver_data = nvmeq;
	return 0;
}

static int nvme_init_request(struct blk_mq_tag_set *set, struct request *req,
		unsigned int hctx_idx, unsigned int numa_node)
{
	struct nvme_dev *dev = set->driver_data;
	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
	int queue_idx = (set == &dev->tagset) ? hctx_idx + 1 : 0;
	struct nvme_queue *nvmeq = &dev->queues[queue_idx];

	BUG_ON(!nvmeq);
	iod->nvmeq = nvmeq;

	nvme_req(req)->ctrl = &dev->ctrl;
	return 0;
}

static int queue_irq_offset(struct nvme_dev *dev)
{
	/* if we have more than 1 vec, admin queue offsets us by 1 */
	if (dev->num_vecs > 1)
		return 1;

	return 0;
}

static int nvme_pci_map_queues(struct blk_mq_tag_set *set)
{
	struct nvme_dev *dev = set->driver_data;
	int i, qoff, offset;

	offset = queue_irq_offset(dev);
	for (i = 0, qoff = 0; i < set->nr_maps; i++) {
		struct blk_mq_queue_map *map = &set->map[i];

		map->nr_queues = dev->io_queues[i];
		if (!map->nr_queues) {
			BUG_ON(i == HCTX_TYPE_DEFAULT);
			continue;
		}

		/*
		 * The poll queues don't have an IRQ (and hence IRQ
		 * affinity), so use the regular blk-mq CPU mapping.
		 */
		map->queue_offset = qoff;
		if (i != HCTX_TYPE_POLL && offset)
			blk_mq_pci_map_queues(map, to_pci_dev(dev->dev), offset);
		else
			blk_mq_map_queues(map);
		qoff += map->nr_queues;
		offset += map->nr_queues;
	}

	return 0;
}

/*
 * Write sq tail if we are asked to, or if the next command would wrap.
 */
static inline void nvme_write_sq_db(struct nvme_queue *nvmeq, bool write_sq)
{
	if (!write_sq) {
		u16 next_tail = nvmeq->sq_tail + 1;

		if (next_tail == nvmeq->q_depth)
			next_tail = 0;
		if (next_tail != nvmeq->last_sq_tail)
			return;
	}

	if (nvme_dbbuf_update_and_check_event(nvmeq->sq_tail,
			nvmeq->dbbuf_sq_db, nvmeq->dbbuf_sq_ei))
		writel(nvmeq->sq_tail, nvmeq->q_db);
	nvmeq->last_sq_tail = nvmeq->sq_tail;
}
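
/*
 * This is what lets blk-mq batch submissions: nvme_queue_rq() passes
 * bd->last as write_sq, so a string of queued commands rings the SQ
 * doorbell only once, on the last command of the batch or via
 * nvme_commit_rqs(), unless the tail is about to wrap around to the
 * last value written to the doorbell.
 */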

/**
 * nvme_submit_cmd() - Copy a command into a queue and ring the doorbell
 * @nvmeq: The queue to use
 * @cmd: The command to send
 * @write_sq: whether to write to the SQ doorbell
 */
static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd,
			    bool write_sq)
{
	spin_lock(&nvmeq->sq_lock);
	memcpy(nvmeq->sq_cmds + (nvmeq->sq_tail << nvmeq->sqes),
	       cmd, sizeof(*cmd));
	if (++nvmeq->sq_tail == nvmeq->q_depth)
		nvmeq->sq_tail = 0;
	nvme_write_sq_db(nvmeq, write_sq);
	spin_unlock(&nvmeq->sq_lock);
}

static void nvme_commit_rqs(struct blk_mq_hw_ctx *hctx)
{
	struct nvme_queue *nvmeq = hctx->driver_data;

	spin_lock(&nvmeq->sq_lock);
	if (nvmeq->sq_tail != nvmeq->last_sq_tail)
		nvme_write_sq_db(nvmeq, true);
	spin_unlock(&nvmeq->sq_lock);
}

static void **nvme_pci_iod_list(struct request *req)
{
	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
	return (void **)(iod->sg + blk_rq_nr_phys_segments(req));
}
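
/*
 * The mempool allocation behind iod->sg lays out the scatterlist entries
 * first, immediately followed by the array of PRP/SGL list page pointers
 * that nvme_free_prps()/nvme_free_sgls() walk; the cast above simply
 * indexes past the scatterlist.
 */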

static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req)
{
	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
	int nseg = blk_rq_nr_phys_segments(req);
	unsigned int avg_seg_size;

	avg_seg_size = DIV_ROUND_UP(blk_rq_payload_bytes(req), nseg);

	if (!(dev->ctrl.sgls & ((1 << 0) | (1 << 1))))
		return false;
	if (!iod->nvmeq->qid)
		return false;
	if (!sgl_threshold || avg_seg_size < sgl_threshold)
		return false;
	return true;
}

static void nvme_free_prps(struct nvme_dev *dev, struct request *req)
{
	const int last_prp = NVME_CTRL_PAGE_SIZE / sizeof(__le64) - 1;
	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
	dma_addr_t dma_addr = iod->first_dma;
	int i;

	for (i = 0; i < iod->npages; i++) {
		__le64 *prp_list = nvme_pci_iod_list(req)[i];
		dma_addr_t next_dma_addr = le64_to_cpu(prp_list[last_prp]);

		dma_pool_free(dev->prp_page_pool, prp_list, dma_addr);
		dma_addr = next_dma_addr;
	}
}

static void nvme_free_sgls(struct nvme_dev *dev, struct request *req)
{
	const int last_sg = SGES_PER_PAGE - 1;
	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
	dma_addr_t dma_addr = iod->first_dma;
	int i;

	for (i = 0; i < iod->npages; i++) {
		struct nvme_sgl_desc *sg_list = nvme_pci_iod_list(req)[i];
		dma_addr_t next_dma_addr = le64_to_cpu((sg_list[last_sg]).addr);

		dma_pool_free(dev->prp_page_pool, sg_list, dma_addr);
		dma_addr = next_dma_addr;
	}
}

static void nvme_unmap_sg(struct nvme_dev *dev, struct request *req)
{
	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);

	if (is_pci_p2pdma_page(sg_page(iod->sg)))
		pci_p2pdma_unmap_sg(dev->dev, iod->sg, iod->nents,
				    rq_dma_dir(req));
	else
		dma_unmap_sg(dev->dev, iod->sg, iod->nents, rq_dma_dir(req));
}

static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
{
	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);

	if (iod->dma_len) {
		dma_unmap_page(dev->dev, iod->first_dma, iod->dma_len,
			       rq_dma_dir(req));
		return;
	}

	WARN_ON_ONCE(!iod->nents);

	nvme_unmap_sg(dev, req);
	if (iod->npages == 0)
		dma_pool_free(dev->prp_small_pool, nvme_pci_iod_list(req)[0],
			      iod->first_dma);
	else if (iod->use_sgl)
		nvme_free_sgls(dev, req);
	else
		nvme_free_prps(dev, req);
	mempool_free(iod->sg, dev->iod_mempool);
}

static void nvme_print_sgl(struct scatterlist *sgl, int nents)
{
	int i;
	struct scatterlist *sg;

	for_each_sg(sgl, sg, nents, i) {
		dma_addr_t phys = sg_phys(sg);
		pr_warn("sg[%d] phys_addr:%pad offset:%d length:%d "
			"dma_address:%pad dma_length:%d\n",
			i, &phys, sg->offset, sg->length, &sg_dma_address(sg),
			sg_dma_len(sg));
	}
}

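/*
 * PRP refresher for the function below: PRP1 may point anywhere inside a
 * controller page; every later PRP entry must be page-aligned.  A transfer
 * that fits in two controller pages puts the second page address directly
 * in PRP2; anything larger makes PRP2 point to a PRP list, where the last
 * slot of each full list page chains to the next list page.
 */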
static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev,
		struct request *req, struct nvme_rw_command *cmnd)
{
	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
	struct dma_pool *pool;
	int length = blk_rq_payload_bytes(req);
	struct scatterlist *sg = iod->sg;
	int dma_len = sg_dma_len(sg);
	u64 dma_addr = sg_dma_address(sg);
	int offset = dma_addr & (NVME_CTRL_PAGE_SIZE - 1);
	__le64 *prp_list;
	void **list = nvme_pci_iod_list(req);
	dma_addr_t prp_dma;
	int nprps, i;

	length -= (NVME_CTRL_PAGE_SIZE - offset);
	if (length <= 0) {
		iod->first_dma = 0;
		goto done;
	}

	dma_len -= (NVME_CTRL_PAGE_SIZE - offset);
	if (dma_len) {
		dma_addr += (NVME_CTRL_PAGE_SIZE - offset);
	} else {
		sg = sg_next(sg);
		dma_addr = sg_dma_address(sg);
		dma_len = sg_dma_len(sg);
	}

	if (length <= NVME_CTRL_PAGE_SIZE) {
		iod->first_dma = dma_addr;
		goto done;
	}

	nprps = DIV_ROUND_UP(length, NVME_CTRL_PAGE_SIZE);
	if (nprps <= (256 / 8)) {
		pool = dev->prp_small_pool;
		iod->npages = 0;
	} else {
		pool = dev->prp_page_pool;
		iod->npages = 1;
	}

	prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &prp_dma);
	if (!prp_list) {
		iod->first_dma = dma_addr;
		iod->npages = -1;
		return BLK_STS_RESOURCE;
	}
	list[0] = prp_list;
	iod->first_dma = prp_dma;
	i = 0;
	for (;;) {
		if (i == NVME_CTRL_PAGE_SIZE >> 3) {
			__le64 *old_prp_list = prp_list;
			prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &prp_dma);
			if (!prp_list)
				goto free_prps;
			list[iod->npages++] = prp_list;
			prp_list[0] = old_prp_list[i - 1];
			old_prp_list[i - 1] = cpu_to_le64(prp_dma);
			i = 1;
		}
		prp_list[i++] = cpu_to_le64(dma_addr);
		dma_len -= NVME_CTRL_PAGE_SIZE;
		dma_addr += NVME_CTRL_PAGE_SIZE;
		length -= NVME_CTRL_PAGE_SIZE;
		if (length <= 0)
			break;
		if (dma_len > 0)
			continue;
		if (unlikely(dma_len < 0))
			goto bad_sgl;
		sg = sg_next(sg);
		dma_addr = sg_dma_address(sg);
		dma_len = sg_dma_len(sg);
	}
done:
	cmnd->dptr.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
	cmnd->dptr.prp2 = cpu_to_le64(iod->first_dma);
	return BLK_STS_OK;
free_prps:
	nvme_free_prps(dev, req);
	return BLK_STS_RESOURCE;
bad_sgl:
	WARN(DO_ONCE(nvme_print_sgl, iod->sg, iod->nents),
			"Invalid SGL for payload:%d nents:%d\n",
			blk_rq_payload_bytes(req), iod->nents);
	return BLK_STS_IOERR;
}

static void nvme_pci_sgl_set_data(struct nvme_sgl_desc *sge,
		struct scatterlist *sg)
{
	sge->addr = cpu_to_le64(sg_dma_address(sg));
	sge->length = cpu_to_le32(sg_dma_len(sg));
	sge->type = NVME_SGL_FMT_DATA_DESC << 4;
}

static void nvme_pci_sgl_set_seg(struct nvme_sgl_desc *sge,
		dma_addr_t dma_addr, int entries)
{
	sge->addr = cpu_to_le64(dma_addr);
	if (entries < SGES_PER_PAGE) {
		sge->length = cpu_to_le32(entries * sizeof(*sge));
		sge->type = NVME_SGL_FMT_LAST_SEG_DESC << 4;
	} else {
		sge->length = cpu_to_le32(NVME_CTRL_PAGE_SIZE);
		sge->type = NVME_SGL_FMT_SEG_DESC << 4;
	}
}
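
/*
 * A segment descriptor that does not fill a whole page is by construction
 * the final one, hence the LAST_SEG type above; a full page of descriptors
 * implies more follow, so it is marked as an ordinary SEG descriptor
 * covering the whole controller page.
 */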

static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev,
		struct request *req, struct nvme_rw_command *cmd, int entries)
{
	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
	struct dma_pool *pool;
	struct nvme_sgl_desc *sg_list;
	struct scatterlist *sg = iod->sg;
	dma_addr_t sgl_dma;
	int i = 0;

	/* setting the transfer type as SGL */
	cmd->flags = NVME_CMD_SGL_METABUF;

	if (entries == 1) {
		nvme_pci_sgl_set_data(&cmd->dptr.sgl, sg);
		return BLK_STS_OK;
	}

	if (entries <= (256 / sizeof(struct nvme_sgl_desc))) {
		pool = dev->prp_small_pool;
		iod->npages = 0;
	} else {
		pool = dev->prp_page_pool;
		iod->npages = 1;
	}

	sg_list = dma_pool_alloc(pool, GFP_ATOMIC, &sgl_dma);
	if (!sg_list) {
		iod->npages = -1;
		return BLK_STS_RESOURCE;
	}

	nvme_pci_iod_list(req)[0] = sg_list;
	iod->first_dma = sgl_dma;

	nvme_pci_sgl_set_seg(&cmd->dptr.sgl, sgl_dma, entries);

	do {
		if (i == SGES_PER_PAGE) {
			struct nvme_sgl_desc *old_sg_desc = sg_list;
			struct nvme_sgl_desc *link = &old_sg_desc[i - 1];

			sg_list = dma_pool_alloc(pool, GFP_ATOMIC, &sgl_dma);
			if (!sg_list)
				goto free_sgls;

			i = 0;
			nvme_pci_iod_list(req)[iod->npages++] = sg_list;
			sg_list[i++] = *link;
			nvme_pci_sgl_set_seg(link, sgl_dma, entries);
		}

		nvme_pci_sgl_set_data(&sg_list[i++], sg);
		sg = sg_next(sg);
	} while (--entries > 0);

	return BLK_STS_OK;
free_sgls:
	nvme_free_sgls(dev, req);
	return BLK_STS_RESOURCE;
}

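/*
 * The two "simple" helpers below handle the common single-segment case
 * without touching the iod mempool or building a scatterlist: the bio_vec
 * is DMA-mapped directly and described with either one or two PRPs, or a
 * single inline SGL data descriptor.
 */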
static blk_status_t nvme_setup_prp_simple(struct nvme_dev *dev,
		struct request *req, struct nvme_rw_command *cmnd,
		struct bio_vec *bv)
{
	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
	unsigned int offset = bv->bv_offset & (NVME_CTRL_PAGE_SIZE - 1);
	unsigned int first_prp_len = NVME_CTRL_PAGE_SIZE - offset;

	iod->first_dma = dma_map_bvec(dev->dev, bv, rq_dma_dir(req), 0);
	if (dma_mapping_error(dev->dev, iod->first_dma))
		return BLK_STS_RESOURCE;
	iod->dma_len = bv->bv_len;

	cmnd->dptr.prp1 = cpu_to_le64(iod->first_dma);
	if (bv->bv_len > first_prp_len)
		cmnd->dptr.prp2 = cpu_to_le64(iod->first_dma + first_prp_len);
	else
		cmnd->dptr.prp2 = 0;
	return BLK_STS_OK;
}

static blk_status_t nvme_setup_sgl_simple(struct nvme_dev *dev,
		struct request *req, struct nvme_rw_command *cmnd,
		struct bio_vec *bv)
{
	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);

	iod->first_dma = dma_map_bvec(dev->dev, bv, rq_dma_dir(req), 0);
	if (dma_mapping_error(dev->dev, iod->first_dma))
		return BLK_STS_RESOURCE;
	iod->dma_len = bv->bv_len;

	cmnd->flags = NVME_CMD_SGL_METABUF;
	cmnd->dptr.sgl.addr = cpu_to_le64(iod->first_dma);
	cmnd->dptr.sgl.length = cpu_to_le32(iod->dma_len);
	cmnd->dptr.sgl.type = NVME_SGL_FMT_DATA_DESC << 4;
	return BLK_STS_OK;
}

static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
		struct nvme_command *cmnd)
{
	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
	blk_status_t ret = BLK_STS_RESOURCE;
	int nr_mapped;

	if (blk_rq_nr_phys_segments(req) == 1) {
		struct bio_vec bv = req_bvec(req);

		if (!is_pci_p2pdma_page(bv.bv_page)) {
			if (bv.bv_offset + bv.bv_len <= NVME_CTRL_PAGE_SIZE * 2)
				return nvme_setup_prp_simple(dev, req,
							     &cmnd->rw, &bv);

			if (iod->nvmeq->qid && sgl_threshold &&
			    dev->ctrl.sgls & ((1 << 0) | (1 << 1)))
				return nvme_setup_sgl_simple(dev, req,
							     &cmnd->rw, &bv);
		}
	}

	iod->dma_len = 0;
	iod->sg = mempool_alloc(dev->iod_mempool, GFP_ATOMIC);
	if (!iod->sg)
		return BLK_STS_RESOURCE;
	sg_init_table(iod->sg, blk_rq_nr_phys_segments(req));
	iod->nents = blk_rq_map_sg(req->q, req, iod->sg);
	if (!iod->nents)
		goto out_free_sg;

	if (is_pci_p2pdma_page(sg_page(iod->sg)))
		nr_mapped = pci_p2pdma_map_sg_attrs(dev->dev, iod->sg,
				iod->nents, rq_dma_dir(req), DMA_ATTR_NO_WARN);
	else
		nr_mapped = dma_map_sg_attrs(dev->dev, iod->sg, iod->nents,
					     rq_dma_dir(req), DMA_ATTR_NO_WARN);
	if (!nr_mapped)
		goto out_free_sg;

	iod->use_sgl = nvme_pci_use_sgls(dev, req);
	if (iod->use_sgl)
		ret = nvme_pci_setup_sgls(dev, req, &cmnd->rw, nr_mapped);
	else
		ret = nvme_pci_setup_prps(dev, req, &cmnd->rw);
	if (ret != BLK_STS_OK)
		goto out_unmap_sg;
	return BLK_STS_OK;

out_unmap_sg:
	nvme_unmap_sg(dev, req);
out_free_sg:
	mempool_free(iod->sg, dev->iod_mempool);
	return ret;
}

static blk_status_t nvme_map_metadata(struct nvme_dev *dev, struct request *req,
		struct nvme_command *cmnd)
{
	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);

	iod->meta_dma = dma_map_bvec(dev->dev, rq_integrity_vec(req),
			rq_dma_dir(req), 0);
	if (dma_mapping_error(dev->dev, iod->meta_dma))
		return BLK_STS_IOERR;
	cmnd->rw.metadata = cpu_to_le64(iod->meta_dma);
	return BLK_STS_OK;
}

/*
 * NOTE: ns is NULL when called on the admin queue.
 */
static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
			 const struct blk_mq_queue_data *bd)
{
	struct nvme_ns *ns = hctx->queue->queuedata;
	struct nvme_queue *nvmeq = hctx->driver_data;
	struct nvme_dev *dev = nvmeq->dev;
	struct request *req = bd->rq;
	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
	struct nvme_command *cmnd = &iod->cmd;
	blk_status_t ret;

	iod->aborted = 0;
	iod->npages = -1;
	iod->nents = 0;

	/*
	 * We should not need to do this, but we're still using this to
	 * ensure we can drain requests on a dying queue.
	 */
	if (unlikely(!test_bit(NVMEQ_ENABLED, &nvmeq->flags)))
		return BLK_STS_IOERR;

	ret = nvme_setup_cmd(ns, req, cmnd);
	if (ret)
		return ret;

	if (blk_rq_nr_phys_segments(req)) {
		ret = nvme_map_data(dev, req, cmnd);
		if (ret)
			goto out_free_cmd;
	}

	if (blk_integrity_rq(req)) {
		ret = nvme_map_metadata(dev, req, cmnd);
		if (ret)
			goto out_unmap_data;
	}

	blk_mq_start_request(req);
	nvme_submit_cmd(nvmeq, cmnd, bd->last);
	return BLK_STS_OK;
out_unmap_data:
	if (blk_rq_nr_phys_segments(req))
		nvme_unmap_data(dev, req);
out_free_cmd:
	nvme_cleanup_cmd(req);
	return ret;
}

static void nvme_pci_complete_rq(struct request *req)
{
	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
	struct nvme_dev *dev = iod->nvmeq->dev;

	if (blk_integrity_rq(req))
		dma_unmap_page(dev->dev, iod->meta_dma,
			       rq_integrity_vec(req)->bv_len, rq_dma_dir(req));

	if (blk_rq_nr_phys_segments(req))
		nvme_unmap_data(dev, req);
	nvme_complete_rq(req);
}

/* We read the CQE phase first to check if the rest of the entry is valid */
static inline bool nvme_cqe_pending(struct nvme_queue *nvmeq)
{
	struct nvme_completion *hcqe = &nvmeq->cqes[nvmeq->cq_head];

	return (le16_to_cpu(READ_ONCE(hcqe->status)) & 1) == nvmeq->cq_phase;
}
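
/*
 * Phase-bit example: the CQ memory is zero-initialised and cq_phase starts
 * at 1, so an entry becomes valid once the controller writes its status
 * with the phase bit set.  After the head wraps past the end of the queue
 * the expected phase flips (see nvme_update_cq_head()), so stale entries
 * from the previous pass are never mistaken for new completions.
 */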

static inline void nvme_ring_cq_doorbell(struct nvme_queue *nvmeq)
{
	u16 head = nvmeq->cq_head;

	if (nvme_dbbuf_update_and_check_event(head, nvmeq->dbbuf_cq_db,
					      nvmeq->dbbuf_cq_ei))
		writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
}

static inline struct blk_mq_tags *nvme_queue_tagset(struct nvme_queue *nvmeq)
{
	if (!nvmeq->qid)
		return nvmeq->dev->admin_tagset.tags[0];
	return nvmeq->dev->tagset.tags[nvmeq->qid - 1];
}

static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx)
{
	struct nvme_completion *cqe = &nvmeq->cqes[idx];
	__u16 command_id = READ_ONCE(cqe->command_id);
	struct request *req;

	/*
	 * AEN requests are special as they don't time out and can
	 * survive any kind of queue freeze and often don't respond to
	 * aborts.  We don't even bother to allocate a struct request
	 * for them but rather special case them here.
	 */
	if (unlikely(nvme_is_aen_req(nvmeq->qid, command_id))) {
		nvme_complete_async_event(&nvmeq->dev->ctrl,
				cqe->status, &cqe->result);
		return;
	}

	req = nvme_find_rq(nvme_queue_tagset(nvmeq), command_id);
	if (unlikely(!req)) {
		dev_warn(nvmeq->dev->ctrl.device,
			"invalid id %d completed on queue %d\n",
			command_id, le16_to_cpu(cqe->sq_id));
		return;
	}

	trace_nvme_sq(req, cqe->sq_head, nvmeq->sq_tail);
	if (!nvme_try_complete_req(req, cqe->status, cqe->result))
		nvme_pci_complete_rq(req);
}

static inline void nvme_update_cq_head(struct nvme_queue *nvmeq)
{
	u32 tmp = nvmeq->cq_head + 1;

	if (tmp == nvmeq->q_depth) {
		nvmeq->cq_head = 0;
		nvmeq->cq_phase ^= 1;
	} else {
		nvmeq->cq_head = tmp;
	}
}

static inline int nvme_process_cq(struct nvme_queue *nvmeq)
{
	int found = 0;

	while (nvme_cqe_pending(nvmeq)) {
		found++;
		/*
		 * load-load control dependency between phase and the rest of
		 * the cqe requires a full read memory barrier
		 */
		dma_rmb();
		nvme_handle_cqe(nvmeq, nvmeq->cq_head);
		nvme_update_cq_head(nvmeq);
	}

	if (found)
		nvme_ring_cq_doorbell(nvmeq);
	return found;
}

static irqreturn_t nvme_irq(int irq, void *data)
{
	struct nvme_queue *nvmeq = data;
	irqreturn_t ret = IRQ_NONE;

	/*
	 * The rmb/wmb pair ensures we see all updates from a previous run of
	 * the irq handler, even if that was on another CPU.
	 */
	rmb();
	if (nvme_process_cq(nvmeq))
		ret = IRQ_HANDLED;
	wmb();

	return ret;
}

static irqreturn_t nvme_irq_check(int irq, void *data)
{
	struct nvme_queue *nvmeq = data;

	if (nvme_cqe_pending(nvmeq))
		return IRQ_WAKE_THREAD;
	return IRQ_NONE;
}

/*
 * Poll for completions on any interrupt-driven queue.
 * Can be called from any context.
 */
10888c2ecf20Sopenharmony_cistatic void nvme_poll_irqdisable(struct nvme_queue *nvmeq)
10898c2ecf20Sopenharmony_ci{
10908c2ecf20Sopenharmony_ci	struct pci_dev *pdev = to_pci_dev(nvmeq->dev->dev);
10918c2ecf20Sopenharmony_ci
10928c2ecf20Sopenharmony_ci	WARN_ON_ONCE(test_bit(NVMEQ_POLLED, &nvmeq->flags));
10938c2ecf20Sopenharmony_ci
10948c2ecf20Sopenharmony_ci	disable_irq(pci_irq_vector(pdev, nvmeq->cq_vector));
10958c2ecf20Sopenharmony_ci	nvme_process_cq(nvmeq);
10968c2ecf20Sopenharmony_ci	enable_irq(pci_irq_vector(pdev, nvmeq->cq_vector));
10978c2ecf20Sopenharmony_ci}
10988c2ecf20Sopenharmony_ci
static int nvme_poll(struct blk_mq_hw_ctx *hctx)
{
	struct nvme_queue *nvmeq = hctx->driver_data;
	bool found;

	if (!nvme_cqe_pending(nvmeq))
		return 0;

	spin_lock(&nvmeq->cq_poll_lock);
	found = nvme_process_cq(nvmeq);
	spin_unlock(&nvmeq->cq_poll_lock);

	return found;
}

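/*
 * Submit an Asynchronous Event Request on the admin queue.  AERs use a
 * reserved command id (NVME_AQ_BLK_MQ_DEPTH) outside the blk-mq tag space,
 * so the command is written to the SQ directly rather than via a request.
 */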
static void nvme_pci_submit_async_event(struct nvme_ctrl *ctrl)
{
	struct nvme_dev *dev = to_nvme_dev(ctrl);
	struct nvme_queue *nvmeq = &dev->queues[0];
	struct nvme_command c;

	memset(&c, 0, sizeof(c));
	c.common.opcode = nvme_admin_async_event;
	c.common.command_id = NVME_AQ_BLK_MQ_DEPTH;
	nvme_submit_cmd(nvmeq, &c, true);
}

static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
{
	struct nvme_command c;

	memset(&c, 0, sizeof(c));
	c.delete_queue.opcode = opcode;
	c.delete_queue.qid = cpu_to_le16(id);

	return nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0);
}

static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
		struct nvme_queue *nvmeq, s16 vector)
{
	struct nvme_command c;
	int flags = NVME_QUEUE_PHYS_CONTIG;

	if (!test_bit(NVMEQ_POLLED, &nvmeq->flags))
		flags |= NVME_CQ_IRQ_ENABLED;

	/*
	 * Note: we (ab)use the fact that the prp fields survive if no data
	 * is attached to the request.
	 */
	memset(&c, 0, sizeof(c));
	c.create_cq.opcode = nvme_admin_create_cq;
	c.create_cq.prp1 = cpu_to_le64(nvmeq->cq_dma_addr);
	c.create_cq.cqid = cpu_to_le16(qid);
	c.create_cq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
	c.create_cq.cq_flags = cpu_to_le16(flags);
	c.create_cq.irq_vector = cpu_to_le16(vector);

	return nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0);
}

static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid,
						struct nvme_queue *nvmeq)
{
	struct nvme_ctrl *ctrl = &dev->ctrl;
	struct nvme_command c;
	int flags = NVME_QUEUE_PHYS_CONTIG;

	/*
	 * Some drives have a bug that auto-enables WRRU if MEDIUM isn't
	 * set.  Since URGENT is priority value zero, that would silently
	 * make every queue URGENT.
	 */
	if (ctrl->quirks & NVME_QUIRK_MEDIUM_PRIO_SQ)
		flags |= NVME_SQ_PRIO_MEDIUM;

	/*
	 * Note: we (ab)use the fact that the prp fields survive if no data
	 * is attached to the request.
	 */
	memset(&c, 0, sizeof(c));
	c.create_sq.opcode = nvme_admin_create_sq;
	c.create_sq.prp1 = cpu_to_le64(nvmeq->sq_dma_addr);
	c.create_sq.sqid = cpu_to_le16(qid);
	c.create_sq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
	c.create_sq.sq_flags = cpu_to_le16(flags);
	c.create_sq.cqid = cpu_to_le16(qid);

	return nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0);
}

static int adapter_delete_cq(struct nvme_dev *dev, u16 cqid)
{
	return adapter_delete_queue(dev, nvme_admin_delete_cq, cqid);
}

static int adapter_delete_sq(struct nvme_dev *dev, u16 sqid)
{
	return adapter_delete_queue(dev, nvme_admin_delete_sq, sqid);
}

static void abort_endio(struct request *req, blk_status_t error)
{
	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
	struct nvme_queue *nvmeq = iod->nvmeq;

	dev_warn(nvmeq->dev->ctrl.device,
		 "Abort status: 0x%x", nvme_req(req)->status);
	atomic_inc(&nvmeq->dev->ctrl.abort_limit);
	blk_mq_free_request(req);
}

static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
{
	/* If true, indicates loss of adapter communication, possibly by an
	 * NVMe Subsystem reset.
	 */
	bool nssro = dev->subsystem && (csts & NVME_CSTS_NSSRO);

	/* If there is a reset/reinit ongoing, we shouldn't reset again. */
	switch (dev->ctrl.state) {
	case NVME_CTRL_RESETTING:
	case NVME_CTRL_CONNECTING:
		return false;
	default:
		break;
	}

	/* We shouldn't reset unless the controller is in a fatal error state
	 * _or_ we have lost communication with it.
	 */
	if (!(csts & NVME_CSTS_CFS) && !nssro)
		return false;

	return true;
}

static void nvme_warn_reset(struct nvme_dev *dev, u32 csts)
{
	/* Read a config register to help see what died. */
	u16 pci_status;
	int result;

	result = pci_read_config_word(to_pci_dev(dev->dev), PCI_STATUS,
				      &pci_status);
	if (result == PCIBIOS_SUCCESSFUL)
		dev_warn(dev->ctrl.device,
			 "controller is down; will reset: CSTS=0x%x, PCI_STATUS=0x%hx\n",
			 csts, pci_status);
	else
		dev_warn(dev->ctrl.device,
			 "controller is down; will reset: CSTS=0x%x, PCI_STATUS read failed (%d)\n",
			 csts, result);
}

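/*
 * Timeout handler.  Escalates in stages: bail out while PCI error recovery
 * is in progress, reset a controller that is already in a failed state,
 * poll the queue in case an interrupt was missed, disable the controller
 * when it times out while starting or for a repeat/admin-queue timeout,
 * and otherwise send a single Abort command and rearm the timer.
 */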
static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
{
	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
	struct nvme_queue *nvmeq = iod->nvmeq;
	struct nvme_dev *dev = nvmeq->dev;
	struct request *abort_req;
	struct nvme_command cmd;
	u32 csts = readl(dev->bar + NVME_REG_CSTS);

	/* If the PCI error recovery process is happening, we cannot reset or
	 * the recovery mechanism will surely fail.
	 */
	mb();
	if (pci_channel_offline(to_pci_dev(dev->dev)))
		return BLK_EH_RESET_TIMER;

	/*
	 * Reset immediately if the controller has failed.
	 */
	if (nvme_should_reset(dev, csts)) {
		nvme_warn_reset(dev, csts);
		nvme_dev_disable(dev, false);
		nvme_reset_ctrl(&dev->ctrl);
		return BLK_EH_DONE;
	}

	/*
	 * Did we miss an interrupt?
	 */
	if (test_bit(NVMEQ_POLLED, &nvmeq->flags))
		nvme_poll(req->mq_hctx);
	else
		nvme_poll_irqdisable(nvmeq);

	if (blk_mq_rq_state(req) != MQ_RQ_IN_FLIGHT) {
		dev_warn(dev->ctrl.device,
			 "I/O %d QID %d timeout, completion polled\n",
			 req->tag, nvmeq->qid);
		return BLK_EH_DONE;
	}

	/*
	 * Shutdown immediately if controller times out while starting. The
	 * reset work will see the pci device disabled when it gets the forced
	 * cancellation error. All outstanding requests are completed on
	 * shutdown, so we return BLK_EH_DONE.
	 */
	switch (dev->ctrl.state) {
	case NVME_CTRL_CONNECTING:
		nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
		fallthrough;
	case NVME_CTRL_DELETING:
		dev_warn_ratelimited(dev->ctrl.device,
			 "I/O %d QID %d timeout, disable controller\n",
			 req->tag, nvmeq->qid);
		nvme_req(req)->flags |= NVME_REQ_CANCELLED;
		nvme_dev_disable(dev, true);
		return BLK_EH_DONE;
	case NVME_CTRL_RESETTING:
		return BLK_EH_RESET_TIMER;
	default:
		break;
	}

	/*
	 * Shutdown the controller immediately and schedule a reset if the
	 * command was already aborted once before and still hasn't been
	 * returned to the driver, or if this is the admin queue.
	 */
	if (!nvmeq->qid || iod->aborted) {
		dev_warn(dev->ctrl.device,
			 "I/O %d QID %d timeout, reset controller\n",
			 req->tag, nvmeq->qid);
		nvme_req(req)->flags |= NVME_REQ_CANCELLED;
		nvme_dev_disable(dev, false);
		nvme_reset_ctrl(&dev->ctrl);

		return BLK_EH_DONE;
	}

	if (atomic_dec_return(&dev->ctrl.abort_limit) < 0) {
		atomic_inc(&dev->ctrl.abort_limit);
		return BLK_EH_RESET_TIMER;
	}
	iod->aborted = 1;

	memset(&cmd, 0, sizeof(cmd));
	cmd.abort.opcode = nvme_admin_abort_cmd;
	cmd.abort.cid = nvme_cid(req);
	cmd.abort.sqid = cpu_to_le16(nvmeq->qid);

	dev_warn(nvmeq->dev->ctrl.device,
		"I/O %d QID %d timeout, aborting\n",
		 req->tag, nvmeq->qid);

	abort_req = nvme_alloc_request(dev->ctrl.admin_q, &cmd,
			BLK_MQ_REQ_NOWAIT);
	if (IS_ERR(abort_req)) {
		atomic_inc(&dev->ctrl.abort_limit);
		return BLK_EH_RESET_TIMER;
	}

	abort_req->end_io_data = NULL;
	blk_execute_rq_nowait(abort_req->q, NULL, abort_req, 0, abort_endio);

	/*
	 * The aborted request will be completed when the abort command
	 * completes, so rearm the timer.  If the timeout fires again, the
	 * device is in a faulty state and the next pass resets it.
	 */
	return BLK_EH_RESET_TIMER;
}

static void nvme_free_queue(struct nvme_queue *nvmeq)
{
	dma_free_coherent(nvmeq->dev->dev, CQ_SIZE(nvmeq),
				(void *)nvmeq->cqes, nvmeq->cq_dma_addr);
	if (!nvmeq->sq_cmds)
		return;

	if (test_and_clear_bit(NVMEQ_SQ_CMB, &nvmeq->flags)) {
		pci_free_p2pmem(to_pci_dev(nvmeq->dev->dev),
				nvmeq->sq_cmds, SQ_SIZE(nvmeq));
	} else {
		dma_free_coherent(nvmeq->dev->dev, SQ_SIZE(nvmeq),
				nvmeq->sq_cmds, nvmeq->sq_dma_addr);
	}
}

static void nvme_free_queues(struct nvme_dev *dev, int lowest)
{
	int i;

	for (i = dev->ctrl.queue_count - 1; i >= lowest; i--) {
		dev->ctrl.queue_count--;
		nvme_free_queue(&dev->queues[i]);
	}
}

/**
 * nvme_suspend_queue - put queue into suspended state
 * @nvmeq: queue to suspend
 *
 * Return: 0 if the queue was suspended, 1 if it was already disabled.
 */
static int nvme_suspend_queue(struct nvme_queue *nvmeq)
{
	if (!test_and_clear_bit(NVMEQ_ENABLED, &nvmeq->flags))
		return 1;

	/* ensure that nvme_queue_rq() sees NVMEQ_ENABLED cleared */
	mb();

	nvmeq->dev->online_queues--;
	if (!nvmeq->qid && nvmeq->dev->ctrl.admin_q)
		nvme_stop_admin_queue(&nvmeq->dev->ctrl);
	if (!test_and_clear_bit(NVMEQ_POLLED, &nvmeq->flags))
		pci_free_irq(to_pci_dev(nvmeq->dev->dev), nvmeq->cq_vector, nvmeq);
	return 0;
}

static void nvme_suspend_io_queues(struct nvme_dev *dev)
{
	int i;

	for (i = dev->ctrl.queue_count - 1; i > 0; i--)
		nvme_suspend_queue(&dev->queues[i]);
}

static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown)
{
	struct nvme_queue *nvmeq = &dev->queues[0];

	if (shutdown)
		nvme_shutdown_ctrl(&dev->ctrl);
	else
		nvme_disable_ctrl(&dev->ctrl);

	nvme_poll_irqdisable(nvmeq);
}

/*
 * Called only on a device that has been disabled and after all other threads
 * that can check this device's completion queues have synced, except
 * nvme_poll(). This is the last chance for the driver to see a natural
 * completion before nvme_cancel_request() terminates all incomplete requests.
 */
static void nvme_reap_pending_cqes(struct nvme_dev *dev)
{
	int i;

	for (i = dev->ctrl.queue_count - 1; i > 0; i--) {
		spin_lock(&dev->queues[i].cq_poll_lock);
		nvme_process_cq(&dev->queues[i]);
		spin_unlock(&dev->queues[i].cq_poll_lock);
	}
}

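/*
 * Compute the queue depth that lets nr_io_queues submission queues fit into
 * the controller memory buffer.  Returns the (possibly reduced) depth, or
 * -ENOMEM if the depth would fall below a threshold (64) where host memory
 * at the original depth is the better choice.
 */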
static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues,
				int entry_size)
{
	int q_depth = dev->q_depth;
	unsigned q_size_aligned = roundup(q_depth * entry_size,
					  NVME_CTRL_PAGE_SIZE);

	if (q_size_aligned * nr_io_queues > dev->cmb_size) {
		u64 mem_per_q = div_u64(dev->cmb_size, nr_io_queues);

		mem_per_q = round_down(mem_per_q, NVME_CTRL_PAGE_SIZE);
		q_depth = div_u64(mem_per_q, entry_size);

		/*
		 * Ensure the reduced q_depth is above some threshold where it
		 * would be better to map queues in system memory with the
		 * original depth
		 */
		if (q_depth < 64)
			return -ENOMEM;
	}

	return q_depth;
}

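/*
 * Allocate the submission queue command buffer: prefer PCI peer-to-peer
 * memory in the controller memory buffer for I/O queues when enabled, and
 * fall back to coherent host memory otherwise.
 */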
static int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq,
				int qid)
{
	struct pci_dev *pdev = to_pci_dev(dev->dev);

	if (qid && dev->cmb_use_sqes && (dev->cmbsz & NVME_CMBSZ_SQS)) {
		nvmeq->sq_cmds = pci_alloc_p2pmem(pdev, SQ_SIZE(nvmeq));
		if (nvmeq->sq_cmds) {
			nvmeq->sq_dma_addr = pci_p2pmem_virt_to_bus(pdev,
							nvmeq->sq_cmds);
			if (nvmeq->sq_dma_addr) {
				set_bit(NVMEQ_SQ_CMB, &nvmeq->flags);
				return 0;
			}

			pci_free_p2pmem(pdev, nvmeq->sq_cmds, SQ_SIZE(nvmeq));
		}
	}

	nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(nvmeq),
				&nvmeq->sq_dma_addr, GFP_KERNEL);
	if (!nvmeq->sq_cmds)
		return -ENOMEM;
	return 0;
}

static int nvme_alloc_queue(struct nvme_dev *dev, int qid, int depth)
{
	struct nvme_queue *nvmeq = &dev->queues[qid];

	if (dev->ctrl.queue_count > qid)
		return 0;

	nvmeq->sqes = qid ? dev->io_sqes : NVME_ADM_SQES;
	nvmeq->q_depth = depth;
	nvmeq->cqes = dma_alloc_coherent(dev->dev, CQ_SIZE(nvmeq),
					 &nvmeq->cq_dma_addr, GFP_KERNEL);
	if (!nvmeq->cqes)
		goto free_nvmeq;

	if (nvme_alloc_sq_cmds(dev, nvmeq, qid))
		goto free_cqdma;

	nvmeq->dev = dev;
	spin_lock_init(&nvmeq->sq_lock);
	spin_lock_init(&nvmeq->cq_poll_lock);
	nvmeq->cq_head = 0;
	nvmeq->cq_phase = 1;
	nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
	nvmeq->qid = qid;
	dev->ctrl.queue_count++;

	return 0;

 free_cqdma:
	dma_free_coherent(dev->dev, CQ_SIZE(nvmeq), (void *)nvmeq->cqes,
			  nvmeq->cq_dma_addr);
 free_nvmeq:
	return -ENOMEM;
}

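/*
 * Request the interrupt for a queue's vector, splitting it into a quick
 * check handler plus a threaded handler when use_threaded_interrupts is set.
 */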
static int queue_request_irq(struct nvme_queue *nvmeq)
{
	struct pci_dev *pdev = to_pci_dev(nvmeq->dev->dev);
	int nr = nvmeq->dev->ctrl.instance;

	if (use_threaded_interrupts) {
		return pci_request_irq(pdev, nvmeq->cq_vector, nvme_irq_check,
				nvme_irq, nvmeq, "nvme%dq%d", nr, nvmeq->qid);
	} else {
		return pci_request_irq(pdev, nvmeq->cq_vector, nvme_irq,
				NULL, nvmeq, "nvme%dq%d", nr, nvmeq->qid);
	}
}

static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid)
{
	struct nvme_dev *dev = nvmeq->dev;

	nvmeq->sq_tail = 0;
	nvmeq->last_sq_tail = 0;
	nvmeq->cq_head = 0;
	nvmeq->cq_phase = 1;
	nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
	memset((void *)nvmeq->cqes, 0, CQ_SIZE(nvmeq));
	nvme_dbbuf_init(dev, nvmeq, qid);
	dev->online_queues++;
	wmb(); /* ensure the first interrupt sees the initialization */
}

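/*
 * Create an I/O queue pair on the controller: the completion queue first
 * (so the submission queue has something to post to), then the submission
 * queue, and finally the interrupt unless the queue is polled.  On failure
 * the queues are torn down in reverse order.
 */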
static int nvme_create_queue(struct nvme_queue *nvmeq, int qid, bool polled)
{
	struct nvme_dev *dev = nvmeq->dev;
	int result;
	u16 vector = 0;

	clear_bit(NVMEQ_DELETE_ERROR, &nvmeq->flags);

	/*
	 * A queue's vector matches the queue identifier unless the controller
	 * has only one vector available.
	 */
	if (!polled)
		vector = dev->num_vecs == 1 ? 0 : qid;
	else
		set_bit(NVMEQ_POLLED, &nvmeq->flags);

	result = adapter_alloc_cq(dev, qid, nvmeq, vector);
	if (result)
		return result;

	result = adapter_alloc_sq(dev, qid, nvmeq);
	if (result < 0)
		return result;
	if (result)
		goto release_cq;

	nvmeq->cq_vector = vector;
	nvme_init_queue(nvmeq, qid);

	if (!polled) {
		result = queue_request_irq(nvmeq);
		if (result < 0)
			goto release_sq;
	}

	set_bit(NVMEQ_ENABLED, &nvmeq->flags);
	return result;

release_sq:
	dev->online_queues--;
	adapter_delete_sq(dev, qid);
release_cq:
	adapter_delete_cq(dev, qid);
	return result;
}

static const struct blk_mq_ops nvme_mq_admin_ops = {
	.queue_rq	= nvme_queue_rq,
	.complete	= nvme_pci_complete_rq,
	.init_hctx	= nvme_admin_init_hctx,
	.init_request	= nvme_init_request,
	.timeout	= nvme_timeout,
};

static const struct blk_mq_ops nvme_mq_ops = {
	.queue_rq	= nvme_queue_rq,
	.complete	= nvme_pci_complete_rq,
	.commit_rqs	= nvme_commit_rqs,
	.init_hctx	= nvme_init_hctx,
	.init_request	= nvme_init_request,
	.map_queues	= nvme_pci_map_queues,
	.timeout	= nvme_timeout,
	.poll		= nvme_poll,
};

static void nvme_dev_remove_admin(struct nvme_dev *dev)
{
	if (dev->ctrl.admin_q && !blk_queue_dying(dev->ctrl.admin_q)) {
		/*
		 * If the controller was reset during removal, it's possible
		 * user requests may be waiting on a stopped queue. Start the
		 * queue to flush these to completion.
		 */
		nvme_start_admin_queue(&dev->ctrl);
		blk_cleanup_queue(dev->ctrl.admin_q);
		blk_mq_free_tag_set(&dev->admin_tagset);
	}
}

static int nvme_alloc_admin_tags(struct nvme_dev *dev)
{
	if (!dev->ctrl.admin_q) {
		dev->admin_tagset.ops = &nvme_mq_admin_ops;
		dev->admin_tagset.nr_hw_queues = 1;

		dev->admin_tagset.queue_depth = NVME_AQ_MQ_TAG_DEPTH;
		dev->admin_tagset.timeout = ADMIN_TIMEOUT;
		dev->admin_tagset.numa_node = dev->ctrl.numa_node;
		dev->admin_tagset.cmd_size = sizeof(struct nvme_iod);
		dev->admin_tagset.flags = BLK_MQ_F_NO_SCHED;
		dev->admin_tagset.driver_data = dev;

		if (blk_mq_alloc_tag_set(&dev->admin_tagset))
			return -ENOMEM;
		dev->ctrl.admin_tagset = &dev->admin_tagset;

		dev->ctrl.admin_q = blk_mq_init_queue(&dev->admin_tagset);
		if (IS_ERR(dev->ctrl.admin_q)) {
			blk_mq_free_tag_set(&dev->admin_tagset);
			dev->ctrl.admin_q = NULL;
			return -ENOMEM;
		}
		if (!blk_get_queue(dev->ctrl.admin_q)) {
			nvme_dev_remove_admin(dev);
			dev->ctrl.admin_q = NULL;
			return -ENODEV;
		}
	} else
		nvme_start_admin_queue(&dev->ctrl);

	return 0;
}

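/*
 * Size of the BAR mapping needed to cover every doorbell: each queue has a
 * submission and a completion doorbell, (4 << CAP.DSTRD) bytes apart,
 * starting at register offset 0x1000 (NVME_REG_DBS).
 */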
static unsigned long db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues)
{
	return NVME_REG_DBS + ((nr_io_queues + 1) * 8 * dev->db_stride);
}

static int nvme_remap_bar(struct nvme_dev *dev, unsigned long size)
{
	struct pci_dev *pdev = to_pci_dev(dev->dev);

	if (size <= dev->bar_mapped_size)
		return 0;
	if (size > pci_resource_len(pdev, 0))
		return -ENOMEM;
	if (dev->bar)
		iounmap(dev->bar);
	dev->bar = ioremap(pci_resource_start(pdev, 0), size);
	if (!dev->bar) {
		dev->bar_mapped_size = 0;
		return -ENOMEM;
	}
	dev->bar_mapped_size = size;
	dev->dbs = dev->bar + NVME_REG_DBS;

	return 0;
}

static int nvme_pci_configure_admin_queue(struct nvme_dev *dev)
{
	int result;
	u32 aqa;
	struct nvme_queue *nvmeq;

	result = nvme_remap_bar(dev, db_bar_size(dev, 0));
	if (result < 0)
		return result;

	dev->subsystem = readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 1, 0) ?
				NVME_CAP_NSSRC(dev->ctrl.cap) : 0;

	if (dev->subsystem &&
	    (readl(dev->bar + NVME_REG_CSTS) & NVME_CSTS_NSSRO))
		writel(NVME_CSTS_NSSRO, dev->bar + NVME_REG_CSTS);

	result = nvme_disable_ctrl(&dev->ctrl);
	if (result < 0)
		return result;

	result = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH);
	if (result)
		return result;

	dev->ctrl.numa_node = dev_to_node(dev->dev);

	nvmeq = &dev->queues[0];
	aqa = nvmeq->q_depth - 1;
	aqa |= aqa << 16;

	writel(aqa, dev->bar + NVME_REG_AQA);
	lo_hi_writeq(nvmeq->sq_dma_addr, dev->bar + NVME_REG_ASQ);
	lo_hi_writeq(nvmeq->cq_dma_addr, dev->bar + NVME_REG_ACQ);

	result = nvme_enable_ctrl(&dev->ctrl);
	if (result)
		return result;

	nvmeq->cq_vector = 0;
	nvme_init_queue(nvmeq, 0);
	result = queue_request_irq(nvmeq);
	if (result) {
		dev->online_queues--;
		return result;
	}

	set_bit(NVMEQ_ENABLED, &nvmeq->flags);
	return result;
}

static int nvme_create_io_queues(struct nvme_dev *dev)
{
	unsigned i, max, rw_queues;
	int ret = 0;

	for (i = dev->ctrl.queue_count; i <= dev->max_qid; i++) {
		if (nvme_alloc_queue(dev, i, dev->q_depth)) {
			ret = -ENOMEM;
			break;
		}
	}

	max = min(dev->max_qid, dev->ctrl.queue_count - 1);
	if (max != 1 && dev->io_queues[HCTX_TYPE_POLL]) {
		rw_queues = dev->io_queues[HCTX_TYPE_DEFAULT] +
				dev->io_queues[HCTX_TYPE_READ];
	} else {
		rw_queues = max;
	}

	for (i = dev->online_queues; i <= max; i++) {
		bool polled = i > rw_queues;

		ret = nvme_create_queue(&dev->queues[i], i, polled);
		if (ret)
			break;
	}

	/*
	 * Ignore failing Create SQ/CQ commands; we can continue with fewer
	 * than the desired number of queues, and even a controller without
	 * I/O queues can still be used to issue admin commands.  This might
	 * be useful, for example, to upgrade buggy firmware.
	 */
	return ret >= 0 ? 0 : ret;
}

static ssize_t nvme_cmb_show(struct device *dev,
			     struct device_attribute *attr,
			     char *buf)
{
	struct nvme_dev *ndev = to_nvme_dev(dev_get_drvdata(dev));

	return scnprintf(buf, PAGE_SIZE, "cmbloc : x%08x\ncmbsz  : x%08x\n",
		       ndev->cmbloc, ndev->cmbsz);
}
static DEVICE_ATTR(cmb, S_IRUGO, nvme_cmb_show, NULL);

static u64 nvme_cmb_size_unit(struct nvme_dev *dev)
{
	u8 szu = (dev->cmbsz >> NVME_CMBSZ_SZU_SHIFT) & NVME_CMBSZ_SZU_MASK;

	return 1ULL << (12 + 4 * szu);
}

static u32 nvme_cmb_size(struct nvme_dev *dev)
{
	return (dev->cmbsz >> NVME_CMBSZ_SZ_SHIFT) & NVME_CMBSZ_SZ_MASK;
}

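/*
 * Discover and map the controller memory buffer: read CMBSZ/CMBLOC, clamp
 * the region to its BAR, register it as a PCI peer-to-peer DMA resource,
 * and expose the raw registers via the "cmb" sysfs attribute.
 */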
static void nvme_map_cmb(struct nvme_dev *dev)
{
	u64 size, offset;
	resource_size_t bar_size;
	struct pci_dev *pdev = to_pci_dev(dev->dev);
	int bar;

	if (dev->cmb_size)
		return;

	if (NVME_CAP_CMBS(dev->ctrl.cap))
		writel(NVME_CMBMSC_CRE, dev->bar + NVME_REG_CMBMSC);

	dev->cmbsz = readl(dev->bar + NVME_REG_CMBSZ);
	if (!dev->cmbsz)
		return;
	dev->cmbloc = readl(dev->bar + NVME_REG_CMBLOC);

	size = nvme_cmb_size_unit(dev) * nvme_cmb_size(dev);
	offset = nvme_cmb_size_unit(dev) * NVME_CMB_OFST(dev->cmbloc);
	bar = NVME_CMB_BIR(dev->cmbloc);
	bar_size = pci_resource_len(pdev, bar);

	if (offset > bar_size)
		return;

	/*
	 * Tell the controller about the host side address mapping the CMB,
	 * and enable CMB decoding for the NVMe 1.4+ scheme:
	 */
	if (NVME_CAP_CMBS(dev->ctrl.cap)) {
		hi_lo_writeq(NVME_CMBMSC_CRE | NVME_CMBMSC_CMSE |
			     (pci_bus_address(pdev, bar) + offset),
			     dev->bar + NVME_REG_CMBMSC);
	}

	/*
	 * Controllers may support a CMB size larger than their BAR, for
	 * example, due to being behind a bridge.  Reduce the CMB to the
	 * reported size of the BAR.
	 */
	if (size > bar_size - offset)
		size = bar_size - offset;

	if (pci_p2pdma_add_resource(pdev, bar, size, offset)) {
		dev_warn(dev->ctrl.device,
			 "failed to register the CMB\n");
		return;
	}

	dev->cmb_size = size;
	dev->cmb_use_sqes = use_cmb_sqes && (dev->cmbsz & NVME_CMBSZ_SQS);

	if ((dev->cmbsz & (NVME_CMBSZ_WDS | NVME_CMBSZ_RDS)) ==
			(NVME_CMBSZ_WDS | NVME_CMBSZ_RDS))
		pci_p2pmem_publish(pdev, true);

	if (sysfs_add_file_to_group(&dev->ctrl.device->kobj,
				    &dev_attr_cmb.attr, NULL))
		dev_warn(dev->ctrl.device,
			 "failed to add sysfs attribute for CMB\n");
}

static inline void nvme_release_cmb(struct nvme_dev *dev)
{
	if (dev->cmb_size) {
		sysfs_remove_file_from_group(&dev->ctrl.device->kobj,
					     &dev_attr_cmb.attr, NULL);
		dev->cmb_size = 0;
	}
}

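/*
 * Issue the Set Features (Host Memory Buffer) command that points the
 * controller at the descriptor list built by __nvme_alloc_host_mem().  The
 * bits argument carries NVME_HOST_MEM_ENABLE and/or NVME_HOST_MEM_RETURN.
 */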
static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits)
{
	u32 host_mem_size = dev->host_mem_size >> NVME_CTRL_PAGE_SHIFT;
	u64 dma_addr = dev->host_mem_descs_dma;
	struct nvme_command c;
	int ret;

	memset(&c, 0, sizeof(c));
	c.features.opcode	= nvme_admin_set_features;
	c.features.fid		= cpu_to_le32(NVME_FEAT_HOST_MEM_BUF);
	c.features.dword11	= cpu_to_le32(bits);
	c.features.dword12	= cpu_to_le32(host_mem_size);
	c.features.dword13	= cpu_to_le32(lower_32_bits(dma_addr));
	c.features.dword14	= cpu_to_le32(upper_32_bits(dma_addr));
	c.features.dword15	= cpu_to_le32(dev->nr_host_mem_descs);

	ret = nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0);
	if (ret) {
		dev_warn(dev->ctrl.device,
			 "failed to set host mem (err %d, flags %#x).\n",
			 ret, bits);
	}
	return ret;
}

static void nvme_free_host_mem(struct nvme_dev *dev)
{
	int i;

	for (i = 0; i < dev->nr_host_mem_descs; i++) {
		struct nvme_host_mem_buf_desc *desc = &dev->host_mem_descs[i];
		size_t size = le32_to_cpu(desc->size) * NVME_CTRL_PAGE_SIZE;

		dma_free_attrs(dev->dev, size, dev->host_mem_desc_bufs[i],
			       le64_to_cpu(desc->addr),
			       DMA_ATTR_NO_KERNEL_MAPPING | DMA_ATTR_NO_WARN);
	}

	kfree(dev->host_mem_desc_bufs);
	dev->host_mem_desc_bufs = NULL;
	dma_free_coherent(dev->dev,
			dev->nr_host_mem_descs * sizeof(*dev->host_mem_descs),
			dev->host_mem_descs, dev->host_mem_descs_dma);
	dev->host_mem_descs = NULL;
	dev->nr_host_mem_descs = 0;
}

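/*
 * Allocate host memory buffer chunks of the given size until either the
 * preferred total or the descriptor limit (HMMAXD) is reached, recording
 * each chunk in the descriptor list that will be handed to the controller.
 */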
static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred,
		u32 chunk_size)
{
	struct nvme_host_mem_buf_desc *descs;
	u32 max_entries, len;
	dma_addr_t descs_dma;
	int i = 0;
	void **bufs;
	u64 size, tmp;

	tmp = (preferred + chunk_size - 1);
	do_div(tmp, chunk_size);
	max_entries = tmp;

	if (dev->ctrl.hmmaxd && dev->ctrl.hmmaxd < max_entries)
		max_entries = dev->ctrl.hmmaxd;

	descs = dma_alloc_coherent(dev->dev, max_entries * sizeof(*descs),
				   &descs_dma, GFP_KERNEL);
	if (!descs)
		goto out;

	bufs = kcalloc(max_entries, sizeof(*bufs), GFP_KERNEL);
	if (!bufs)
		goto out_free_descs;

	for (size = 0; size < preferred && i < max_entries; size += len) {
		dma_addr_t dma_addr;

		len = min_t(u64, chunk_size, preferred - size);
		bufs[i] = dma_alloc_attrs(dev->dev, len, &dma_addr, GFP_KERNEL,
				DMA_ATTR_NO_KERNEL_MAPPING | DMA_ATTR_NO_WARN);
		if (!bufs[i])
			break;

		descs[i].addr = cpu_to_le64(dma_addr);
		descs[i].size = cpu_to_le32(len / NVME_CTRL_PAGE_SIZE);
		i++;
	}

	if (!size)
		goto out_free_bufs;

	dev->nr_host_mem_descs = i;
	dev->host_mem_size = size;
	dev->host_mem_descs = descs;
	dev->host_mem_descs_dma = descs_dma;
	dev->host_mem_desc_bufs = bufs;
	return 0;

out_free_bufs:
	while (--i >= 0) {
		size_t size = le32_to_cpu(descs[i].size) * NVME_CTRL_PAGE_SIZE;

		dma_free_attrs(dev->dev, size, bufs[i],
			       le64_to_cpu(descs[i].addr),
			       DMA_ATTR_NO_KERNEL_MAPPING | DMA_ATTR_NO_WARN);
	}

	kfree(bufs);
out_free_descs:
	dma_free_coherent(dev->dev, max_entries * sizeof(*descs), descs,
			descs_dma);
out:
	dev->host_mem_descs = NULL;
	return -ENOMEM;
}

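/*
 * Try progressively smaller chunk sizes, starting at the preferred size and
 * halving down to the controller's minimum (HMMINDS), until an allocation
 * satisfies the controller's minimum HMB size.
 */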
static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred)
{
	u64 min_chunk = min_t(u64, preferred, PAGE_SIZE * MAX_ORDER_NR_PAGES);
	u64 hmminds = max_t(u32, dev->ctrl.hmminds * 4096, PAGE_SIZE * 2);
	u64 chunk_size;

	/* start big and work our way down */
	for (chunk_size = min_chunk; chunk_size >= hmminds; chunk_size /= 2) {
		if (!__nvme_alloc_host_mem(dev, preferred, chunk_size)) {
			if (!min || dev->host_mem_size >= min)
				return 0;
			nvme_free_host_mem(dev);
		}
	}

	return -ENOMEM;
}

static int nvme_setup_host_mem(struct nvme_dev *dev)
{
	u64 max = (u64)max_host_mem_size_mb * SZ_1M;
	u64 preferred = (u64)dev->ctrl.hmpre * 4096;
	u64 min = (u64)dev->ctrl.hmmin * 4096;
	u32 enable_bits = NVME_HOST_MEM_ENABLE;
	int ret;

	preferred = min(preferred, max);
	if (min > max) {
		dev_warn(dev->ctrl.device,
			"min host memory (%lld MiB) above limit (%d MiB).\n",
			min >> ilog2(SZ_1M), max_host_mem_size_mb);
		nvme_free_host_mem(dev);
		return 0;
	}

	/*
	 * If we already have a buffer allocated, check if we can reuse it.
	 */
	if (dev->host_mem_descs) {
		if (dev->host_mem_size >= min)
			enable_bits |= NVME_HOST_MEM_RETURN;
		else
			nvme_free_host_mem(dev);
	}

	if (!dev->host_mem_descs) {
		if (nvme_alloc_host_mem(dev, min, preferred)) {
			dev_warn(dev->ctrl.device,
				"failed to allocate host memory buffer.\n");
			return 0; /* controller must work without HMB */
		}

		dev_info(dev->ctrl.device,
			"allocated %lld MiB host memory buffer.\n",
			dev->host_mem_size >> ilog2(SZ_1M));
	}

	ret = nvme_set_host_mem(dev, enable_bits);
	if (ret)
		nvme_free_host_mem(dev);
	return ret;
}

/*
 * nrirqs is the number of interrupts available for write and read
 * queues. The core already reserved an interrupt for the admin queue.
 */
static void nvme_calc_irq_sets(struct irq_affinity *affd, unsigned int nrirqs)
{
	struct nvme_dev *dev = affd->priv;
	unsigned int nr_read_queues, nr_write_queues = dev->nr_write_queues;

	/*
	 * If there is no interrupt available for queues, ensure that
	 * the default queue is set to 1. The affinity set size is
	 * also set to one, but the irq core ignores it for this case.
	 *
	 * If only one interrupt is available or 'write_queues' == 0, combine
	 * write and read queues.
	 *
	 * If 'write_queues' > 0, ensure it leaves room for at least one read
	 * queue.
	 */
	if (!nrirqs) {
		nrirqs = 1;
		nr_read_queues = 0;
	} else if (nrirqs == 1 || !nr_write_queues) {
		nr_read_queues = 0;
	} else if (nr_write_queues >= nrirqs) {
		nr_read_queues = 1;
	} else {
		nr_read_queues = nrirqs - nr_write_queues;
	}

	dev->io_queues[HCTX_TYPE_DEFAULT] = nrirqs - nr_read_queues;
	affd->set_size[HCTX_TYPE_DEFAULT] = nrirqs - nr_read_queues;
	dev->io_queues[HCTX_TYPE_READ] = nr_read_queues;
	affd->set_size[HCTX_TYPE_READ] = nr_read_queues;
	affd->nr_sets = nr_read_queues ? 2 : 1;
}

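/*
 * Allocate MSI-X/MSI vectors for the admin queue plus every interrupt-driven
 * I/O queue, letting nvme_calc_irq_sets() split the vectors between the
 * default (write) and read affinity sets.  Poll queues take no vectors.
 */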
static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues)
{
	struct pci_dev *pdev = to_pci_dev(dev->dev);
	struct irq_affinity affd = {
		.pre_vectors	= 1,
		.calc_sets	= nvme_calc_irq_sets,
		.priv		= dev,
	};
	unsigned int irq_queues, poll_queues;

	/*
	 * Poll queues don't need interrupts, but we need at least one I/O queue
	 * left over for non-polled I/O.
	 */
	poll_queues = min(dev->nr_poll_queues, nr_io_queues - 1);
	dev->io_queues[HCTX_TYPE_POLL] = poll_queues;

	/*
	 * Initialize for the single interrupt case, will be updated in
	 * nvme_calc_irq_sets().
	 */
	dev->io_queues[HCTX_TYPE_DEFAULT] = 1;
	dev->io_queues[HCTX_TYPE_READ] = 0;

	/*
	 * We need interrupts for the admin queue and each non-polled I/O queue,
	 * but some Apple controllers require all queues to use the first
	 * vector.
	 */
	irq_queues = 1;
	if (!(dev->ctrl.quirks & NVME_QUIRK_SINGLE_VECTOR))
		irq_queues += (nr_io_queues - poll_queues);
	return pci_alloc_irq_vectors_affinity(pdev, 1, irq_queues,
			      PCI_IRQ_ALL_TYPES | PCI_IRQ_AFFINITY, &affd);
}

static void nvme_disable_io_queues(struct nvme_dev *dev)
{
	if (__nvme_disable_io_queues(dev, nvme_admin_delete_sq))
		__nvme_disable_io_queues(dev, nvme_admin_delete_cq);
}

static unsigned int nvme_max_io_queues(struct nvme_dev *dev)
{
	return num_possible_cpus() + dev->nr_write_queues + dev->nr_poll_queues;
}

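/*
 * Negotiate the I/O queue count with the controller, remap the doorbell BAR
 * to cover it, reallocate the interrupt vectors, and create the queues,
 * retrying with fewer queues if the BAR cannot be mapped large enough.
 */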
21528c2ecf20Sopenharmony_cistatic int nvme_setup_io_queues(struct nvme_dev *dev)
21538c2ecf20Sopenharmony_ci{
21548c2ecf20Sopenharmony_ci	struct nvme_queue *adminq = &dev->queues[0];
21558c2ecf20Sopenharmony_ci	struct pci_dev *pdev = to_pci_dev(dev->dev);
21568c2ecf20Sopenharmony_ci	unsigned int nr_io_queues;
21578c2ecf20Sopenharmony_ci	unsigned long size;
21588c2ecf20Sopenharmony_ci	int result;
21598c2ecf20Sopenharmony_ci
21608c2ecf20Sopenharmony_ci	/*
21618c2ecf20Sopenharmony_ci	 * Sample the module parameters once at reset time so that we have
21628c2ecf20Sopenharmony_ci	 * stable values to work with.
21638c2ecf20Sopenharmony_ci	 */
21648c2ecf20Sopenharmony_ci	dev->nr_write_queues = write_queues;
21658c2ecf20Sopenharmony_ci	dev->nr_poll_queues = poll_queues;
21668c2ecf20Sopenharmony_ci
	/*
	 * If tags are shared with the admin queue (Apple bug), make sure we
	 * use only one I/O queue.
	 */
	if (dev->ctrl.quirks & NVME_QUIRK_SHARED_TAGS)
		nr_io_queues = 1;
	else
		nr_io_queues = min(nvme_max_io_queues(dev),
				   dev->nr_allocated_queues - 1);

	result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues);
	if (result < 0)
		return result;

	if (nr_io_queues == 0)
		return 0;

	clear_bit(NVMEQ_ENABLED, &adminq->flags);

	if (dev->cmb_use_sqes) {
		result = nvme_cmb_qdepth(dev, nr_io_queues,
				sizeof(struct nvme_command));
		if (result > 0)
			dev->q_depth = result;
		else
			dev->cmb_use_sqes = false;
	}

	do {
		size = db_bar_size(dev, nr_io_queues);
		result = nvme_remap_bar(dev, size);
		if (!result)
			break;
		if (!--nr_io_queues)
			return -ENOMEM;
	} while (1);
	adminq->q_db = dev->dbs;

 retry:
	/* Deregister the admin queue's interrupt */
	pci_free_irq(pdev, 0, adminq);

	/*
	 * If we enabled MSI-X early because the device does not support INTx,
	 * disable it again before setting up the full range we need.
	 */
	pci_free_irq_vectors(pdev);

	result = nvme_setup_irqs(dev, nr_io_queues);
	if (result <= 0)
		return -EIO;

	dev->num_vecs = result;
	result = max(result - 1, 1);
	dev->max_qid = result + dev->io_queues[HCTX_TYPE_POLL];

	/*
	 * Should investigate if there's a performance win from allocating
	 * more queues than interrupt vectors; it might allow the submission
	 * path to scale better, even if the receive path is limited by the
	 * number of interrupts.
	 */
	result = queue_request_irq(adminq);
	if (result)
		return result;
	set_bit(NVMEQ_ENABLED, &adminq->flags);

	result = nvme_create_io_queues(dev);
	if (result || dev->online_queues < 2)
		return result;

	if (dev->online_queues - 1 < dev->max_qid) {
		nr_io_queues = dev->online_queues - 1;
		nvme_disable_io_queues(dev);
		nvme_suspend_io_queues(dev);
		goto retry;
	}
	dev_info(dev->ctrl.device, "%d/%d/%d default/read/poll queues\n",
					dev->io_queues[HCTX_TYPE_DEFAULT],
					dev->io_queues[HCTX_TYPE_READ],
					dev->io_queues[HCTX_TYPE_POLL]);
	return 0;
}

static void nvme_del_queue_end(struct request *req, blk_status_t error)
{
	struct nvme_queue *nvmeq = req->end_io_data;

	blk_mq_free_request(req);
	complete(&nvmeq->delete_done);
}

static void nvme_del_cq_end(struct request *req, blk_status_t error)
{
	struct nvme_queue *nvmeq = req->end_io_data;

	if (error)
		set_bit(NVMEQ_DELETE_ERROR, &nvmeq->flags);

	nvme_del_queue_end(req, error);
}

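/*
 * Issue an asynchronous delete SQ/CQ admin command for the given queue.
 * Completion is signalled through nvmeq->delete_done by the end_io
 * callbacks above.
 */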
static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode)
{
	struct request_queue *q = nvmeq->dev->ctrl.admin_q;
	struct request *req;
	struct nvme_command cmd;

	memset(&cmd, 0, sizeof(cmd));
	cmd.delete_queue.opcode = opcode;
	cmd.delete_queue.qid = cpu_to_le16(nvmeq->qid);

	req = nvme_alloc_request(q, &cmd, BLK_MQ_REQ_NOWAIT);
	if (IS_ERR(req))
		return PTR_ERR(req);

	req->end_io_data = nvmeq;

	init_completion(&nvmeq->delete_done);
	blk_execute_rq_nowait(q, NULL, req, false,
			opcode == nvme_admin_delete_cq ?
				nvme_del_cq_end : nvme_del_queue_end);
	return 0;
}

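/*
 * Send delete commands for all online I/O queues in parallel, then wait for
 * each completion with the admin timeout. Returns false if any deletion
 * could not be completed in time.
 */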
static bool __nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode)
{
	int nr_queues = dev->online_queues - 1, sent = 0;
	unsigned long timeout;

 retry:
	timeout = ADMIN_TIMEOUT;
	while (nr_queues > 0) {
		if (nvme_delete_queue(&dev->queues[nr_queues], opcode))
			break;
		nr_queues--;
		sent++;
	}
	while (sent) {
		struct nvme_queue *nvmeq = &dev->queues[nr_queues + sent];

		timeout = wait_for_completion_io_timeout(&nvmeq->delete_done,
				timeout);
		if (timeout == 0)
			return false;

		sent--;
		if (nr_queues)
			goto retry;
	}
	return true;
}

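/*
 * Register the I/O queues with blk-mq: allocate the tagset on the first
 * successful reset, and on subsequent resets just update the number of
 * hardware queues and free any queues that are no longer usable.
 */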
static void nvme_dev_add(struct nvme_dev *dev)
{
	int ret;

	if (!dev->ctrl.tagset) {
		dev->tagset.ops = &nvme_mq_ops;
		dev->tagset.nr_hw_queues = dev->online_queues - 1;
		dev->tagset.nr_maps = 2; /* default + read */
		if (dev->io_queues[HCTX_TYPE_POLL])
			dev->tagset.nr_maps++;
		dev->tagset.timeout = NVME_IO_TIMEOUT;
		dev->tagset.numa_node = dev->ctrl.numa_node;
		dev->tagset.queue_depth = min_t(unsigned int, dev->q_depth,
						BLK_MQ_MAX_DEPTH) - 1;
		dev->tagset.cmd_size = sizeof(struct nvme_iod);
		dev->tagset.flags = BLK_MQ_F_SHOULD_MERGE;
		dev->tagset.driver_data = dev;

		/*
		 * Some Apple controllers require tags to be unique across the
		 * admin and I/O queues, so reserve the first 32 tags of the
		 * I/O queue.
		 */
		if (dev->ctrl.quirks & NVME_QUIRK_SHARED_TAGS)
			dev->tagset.reserved_tags = NVME_AQ_DEPTH;

		ret = blk_mq_alloc_tag_set(&dev->tagset);
		if (ret) {
			dev_warn(dev->ctrl.device,
				"IO queues tagset allocation failed %d\n", ret);
			return;
		}
		dev->ctrl.tagset = &dev->tagset;
	} else {
		blk_mq_update_nr_hw_queues(&dev->tagset, dev->online_queues - 1);

		/* Free previously allocated queues that are no longer usable */
		nvme_free_queues(dev, dev->online_queues);
	}

	nvme_dbbuf_set(dev);
}

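/*
 * Bring the PCI function up far enough to talk to the controller: enable
 * memory access and bus mastering, set the DMA mask, pre-allocate a single
 * interrupt vector, read CAP to size the queues, and apply the per-device
 * queue depth quirks.
 */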
static int nvme_pci_enable(struct nvme_dev *dev)
{
	int result = -ENOMEM;
	struct pci_dev *pdev = to_pci_dev(dev->dev);

	if (pci_enable_device_mem(pdev))
		return result;

	pci_set_master(pdev);

	if (dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(64)))
		goto disable;

	if (readl(dev->bar + NVME_REG_CSTS) == -1) {
		result = -ENODEV;
		goto disable;
	}

	/*
	 * Some devices and/or platforms don't advertise or work with INTx
	 * interrupts. Pre-enable a single MSI-X or MSI vector for setup; we'll
	 * adjust this later.
	 */
	result = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES);
	if (result < 0)
		return result;

	dev->ctrl.cap = lo_hi_readq(dev->bar + NVME_REG_CAP);

	dev->q_depth = min_t(u32, NVME_CAP_MQES(dev->ctrl.cap) + 1,
				io_queue_depth);
	dev->ctrl.sqsize = dev->q_depth - 1; /* 0's based queue depth */
	dev->db_stride = 1 << NVME_CAP_STRIDE(dev->ctrl.cap);
	dev->dbs = dev->bar + 4096;

	/*
	 * Some Apple controllers require a non-standard SQE size.
	 * Interestingly they also seem to ignore the CC:IOSQES register
	 * so we don't bother updating it here.
	 */
	if (dev->ctrl.quirks & NVME_QUIRK_128_BYTES_SQES)
		dev->io_sqes = 7;
	else
		dev->io_sqes = NVME_NVM_IOSQES;

	/*
	 * Temporary fix for the Apple controller found in the MacBook8,1 and
	 * some MacBook7,1 to avoid controller resets and data loss.
	 */
	if (pdev->vendor == PCI_VENDOR_ID_APPLE && pdev->device == 0x2001) {
		dev->q_depth = 2;
		dev_warn(dev->ctrl.device, "detected Apple NVMe controller, "
			"set queue depth=%u to work around controller resets\n",
			dev->q_depth);
	} else if (pdev->vendor == PCI_VENDOR_ID_SAMSUNG &&
		   (pdev->device == 0xa821 || pdev->device == 0xa822) &&
		   NVME_CAP_MQES(dev->ctrl.cap) == 0) {
		dev->q_depth = 64;
		dev_err(dev->ctrl.device, "detected PM1725 NVMe controller, "
			"set queue depth=%u\n", dev->q_depth);
	}

	/*
	 * Controllers with the shared tags quirk need the I/O queue to be big
	 * enough so that we get 32 tags for the admin queue.
	 */
	if ((dev->ctrl.quirks & NVME_QUIRK_SHARED_TAGS) &&
	    (dev->q_depth < (NVME_AQ_DEPTH + 2))) {
		dev->q_depth = NVME_AQ_DEPTH + 2;
		dev_warn(dev->ctrl.device, "IO queue depth clamped to %d\n",
			 dev->q_depth);
	}

	nvme_map_cmb(dev);

	pci_enable_pcie_error_reporting(pdev);
	pci_save_state(pdev);
	return 0;

 disable:
	pci_disable_device(pdev);
	return result;
}

static void nvme_dev_unmap(struct nvme_dev *dev)
{
	if (dev->bar)
		iounmap(dev->bar);
	pci_release_mem_regions(to_pci_dev(dev->dev));
}

static void nvme_pci_disable(struct nvme_dev *dev)
{
	struct pci_dev *pdev = to_pci_dev(dev->dev);

	pci_free_irq_vectors(pdev);

	if (pci_is_enabled(pdev)) {
		pci_disable_pcie_error_reporting(pdev);
		pci_disable_device(pdev);
	}
}

static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
{
	bool dead = true, freeze = false;
	struct pci_dev *pdev = to_pci_dev(dev->dev);

	mutex_lock(&dev->shutdown_lock);
	if (pci_is_enabled(pdev)) {
		u32 csts = readl(dev->bar + NVME_REG_CSTS);

		if (dev->ctrl.state == NVME_CTRL_LIVE ||
		    dev->ctrl.state == NVME_CTRL_RESETTING) {
			freeze = true;
			nvme_start_freeze(&dev->ctrl);
		}
		dead = !!((csts & NVME_CSTS_CFS) || !(csts & NVME_CSTS_RDY) ||
			pdev->error_state != pci_channel_io_normal);
	}

	/*
	 * Give the controller a chance to complete all entered requests if
	 * doing a safe shutdown.
	 */
	if (!dead && shutdown && freeze)
		nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT);

	nvme_stop_queues(&dev->ctrl);

	if (!dead && dev->ctrl.queue_count > 0) {
		nvme_disable_io_queues(dev);
		nvme_disable_admin_queue(dev, shutdown);
	}
	nvme_suspend_io_queues(dev);
	nvme_suspend_queue(&dev->queues[0]);
	nvme_pci_disable(dev);
	nvme_reap_pending_cqes(dev);

	blk_mq_tagset_busy_iter(&dev->tagset, nvme_cancel_request, &dev->ctrl);
	blk_mq_tagset_busy_iter(&dev->admin_tagset, nvme_cancel_request, &dev->ctrl);
	blk_mq_tagset_wait_completed_request(&dev->tagset);
	blk_mq_tagset_wait_completed_request(&dev->admin_tagset);

	/*
	 * The driver will not be starting up queues again if shutting down, so
	 * we must flush all entered requests to their failed completion to
	 * avoid deadlocking the blk-mq hot-cpu notifier.
	 */
	if (shutdown) {
		nvme_start_queues(&dev->ctrl);
		if (dev->ctrl.admin_q && !blk_queue_dying(dev->ctrl.admin_q))
			nvme_start_admin_queue(&dev->ctrl);
	}
	mutex_unlock(&dev->shutdown_lock);
}

static int nvme_disable_prepare_reset(struct nvme_dev *dev, bool shutdown)
{
	if (!nvme_wait_reset(&dev->ctrl))
		return -EBUSY;
	nvme_dev_disable(dev, shutdown);
	return 0;
}

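/*
 * Create the two DMA pools used for PRP lists: a page-sized pool for full
 * lists and a small 256-byte pool for I/Os that only need a short list.
 */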
static int nvme_setup_prp_pools(struct nvme_dev *dev)
{
	dev->prp_page_pool = dma_pool_create("prp list page", dev->dev,
						NVME_CTRL_PAGE_SIZE,
						NVME_CTRL_PAGE_SIZE, 0);
	if (!dev->prp_page_pool)
		return -ENOMEM;

	/* Optimisation for I/Os between 4k and 128k */
	dev->prp_small_pool = dma_pool_create("prp list 256", dev->dev,
						256, 256, 0);
	if (!dev->prp_small_pool) {
		dma_pool_destroy(dev->prp_page_pool);
		return -ENOMEM;
	}
	return 0;
}

static void nvme_release_prp_pools(struct nvme_dev *dev)
{
	dma_pool_destroy(dev->prp_page_pool);
	dma_pool_destroy(dev->prp_small_pool);
}

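/*
 * Single-element emergency pool sized for the worst-case per-command
 * PRP/SGL list and scatterlist allocation, guaranteeing forward progress
 * when regular allocations fail under memory pressure.
 */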
static int nvme_pci_alloc_iod_mempool(struct nvme_dev *dev)
{
	size_t npages = max(nvme_pci_npages_prp(), nvme_pci_npages_sgl());
	size_t alloc_size = sizeof(__le64 *) * npages +
			    sizeof(struct scatterlist) * NVME_MAX_SEGS;

	WARN_ON_ONCE(alloc_size > PAGE_SIZE);
	dev->iod_mempool = mempool_create_node(1,
			mempool_kmalloc, mempool_kfree,
			(void *)alloc_size, GFP_KERNEL,
			dev_to_node(dev->dev));
	if (!dev->iod_mempool)
		return -ENOMEM;
	return 0;
}

static void nvme_free_tagset(struct nvme_dev *dev)
{
	if (dev->tagset.tags)
		blk_mq_free_tag_set(&dev->tagset);
	dev->ctrl.tagset = NULL;
}

/* pairs with nvme_pci_alloc_dev */
static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl)
{
	struct nvme_dev *dev = to_nvme_dev(ctrl);

	nvme_dbbuf_dma_free(dev);
	nvme_free_tagset(dev);
	if (dev->ctrl.admin_q)
		blk_put_queue(dev->ctrl.admin_q);
	free_opal_dev(dev->ctrl.opal_dev);
	mempool_destroy(dev->iod_mempool);
	put_device(dev->dev);
	kfree(dev->queues);
	kfree(dev);
}

static void nvme_remove_dead_ctrl(struct nvme_dev *dev)
{
	/*
	 * Set state to deleting now to avoid blocking nvme_wait_reset(), which
	 * may be holding this pci_dev's device lock.
	 */
	nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
	nvme_get_ctrl(&dev->ctrl);
	nvme_dev_disable(dev, false);
	nvme_kill_queues(&dev->ctrl);
	if (!queue_work(nvme_wq, &dev->remove_work))
		nvme_put_ctrl(&dev->ctrl);
}

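/*
 * Full controller (re)initialization, run from the reset workqueue: enable
 * the PCI function, set up the admin queue, identify the controller, and
 * bring up the I/O queues before marking the controller live.
 */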
static void nvme_reset_work(struct work_struct *work)
{
	struct nvme_dev *dev =
		container_of(work, struct nvme_dev, ctrl.reset_work);
	bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL);
	int result;

	if (dev->ctrl.state != NVME_CTRL_RESETTING) {
		dev_warn(dev->ctrl.device, "ctrl state %d is not RESETTING\n",
			 dev->ctrl.state);
		result = -ENODEV;
		goto out;
	}

	/*
	 * If we're called to reset a live controller, first shut it down
	 * before moving on.
	 */
	if (dev->ctrl.ctrl_config & NVME_CC_ENABLE)
		nvme_dev_disable(dev, false);
	nvme_sync_queues(&dev->ctrl);

	mutex_lock(&dev->shutdown_lock);
	result = nvme_pci_enable(dev);
	if (result)
		goto out_unlock;

	result = nvme_pci_configure_admin_queue(dev);
	if (result)
		goto out_unlock;

	result = nvme_alloc_admin_tags(dev);
	if (result)
		goto out_unlock;

	dma_set_min_align_mask(dev->dev, NVME_CTRL_PAGE_SIZE - 1);

	/*
	 * Limit the max command size to prevent iod->sg allocations going
	 * over a single page.
	 */
	dev->ctrl.max_hw_sectors = min_t(u32,
		NVME_MAX_KB_SZ << 1, dma_max_mapping_size(dev->dev) >> 9);
	dev->ctrl.max_segments = NVME_MAX_SEGS;

	/*
	 * Don't limit the IOMMU merged segment size.
	 */
	dma_set_max_seg_size(dev->dev, 0xffffffff);

	mutex_unlock(&dev->shutdown_lock);

	/*
	 * Mark the controller CONNECTING (a state introduced by the
	 * nvme-fc/rdma transports) while the initialization procedure runs.
	 */
	if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_CONNECTING)) {
		dev_warn(dev->ctrl.device,
			"failed to mark controller CONNECTING\n");
		result = -EBUSY;
		goto out;
	}

	/*
	 * We do not support an SGL for metadata (yet), so we are limited to a
	 * single integrity segment for the separate metadata pointer.
	 */
	dev->ctrl.max_integrity_segments = 1;

	result = nvme_init_identify(&dev->ctrl);
	if (result)
		goto out;

	if (dev->ctrl.oacs & NVME_CTRL_OACS_SEC_SUPP) {
		if (!dev->ctrl.opal_dev)
			dev->ctrl.opal_dev =
				init_opal_dev(&dev->ctrl, &nvme_sec_submit);
		else if (was_suspend)
			opal_unlock_from_suspend(dev->ctrl.opal_dev);
	} else {
		free_opal_dev(dev->ctrl.opal_dev);
		dev->ctrl.opal_dev = NULL;
	}

	if (dev->ctrl.oacs & NVME_CTRL_OACS_DBBUF_SUPP) {
		result = nvme_dbbuf_dma_alloc(dev);
		if (result)
			dev_warn(dev->dev,
				 "unable to allocate dma for dbbuf\n");
	}

	if (dev->ctrl.hmpre) {
		result = nvme_setup_host_mem(dev);
		if (result < 0)
			goto out;
	}

	result = nvme_setup_io_queues(dev);
	if (result)
		goto out;

	/*
	 * Keep the controller around but remove all namespaces if we don't have
	 * any working I/O queue.
	 */
	if (dev->online_queues < 2) {
		dev_warn(dev->ctrl.device, "IO queues not created\n");
		nvme_kill_queues(&dev->ctrl);
		nvme_remove_namespaces(&dev->ctrl);
		nvme_free_tagset(dev);
	} else {
		nvme_start_queues(&dev->ctrl);
		nvme_wait_freeze(&dev->ctrl);
		nvme_dev_add(dev);
		nvme_unfreeze(&dev->ctrl);
	}

	/*
	 * If only the admin queue is alive, keep it to allow further
	 * investigation or recovery.
	 */
	if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_LIVE)) {
		dev_warn(dev->ctrl.device,
			"failed to mark controller live state\n");
		result = -ENODEV;
		goto out;
	}

	nvme_start_ctrl(&dev->ctrl);
	return;

 out_unlock:
	mutex_unlock(&dev->shutdown_lock);
 out:
	if (result)
		dev_warn(dev->ctrl.device,
			 "Removing after probe failure status: %d\n", result);
	nvme_remove_dead_ctrl(dev);
}

static void nvme_remove_dead_ctrl_work(struct work_struct *work)
{
	struct nvme_dev *dev = container_of(work, struct nvme_dev, remove_work);
	struct pci_dev *pdev = to_pci_dev(dev->dev);

	if (pci_get_drvdata(pdev))
		device_release_driver(&pdev->dev);
	nvme_put_ctrl(&dev->ctrl);
}

static int nvme_pci_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val)
{
	*val = readl(to_nvme_dev(ctrl)->bar + off);
	return 0;
}

static int nvme_pci_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val)
{
	writel(val, to_nvme_dev(ctrl)->bar + off);
	return 0;
}

static int nvme_pci_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val)
{
	*val = lo_hi_readq(to_nvme_dev(ctrl)->bar + off);
	return 0;
}

static int nvme_pci_get_address(struct nvme_ctrl *ctrl, char *buf, int size)
{
	struct pci_dev *pdev = to_pci_dev(to_nvme_dev(ctrl)->dev);

	return snprintf(buf, size, "%s\n", dev_name(&pdev->dev));
}

static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
	.name			= "pcie",
	.module			= THIS_MODULE,
	.flags			= NVME_F_METADATA_SUPPORTED |
				  NVME_F_PCI_P2PDMA,
	.reg_read32		= nvme_pci_reg_read32,
	.reg_write32		= nvme_pci_reg_write32,
	.reg_read64		= nvme_pci_reg_read64,
	.free_ctrl		= nvme_pci_free_ctrl,
	.submit_async_event	= nvme_pci_submit_async_event,
	.get_address		= nvme_pci_get_address,
};

static int nvme_dev_map(struct nvme_dev *dev)
{
	struct pci_dev *pdev = to_pci_dev(dev->dev);

	if (pci_request_mem_regions(pdev, "nvme"))
		return -ENODEV;

	if (nvme_remap_bar(dev, NVME_REG_DBS + 4096))
		goto release;

	return 0;
  release:
	pci_release_mem_regions(pdev);
	return -ENODEV;
}

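/*
 * Return additional quirk flags for device/platform combinations that are
 * only identifiable through DMI data, typically to work around broken
 * power-state handling on specific boards.
 */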
static unsigned long check_vendor_combination_bug(struct pci_dev *pdev)
{
	if (pdev->vendor == 0x144d && pdev->device == 0xa802) {
		/*
		 * Several Samsung devices seem to drop off the PCIe bus
		 * randomly when APST is enabled and the deepest sleep state
		 * is used. This has been observed on a Samsung "SM951 NVMe
		 * SAMSUNG 256GB", a "PM951 NVMe SAMSUNG 512GB", and a
		 * "Samsung SSD 950 PRO 256GB", but it seems to be restricted
		 * to two Dell laptops.
		 */
		if (dmi_match(DMI_SYS_VENDOR, "Dell Inc.") &&
		    (dmi_match(DMI_PRODUCT_NAME, "XPS 15 9550") ||
		     dmi_match(DMI_PRODUCT_NAME, "Precision 5510")))
			return NVME_QUIRK_NO_DEEPEST_PS;
	} else if (pdev->vendor == 0x144d && pdev->device == 0xa804) {
		/*
		 * The Samsung SSD 960 EVO drops off the PCIe bus after system
		 * suspend on a Ryzen board (ASUS PRIME B350M-A), as well as
		 * within a few minutes after bootup on a Coffee Lake board
		 * (ASUS PRIME Z370-A).
		 */
		if (dmi_match(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC.") &&
		    (dmi_match(DMI_BOARD_NAME, "PRIME B350M-A") ||
		     dmi_match(DMI_BOARD_NAME, "PRIME Z370-A")))
			return NVME_QUIRK_NO_APST;
	} else if ((pdev->vendor == 0x144d && (pdev->device == 0xa801 ||
		    pdev->device == 0xa808 || pdev->device == 0xa809)) ||
		   (pdev->vendor == 0x1e0f && pdev->device == 0x0001)) {
		/*
		 * Force host-managed NVMe power settings for lowest idle
		 * power with quick resume latency on Samsung and Toshiba
		 * SSDs, based on the suspend behavior observed on a Coffee
		 * Lake board in the LENOVO C640.
		 */
		if ((dmi_match(DMI_BOARD_VENDOR, "LENOVO")) &&
		     dmi_match(DMI_BOARD_NAME, "LNVNB161216"))
			return NVME_QUIRK_SIMPLE_SUSPEND;
	}

	return 0;
}

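/*
 * Runs asynchronously after probe returns; waits for the initial reset and
 * namespace scan to finish before dropping the probe's controller
 * reference.
 */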
static void nvme_async_probe(void *data, async_cookie_t cookie)
{
	struct nvme_dev *dev = data;

	flush_work(&dev->ctrl.reset_work);
	flush_work(&dev->ctrl.scan_work);
	nvme_put_ctrl(&dev->ctrl);
}

static struct nvme_dev *nvme_pci_alloc_dev(struct pci_dev *pdev,
		const struct pci_device_id *id)
{
	unsigned long quirks = id->driver_data;
	int node = dev_to_node(&pdev->dev);
	struct nvme_dev *dev;
	int ret = -ENOMEM;

	dev = kzalloc_node(sizeof(*dev), GFP_KERNEL, node);
	if (!dev)
		return ERR_PTR(-ENOMEM);
	INIT_WORK(&dev->ctrl.reset_work, nvme_reset_work);
	INIT_WORK(&dev->remove_work, nvme_remove_dead_ctrl_work);
	mutex_init(&dev->shutdown_lock);

	dev->nr_write_queues = write_queues;
	dev->nr_poll_queues = poll_queues;
	dev->nr_allocated_queues = nvme_max_io_queues(dev) + 1;
	dev->queues = kcalloc_node(dev->nr_allocated_queues,
			sizeof(struct nvme_queue), GFP_KERNEL, node);
	if (!dev->queues)
		goto out_free_dev;

	dev->dev = get_device(&pdev->dev);

	quirks |= check_vendor_combination_bug(pdev);
	if (!noacpi && acpi_storage_d3(&pdev->dev)) {
		/*
		 * Some systems use a BIOS workaround to ask for D3 on
		 * platforms that support kernel-managed suspend.
		 */
		dev_info(&pdev->dev,
			 "platform quirk: setting simple suspend\n");
		quirks |= NVME_QUIRK_SIMPLE_SUSPEND;
	}
	ret = nvme_init_ctrl(&dev->ctrl, &pdev->dev, &nvme_pci_ctrl_ops,
			     quirks);
	if (ret)
		goto out_put_device;
	return dev;

out_put_device:
	put_device(dev->dev);
	kfree(dev->queues);
out_free_dev:
	kfree(dev);
	return ERR_PTR(ret);
}

static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	struct nvme_dev *dev;
	int result = -ENOMEM;

	dev = nvme_pci_alloc_dev(pdev, id);
	if (IS_ERR(dev))
		return PTR_ERR(dev);

	result = nvme_dev_map(dev);
	if (result)
		goto out_uninit_ctrl;

	result = nvme_setup_prp_pools(dev);
	if (result)
		goto out_dev_unmap;

	result = nvme_pci_alloc_iod_mempool(dev);
	if (result)
		goto out_release_prp_pools;

	dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev));
	pci_set_drvdata(pdev, dev);

	nvme_reset_ctrl(&dev->ctrl);
	async_schedule(nvme_async_probe, dev);
	return 0;

out_release_prp_pools:
	nvme_release_prp_pools(dev);
out_dev_unmap:
	nvme_dev_unmap(dev);
out_uninit_ctrl:
	nvme_uninit_ctrl(&dev->ctrl);
	return result;
}

static void nvme_reset_prepare(struct pci_dev *pdev)
{
	struct nvme_dev *dev = pci_get_drvdata(pdev);

	/*
	 * We don't need to check the return value from waiting for the reset
	 * state as pci_dev device lock is held, making it impossible to race
	 * with ->remove().
	 */
	nvme_disable_prepare_reset(dev, false);
	nvme_sync_queues(&dev->ctrl);
}

static void nvme_reset_done(struct pci_dev *pdev)
{
	struct nvme_dev *dev = pci_get_drvdata(pdev);

	if (!nvme_try_sched_reset(&dev->ctrl))
		flush_work(&dev->ctrl.reset_work);
}

static void nvme_shutdown(struct pci_dev *pdev)
{
	struct nvme_dev *dev = pci_get_drvdata(pdev);

	nvme_disable_prepare_reset(dev, true);
}

/*
 * The driver's remove may be called on a device in a partially initialized
 * state. This function must not have any dependencies on the device state in
 * order to proceed.
 */
static void nvme_remove(struct pci_dev *pdev)
{
	struct nvme_dev *dev = pci_get_drvdata(pdev);

	nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
	pci_set_drvdata(pdev, NULL);

	if (!pci_device_is_present(pdev)) {
		nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DEAD);
		nvme_dev_disable(dev, true);
	}

	flush_work(&dev->ctrl.reset_work);
	nvme_stop_ctrl(&dev->ctrl);
	nvme_remove_namespaces(&dev->ctrl);
	nvme_dev_disable(dev, true);
	nvme_release_cmb(dev);
	nvme_free_host_mem(dev);
	nvme_dev_remove_admin(dev);
	nvme_free_queues(dev, 0);
	nvme_release_prp_pools(dev);
	nvme_dev_unmap(dev);
	nvme_uninit_ctrl(&dev->ctrl);
}

#ifdef CONFIG_PM_SLEEP
static int nvme_get_power_state(struct nvme_ctrl *ctrl, u32 *ps)
{
	return nvme_get_features(ctrl, NVME_FEAT_POWER_MGMT, 0, NULL, 0, ps);
}

static int nvme_set_power_state(struct nvme_ctrl *ctrl, u32 ps)
{
	return nvme_set_features(ctrl, NVME_FEAT_POWER_MGMT, ps, NULL, 0, NULL);
}

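/*
 * Undo a power-state-only suspend by restoring the saved power state; if
 * none was saved or the set features command fails, fall back to a full
 * controller reset.
 */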
static int nvme_resume(struct device *dev)
{
	struct nvme_dev *ndev = pci_get_drvdata(to_pci_dev(dev));
	struct nvme_ctrl *ctrl = &ndev->ctrl;

	if (ndev->last_ps == U32_MAX ||
	    nvme_set_power_state(ctrl, ndev->last_ps) != 0)
		return nvme_try_sched_reset(&ndev->ctrl);
	return 0;
}

static int nvme_suspend(struct device *dev)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	struct nvme_dev *ndev = pci_get_drvdata(pdev);
	struct nvme_ctrl *ctrl = &ndev->ctrl;
	int ret = -EBUSY;

	ndev->last_ps = U32_MAX;

	/*
	 * The platform does not remove power for a kernel managed suspend so
	 * use host managed nvme power settings for lowest idle power if
	 * possible. This should have quicker resume latency than a full device
	 * shutdown.  But if the firmware is involved after the suspend or the
	 * device does not support any non-default power states, shut down the
	 * device fully.
	 *
	 * If ASPM is not enabled for the device, shut down the device and allow
	 * the PCI bus layer to put it into D3 in order to take the PCIe link
	 * down, so as to allow the platform to achieve its minimum low-power
	 * state (which may not be possible if the link is up).
	 *
	 * If a host memory buffer is enabled, shut down the device as the NVMe
	 * specification allows the device to access the host memory buffer in
	 * host DRAM from all power states, but hosts will fail access to DRAM
	 * during S3.
	 */
	if (pm_suspend_via_firmware() || !ctrl->npss ||
	    !pcie_aspm_enabled(pdev) ||
	    ndev->nr_host_mem_descs ||
	    (ndev->ctrl.quirks & NVME_QUIRK_SIMPLE_SUSPEND))
		return nvme_disable_prepare_reset(ndev, true);

	nvme_start_freeze(ctrl);
	nvme_wait_freeze(ctrl);
	nvme_sync_queues(ctrl);

	if (ctrl->state != NVME_CTRL_LIVE)
		goto unfreeze;

	ret = nvme_get_power_state(ctrl, &ndev->last_ps);
	if (ret < 0)
		goto unfreeze;

	/*
	 * A saved state prevents pci pm from generically controlling the
	 * device's power. If we're using protocol specific settings, we don't
	 * want pci interfering.
	 */
	pci_save_state(pdev);

	ret = nvme_set_power_state(ctrl, ctrl->npss);
	if (ret < 0)
		goto unfreeze;

	if (ret) {
		/* discard the saved state */
		pci_load_saved_state(pdev, NULL);

		/*
		 * Clearing npss forces a controller reset on resume. The
		 * correct value will be rediscovered then.
		 */
		ret = nvme_disable_prepare_reset(ndev, true);
		ctrl->npss = 0;
	}
unfreeze:
	nvme_unfreeze(ctrl);
	return ret;
}

static int nvme_simple_suspend(struct device *dev)
{
	struct nvme_dev *ndev = pci_get_drvdata(to_pci_dev(dev));

	return nvme_disable_prepare_reset(ndev, true);
}

static int nvme_simple_resume(struct device *dev)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	struct nvme_dev *ndev = pci_get_drvdata(pdev);

	return nvme_try_sched_reset(&ndev->ctrl);
}

static const struct dev_pm_ops nvme_dev_pm_ops = {
	.suspend	= nvme_suspend,
	.resume		= nvme_resume,
	.freeze		= nvme_simple_suspend,
	.thaw		= nvme_simple_resume,
	.poweroff	= nvme_simple_suspend,
	.restore	= nvme_simple_resume,
};
#endif /* CONFIG_PM_SLEEP */

static pci_ers_result_t nvme_error_detected(struct pci_dev *pdev,
						pci_channel_state_t state)
{
	struct nvme_dev *dev = pci_get_drvdata(pdev);

	/*
	 * A frozen channel requires a reset. When detected, this method will
	 * shut down the controller to quiesce it. The controller will be
	 * restarted after the slot reset through the driver's slot_reset
	 * callback.
	 */
	switch (state) {
	case pci_channel_io_normal:
		return PCI_ERS_RESULT_CAN_RECOVER;
	case pci_channel_io_frozen:
		dev_warn(dev->ctrl.device,
			"frozen state error detected, reset controller\n");
		nvme_dev_disable(dev, false);
		return PCI_ERS_RESULT_NEED_RESET;
	case pci_channel_io_perm_failure:
		dev_warn(dev->ctrl.device,
			"failure state error detected, request disconnect\n");
		return PCI_ERS_RESULT_DISCONNECT;
	}
	return PCI_ERS_RESULT_NEED_RESET;
}

static pci_ers_result_t nvme_slot_reset(struct pci_dev *pdev)
{
	struct nvme_dev *dev = pci_get_drvdata(pdev);

	dev_info(dev->ctrl.device, "restart after slot reset\n");
	pci_restore_state(pdev);
	nvme_reset_ctrl(&dev->ctrl);
	return PCI_ERS_RESULT_RECOVERED;
}

static void nvme_error_resume(struct pci_dev *pdev)
{
	struct nvme_dev *dev = pci_get_drvdata(pdev);

	flush_work(&dev->ctrl.reset_work);
}

static const struct pci_error_handlers nvme_err_handler = {
	.error_detected	= nvme_error_detected,
	.slot_reset	= nvme_slot_reset,
	.resume		= nvme_error_resume,
	.reset_prepare	= nvme_reset_prepare,
	.reset_done	= nvme_reset_done,
};

static const struct pci_device_id nvme_id_table[] = {
	{ PCI_VDEVICE(INTEL, 0x0953),	/* Intel 750/P3500/P3600/P3700 */
		.driver_data = NVME_QUIRK_STRIPE_SIZE |
				NVME_QUIRK_DEALLOCATE_ZEROES, },
	{ PCI_VDEVICE(INTEL, 0x0a53),	/* Intel P3520 */
		.driver_data = NVME_QUIRK_STRIPE_SIZE |
				NVME_QUIRK_DEALLOCATE_ZEROES, },
	{ PCI_VDEVICE(INTEL, 0x0a54),	/* Intel P4500/P4600 */
		.driver_data = NVME_QUIRK_STRIPE_SIZE |
				NVME_QUIRK_DEALLOCATE_ZEROES |
				NVME_QUIRK_IGNORE_DEV_SUBNQN |
				NVME_QUIRK_BOGUS_NID, },
	{ PCI_VDEVICE(INTEL, 0x0a55),	/* Dell Express Flash P4600 */
		.driver_data = NVME_QUIRK_STRIPE_SIZE |
				NVME_QUIRK_DEALLOCATE_ZEROES, },
	{ PCI_VDEVICE(INTEL, 0xf1a5),	/* Intel 600P/P3100 */
		.driver_data = NVME_QUIRK_NO_DEEPEST_PS |
				NVME_QUIRK_MEDIUM_PRIO_SQ |
				NVME_QUIRK_NO_TEMP_THRESH_CHANGE |
				NVME_QUIRK_DISABLE_WRITE_ZEROES, },
	{ PCI_VDEVICE(INTEL, 0xf1a6),	/* Intel 760p/Pro 7600p */
		.driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, },
	{ PCI_VDEVICE(INTEL, 0x5845),	/* Qemu emulated controller */
		.driver_data = NVME_QUIRK_IDENTIFY_CNS |
				NVME_QUIRK_DISABLE_WRITE_ZEROES |
				NVME_QUIRK_BOGUS_NID, },
	{ PCI_VDEVICE(REDHAT, 0x0010),	/* Qemu emulated controller */
		.driver_data = NVME_QUIRK_BOGUS_NID, },
	{ PCI_DEVICE(0x126f, 0x2263),	/* Silicon Motion unidentified */
		.driver_data = NVME_QUIRK_NO_NS_DESC_LIST, },
	{ PCI_DEVICE(0x1bb1, 0x0100),	/* Seagate Nytro Flash Storage */
		.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY |
				NVME_QUIRK_NO_NS_DESC_LIST, },
	{ PCI_DEVICE(0x1c58, 0x0003),	/* HGST adapter */
		.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
	{ PCI_DEVICE(0x1c58, 0x0023),	/* WDC SN200 adapter */
		.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
	{ PCI_DEVICE(0x1c5f, 0x0540),	/* Memblaze Pblaze4 adapter */
		.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
	{ PCI_DEVICE(0x144d, 0xa821),	/* Samsung PM1725 */
		.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
	{ PCI_DEVICE(0x144d, 0xa822),	/* Samsung PM1725a */
		.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY |
				NVME_QUIRK_DISABLE_WRITE_ZEROES |
				NVME_QUIRK_IGNORE_DEV_SUBNQN, },
	{ PCI_DEVICE(0x1987, 0x5016),	/* Phison E16 */
		.driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN |
				NVME_QUIRK_BOGUS_NID, },
	{ PCI_DEVICE(0x1b4b, 0x1092),	/* Lexar 256 GB SSD */
		.driver_data = NVME_QUIRK_NO_NS_DESC_LIST |
				NVME_QUIRK_IGNORE_DEV_SUBNQN, },
	{ PCI_DEVICE(0x1d1d, 0x1f1f),	/* LightNVM qemu device */
		.driver_data = NVME_QUIRK_LIGHTNVM, },
	{ PCI_DEVICE(0x1d1d, 0x2807),	/* CNEX WL */
		.driver_data = NVME_QUIRK_LIGHTNVM, },
	{ PCI_DEVICE(0x1d1d, 0x2601),	/* CNEX Granby */
		.driver_data = NVME_QUIRK_LIGHTNVM, },
	{ PCI_DEVICE(0x10ec, 0x5762),	/* ADATA SX6000LNP */
		.driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN |
				NVME_QUIRK_BOGUS_NID, },
	{ PCI_DEVICE(0x1cc1, 0x8201),	/* ADATA SX8200PNP 512GB */
		.driver_data = NVME_QUIRK_NO_DEEPEST_PS |
				NVME_QUIRK_IGNORE_DEV_SUBNQN, },
	{ PCI_DEVICE(0x1344, 0x5407),	/* Micron Technology Inc NVMe SSD */
		.driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, },
	{ PCI_DEVICE(0x1344, 0x6001),	/* Micron Nitro NVMe */
		.driver_data = NVME_QUIRK_BOGUS_NID, },
	{ PCI_DEVICE(0x1c5c, 0x1504),	/* SK Hynix PC400 */
		.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
	{ PCI_DEVICE(0x15b7, 0x2001),	/* Sandisk Skyhawk */
		.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
	{ PCI_DEVICE(0x2646, 0x2262),	/* KINGSTON SKC2000 NVMe SSD */
		.driver_data = NVME_QUIRK_NO_DEEPEST_PS, },
	{ PCI_DEVICE(0x2646, 0x2263),	/* KINGSTON A2000 NVMe SSD */
		.driver_data = NVME_QUIRK_NO_DEEPEST_PS, },
	{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001),
		.driver_data = NVME_QUIRK_SINGLE_VECTOR, },
	{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) },
	{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2005),
		.driver_data = NVME_QUIRK_SINGLE_VECTOR |
				NVME_QUIRK_128_BYTES_SQES |
				NVME_QUIRK_SHARED_TAGS |
				NVME_QUIRK_SKIP_CID_GEN, },
	{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
	{ 0, }
};
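/* Export the table so userspace can autoload this module on device hotplug. */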
MODULE_DEVICE_TABLE(pci, nvme_id_table);

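/*
 * PCI driver glue.  Power-management callbacks are only wired up when
 * CONFIG_PM_SLEEP is enabled, and SR-IOV enablement is delegated to the
 * generic pci_sriov_configure_simple() helper.
 */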
static struct pci_driver nvme_driver = {
	.name		= "nvme",
	.id_table	= nvme_id_table,
	.probe		= nvme_probe,
	.remove		= nvme_remove,
	.shutdown	= nvme_shutdown,
#ifdef CONFIG_PM_SLEEP
	.driver		= {
		.pm	= &nvme_dev_pm_ops,
	},
#endif
	.sriov_configure = pci_sriov_configure_simple,
	.err_handler	= &nvme_err_handler,
};

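/*
 * Compile-time sanity checks: the queue management commands issued by
 * this driver must be exactly 64 bytes (one submission queue entry), and
 * the IRQ affinity code must support at least two sets so that default
 * and read queues can be spread across CPUs independently.
 */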
static int __init nvme_init(void)
{
	BUILD_BUG_ON(sizeof(struct nvme_create_cq) != 64);
	BUILD_BUG_ON(sizeof(struct nvme_create_sq) != 64);
	BUILD_BUG_ON(sizeof(struct nvme_delete_queue) != 64);
	BUILD_BUG_ON(IRQ_AFFINITY_MAX_SETS < 2);

	return pci_register_driver(&nvme_driver);
}

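/*
 * Flush nvme_wq after unregistering so that no reset or scan work is
 * still executing once the module text goes away.
 */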
static void __exit nvme_exit(void)
{
	pci_unregister_driver(&nvme_driver);
	flush_workqueue(nvme_wq);
}

MODULE_AUTHOR("Matthew Wilcox <willy@linux.intel.com>");
MODULE_LICENSE("GPL");
MODULE_VERSION("1.0");
module_init(nvme_init);
module_exit(nvme_exit);