// SPDX-License-Identifier: GPL-2.0
/*
 * NVM Express device driver
 * Copyright (c) 2011-2014, Intel Corporation.
 */

#include <linux/acpi.h>
#include <linux/aer.h>
#include <linux/async.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/blk-mq-pci.h>
#include <linux/dmi.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/once.h>
#include <linux/pci.h>
#include <linux/suspend.h>
#include <linux/t10-pi.h>
#include <linux/types.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/io-64-nonatomic-hi-lo.h>
#include <linux/sed-opal.h>
#include <linux/pci-p2pdma.h>

#include "trace.h"
#include "nvme.h"

#define SQ_SIZE(q)	((q)->q_depth << (q)->sqes)
#define CQ_SIZE(q)	((q)->q_depth * sizeof(struct nvme_completion))

#define SGES_PER_PAGE	(NVME_CTRL_PAGE_SIZE / sizeof(struct nvme_sgl_desc))

/*
 * These can be higher, but we need to ensure that any command doesn't
 * require an sg allocation that needs more than a page of data.
 */
#define NVME_MAX_KB_SZ	4096
#define NVME_MAX_SEGS	127

static int use_threaded_interrupts;
module_param(use_threaded_interrupts, int, 0);

static bool use_cmb_sqes = true;
module_param(use_cmb_sqes, bool, 0444);
MODULE_PARM_DESC(use_cmb_sqes, "use controller's memory buffer for I/O SQes");

static unsigned int max_host_mem_size_mb = 128;
module_param(max_host_mem_size_mb, uint, 0444);
MODULE_PARM_DESC(max_host_mem_size_mb,
	"Maximum Host Memory Buffer (HMB) size per controller (in MiB)");

static unsigned int sgl_threshold = SZ_32K;
module_param(sgl_threshold, uint, 0644);
MODULE_PARM_DESC(sgl_threshold,
	"Use SGLs when average request segment size is larger or equal to "
	"this size. Use 0 to disable SGLs.");

static int io_queue_depth_set(const char *val, const struct kernel_param *kp);
static const struct kernel_param_ops io_queue_depth_ops = {
	.set = io_queue_depth_set,
	.get = param_get_uint,
};

static unsigned int io_queue_depth = 1024;
module_param_cb(io_queue_depth, &io_queue_depth_ops, &io_queue_depth, 0644);
MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2");

static int io_queue_count_set(const char *val, const struct kernel_param *kp)
{
	unsigned int n;
	int ret;

	ret = kstrtouint(val, 10, &n);
	if (ret != 0 || n > num_possible_cpus())
		return -EINVAL;
	return param_set_uint(val, kp);
}

static const struct kernel_param_ops io_queue_count_ops = {
	.set = io_queue_count_set,
	.get = param_get_uint,
};

static unsigned int write_queues;
module_param_cb(write_queues, &io_queue_count_ops, &write_queues, 0644);
MODULE_PARM_DESC(write_queues,
	"Number of queues to use for writes. If not set, reads and writes "
	"will share a queue set.");

static unsigned int poll_queues;
module_param_cb(poll_queues, &io_queue_count_ops, &poll_queues, 0644);
MODULE_PARM_DESC(poll_queues, "Number of queues to use for polled IO.");

static bool noacpi;
module_param(noacpi, bool, 0444);
MODULE_PARM_DESC(noacpi, "disable acpi bios quirks");

struct nvme_dev;
struct nvme_queue;

static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown);
static bool __nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode);

/*
 * Represents an NVM Express device.  Each nvme_dev is a PCI function.
 */
struct nvme_dev {
	struct nvme_queue *queues;
	struct blk_mq_tag_set tagset;
	struct blk_mq_tag_set admin_tagset;
	u32 __iomem *dbs;
	struct device *dev;
	struct dma_pool *prp_page_pool;
	struct dma_pool *prp_small_pool;
	unsigned online_queues;
	unsigned max_qid;
	unsigned io_queues[HCTX_MAX_TYPES];
	unsigned int num_vecs;
	u32 q_depth;
	int io_sqes;
	u32 db_stride;
	void __iomem *bar;
	unsigned long bar_mapped_size;
	struct work_struct remove_work;
	struct mutex shutdown_lock;
	bool subsystem;
	u64 cmb_size;
	bool cmb_use_sqes;
	u32 cmbsz;
	u32 cmbloc;
	struct nvme_ctrl ctrl;
	u32 last_ps;

	mempool_t *iod_mempool;

	/* shadow doorbell buffer support: */
	__le32 *dbbuf_dbs;
	dma_addr_t dbbuf_dbs_dma_addr;
	__le32 *dbbuf_eis;
	dma_addr_t dbbuf_eis_dma_addr;

	/* host memory buffer support: */
	u64 host_mem_size;
	u32 nr_host_mem_descs;
	dma_addr_t host_mem_descs_dma;
	struct nvme_host_mem_buf_desc *host_mem_descs;
	void **host_mem_desc_bufs;
	unsigned int nr_allocated_queues;
	unsigned int nr_write_queues;
	unsigned int nr_poll_queues;
};

static int io_queue_depth_set(const char *val, const struct kernel_param *kp)
{
	int ret;
	u32 n;

	ret = kstrtou32(val, 10, &n);
	if (ret != 0 || n < 2)
		return -EINVAL;

	return param_set_uint(val, kp);
}

static inline unsigned int sq_idx(unsigned int qid, u32 stride)
{
	return qid * 2 * stride;
}

static inline unsigned int cq_idx(unsigned int qid, u32 stride)
{
	return (qid * 2 + 1) * stride;
}

static inline struct nvme_dev *to_nvme_dev(struct nvme_ctrl *ctrl)
{
	return container_of(ctrl, struct nvme_dev, ctrl);
}
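
/*
 * Worked example (assuming CAP.DSTRD == 0, i.e. db_stride == 1): the
 * doorbell array lives at BAR offset 0x1000, so qid 0 (admin) uses
 * dbs[0]/dbs[1], while qid 1 uses dbs[2] for its SQ tail doorbell
 * (byte offset 0x1008) and dbs[3] for its CQ head doorbell (0x100c).
 * The same sq_idx()/cq_idx() layout is reused for the shadow doorbell
 * buffers below.
 */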

/*
 * An NVM Express queue.  Each device has at least two (one for admin
 * commands and one for I/O commands).
 */
struct nvme_queue {
	struct nvme_dev *dev;
	spinlock_t sq_lock;
	void *sq_cmds;
	 /* only used for poll queues: */
	spinlock_t cq_poll_lock ____cacheline_aligned_in_smp;
	struct nvme_completion *cqes;
	dma_addr_t sq_dma_addr;
	dma_addr_t cq_dma_addr;
	u32 __iomem *q_db;
	u32 q_depth;
	u16 cq_vector;
	u16 sq_tail;
	u16 last_sq_tail;
	u16 cq_head;
	u16 qid;
	u8 cq_phase;
	u8 sqes;
	unsigned long flags;
#define NVMEQ_ENABLED		0
#define NVMEQ_SQ_CMB		1
#define NVMEQ_DELETE_ERROR	2
#define NVMEQ_POLLED		3
	__le32 *dbbuf_sq_db;
	__le32 *dbbuf_cq_db;
	__le32 *dbbuf_sq_ei;
	__le32 *dbbuf_cq_ei;
	struct completion delete_done;
};

/*
 * The nvme_iod describes the data in an I/O.
 *
 * The sg pointer contains the list of PRP/SGL chunk allocations in addition
 * to the actual struct scatterlist.
 */
struct nvme_iod {
	struct nvme_request req;
	struct nvme_command cmd;
	struct nvme_queue *nvmeq;
	bool use_sgl;
	int aborted;
	int npages;		/* In the PRP list. 0 means small pool in use */
	int nents;		/* Used in scatterlist */
	dma_addr_t first_dma;
	unsigned int dma_len;	/* length of single DMA segment mapping */
	dma_addr_t meta_dma;
	struct scatterlist *sg;
};

static inline unsigned int nvme_dbbuf_size(struct nvme_dev *dev)
{
	return dev->nr_allocated_queues * 8 * dev->db_stride;
}

static int nvme_dbbuf_dma_alloc(struct nvme_dev *dev)
{
	unsigned int mem_size = nvme_dbbuf_size(dev);

	if (dev->dbbuf_dbs)
		return 0;

	dev->dbbuf_dbs = dma_alloc_coherent(dev->dev, mem_size,
					    &dev->dbbuf_dbs_dma_addr,
					    GFP_KERNEL);
	if (!dev->dbbuf_dbs)
		return -ENOMEM;
	dev->dbbuf_eis = dma_alloc_coherent(dev->dev, mem_size,
					    &dev->dbbuf_eis_dma_addr,
					    GFP_KERNEL);
	if (!dev->dbbuf_eis) {
		dma_free_coherent(dev->dev, mem_size,
				  dev->dbbuf_dbs, dev->dbbuf_dbs_dma_addr);
		dev->dbbuf_dbs = NULL;
		return -ENOMEM;
	}

	return 0;
}

static void nvme_dbbuf_dma_free(struct nvme_dev *dev)
{
	unsigned int mem_size = nvme_dbbuf_size(dev);

	if (dev->dbbuf_dbs) {
		dma_free_coherent(dev->dev, mem_size,
				  dev->dbbuf_dbs, dev->dbbuf_dbs_dma_addr);
		dev->dbbuf_dbs = NULL;
	}
	if (dev->dbbuf_eis) {
		dma_free_coherent(dev->dev, mem_size,
				  dev->dbbuf_eis, dev->dbbuf_eis_dma_addr);
		dev->dbbuf_eis = NULL;
	}
}

static void nvme_dbbuf_init(struct nvme_dev *dev,
			    struct nvme_queue *nvmeq, int qid)
{
	if (!dev->dbbuf_dbs || !qid)
		return;

	nvmeq->dbbuf_sq_db = &dev->dbbuf_dbs[sq_idx(qid, dev->db_stride)];
	nvmeq->dbbuf_cq_db = &dev->dbbuf_dbs[cq_idx(qid, dev->db_stride)];
	nvmeq->dbbuf_sq_ei = &dev->dbbuf_eis[sq_idx(qid, dev->db_stride)];
	nvmeq->dbbuf_cq_ei = &dev->dbbuf_eis[cq_idx(qid, dev->db_stride)];
}

static void nvme_dbbuf_free(struct nvme_queue *nvmeq)
{
	if (!nvmeq->qid)
		return;

	nvmeq->dbbuf_sq_db = NULL;
	nvmeq->dbbuf_cq_db = NULL;
	nvmeq->dbbuf_sq_ei = NULL;
	nvmeq->dbbuf_cq_ei = NULL;
}

static void nvme_dbbuf_set(struct nvme_dev *dev)
{
	struct nvme_command c;
	unsigned int i;

	if (!dev->dbbuf_dbs)
		return;

	memset(&c, 0, sizeof(c));
	c.dbbuf.opcode = nvme_admin_dbbuf;
	c.dbbuf.prp1 = cpu_to_le64(dev->dbbuf_dbs_dma_addr);
	c.dbbuf.prp2 = cpu_to_le64(dev->dbbuf_eis_dma_addr);

	if (nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0)) {
		dev_warn(dev->ctrl.device, "unable to set dbbuf\n");
		/* Free memory and continue on */
		nvme_dbbuf_dma_free(dev);

		for (i = 1; i <= dev->online_queues; i++)
			nvme_dbbuf_free(&dev->queues[i]);
	}
}

static inline int nvme_dbbuf_need_event(u16 event_idx, u16 new_idx, u16 old)
{
	return (u16)(new_idx - event_idx - 1) < (u16)(new_idx - old);
}

/* Update dbbuf and return true if an MMIO is required */
static bool nvme_dbbuf_update_and_check_event(u16 value, __le32 *dbbuf_db,
					      volatile __le32 *dbbuf_ei)
{
	if (dbbuf_db) {
		u16 old_value, event_idx;

		/*
		 * Ensure that the queue is written before updating
		 * the doorbell in memory
		 */
		wmb();

		old_value = le32_to_cpu(*dbbuf_db);
		*dbbuf_db = cpu_to_le32(value);

		/*
		 * Ensure that the doorbell is updated before reading the event
		 * index from memory.  The controller needs to provide similar
		 * ordering to ensure the event index is updated before reading
		 * the doorbell.
		 */
		mb();

		event_idx = le32_to_cpu(*dbbuf_ei);
		if (!nvme_dbbuf_need_event(event_idx, value, old_value))
			return false;
	}

	return true;
}
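
/*
 * Worked example for nvme_dbbuf_need_event(), using the same wrap-safe
 * u16 arithmetic as the code: with old == 10 and new_idx == 15, an
 * event_idx of 12 gives (15 - 12 - 1) == 2 < (15 - 10) == 5, so the
 * controller's event index falls inside the window just written and an
 * MMIO doorbell write is required.  An event_idx of 20 gives
 * (u16)(15 - 20 - 1) == 65530, which is not < 5, so the MMIO can be
 * elided.  The unsigned subtractions keep the comparison correct when
 * the indices wrap around.
 */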

/*
 * Will slightly overestimate the number of pages needed.  This is OK
 * as it only leads to a small amount of wasted memory for the lifetime of
 * the I/O.
 */
static int nvme_pci_npages_prp(void)
{
	unsigned max_bytes = (NVME_MAX_KB_SZ * 1024) + NVME_CTRL_PAGE_SIZE;
	unsigned nprps = DIV_ROUND_UP(max_bytes, NVME_CTRL_PAGE_SIZE);

	return DIV_ROUND_UP(8 * nprps, NVME_CTRL_PAGE_SIZE - 8);
}

/*
 * Calculates the number of pages needed for the SGL segments. For example a 4k
 * page can accommodate 256 SGL descriptors.
 */
static int nvme_pci_npages_sgl(void)
{
	return DIV_ROUND_UP(NVME_MAX_SEGS * sizeof(struct nvme_sgl_desc),
			NVME_CTRL_PAGE_SIZE);
}
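
/*
 * Worked example (assuming NVME_CTRL_PAGE_SIZE == 4096): the largest
 * transfer is NVME_MAX_KB_SZ * 1024 + 4096 = 4,198,400 bytes, i.e.
 * nprps = 1025 PRP entries of 8 bytes each.  Each PRP list page chains
 * to the next through its last entry, leaving 4096 - 8 usable bytes,
 * so nvme_pci_npages_prp() returns DIV_ROUND_UP(8 * 1025, 4088) = 3.
 * For SGLs, NVME_MAX_SEGS * 16 = 2032 bytes of descriptors fit in one
 * page, so nvme_pci_npages_sgl() returns 1.
 */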

static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
				unsigned int hctx_idx)
{
	struct nvme_dev *dev = data;
	struct nvme_queue *nvmeq = &dev->queues[0];

	WARN_ON(hctx_idx != 0);
	WARN_ON(dev->admin_tagset.tags[0] != hctx->tags);

	hctx->driver_data = nvmeq;
	return 0;
}

static int nvme_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
			  unsigned int hctx_idx)
{
	struct nvme_dev *dev = data;
	struct nvme_queue *nvmeq = &dev->queues[hctx_idx + 1];

	WARN_ON(dev->tagset.tags[hctx_idx] != hctx->tags);
	hctx->driver_data = nvmeq;
	return 0;
}

static int nvme_init_request(struct blk_mq_tag_set *set, struct request *req,
		unsigned int hctx_idx, unsigned int numa_node)
{
	struct nvme_dev *dev = set->driver_data;
	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
	int queue_idx = (set == &dev->tagset) ? hctx_idx + 1 : 0;
	struct nvme_queue *nvmeq = &dev->queues[queue_idx];

	BUG_ON(!nvmeq);
	iod->nvmeq = nvmeq;

	nvme_req(req)->ctrl = &dev->ctrl;
	return 0;
}

static int queue_irq_offset(struct nvme_dev *dev)
{
	/* if we have more than 1 vec, admin queue offsets us by 1 */
	if (dev->num_vecs > 1)
		return 1;

	return 0;
}

static int nvme_pci_map_queues(struct blk_mq_tag_set *set)
{
	struct nvme_dev *dev = set->driver_data;
	int i, qoff, offset;

	offset = queue_irq_offset(dev);
	for (i = 0, qoff = 0; i < set->nr_maps; i++) {
		struct blk_mq_queue_map *map = &set->map[i];

		map->nr_queues = dev->io_queues[i];
		if (!map->nr_queues) {
			BUG_ON(i == HCTX_TYPE_DEFAULT);
			continue;
		}

		/*
		 * The poll queue(s) doesn't have an IRQ (and hence IRQ
		 * affinity), so use the regular blk-mq cpu mapping
		 */
		map->queue_offset = qoff;
		if (i != HCTX_TYPE_POLL && offset)
			blk_mq_pci_map_queues(map, to_pci_dev(dev->dev), offset);
		else
			blk_mq_map_queues(map);
		qoff += map->nr_queues;
		offset += map->nr_queues;
	}

	return 0;
}

/*
 * Write sq tail if we are asked to, or if the next command would wrap.
 */
static inline void nvme_write_sq_db(struct nvme_queue *nvmeq, bool write_sq)
{
	if (!write_sq) {
		u16 next_tail = nvmeq->sq_tail + 1;

		if (next_tail == nvmeq->q_depth)
			next_tail = 0;
		if (next_tail != nvmeq->last_sq_tail)
			return;
	}

	if (nvme_dbbuf_update_and_check_event(nvmeq->sq_tail,
			nvmeq->dbbuf_sq_db, nvmeq->dbbuf_sq_ei))
		writel(nvmeq->sq_tail, nvmeq->q_db);
	nvmeq->last_sq_tail = nvmeq->sq_tail;
}

/**
 * nvme_submit_cmd() - Copy a command into a queue and ring the doorbell
 * @nvmeq: The queue to use
 * @cmd: The command to send
 * @write_sq: whether to write to the SQ doorbell
 */
static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd,
			    bool write_sq)
{
	spin_lock(&nvmeq->sq_lock);
	memcpy(nvmeq->sq_cmds + (nvmeq->sq_tail << nvmeq->sqes),
	       cmd, sizeof(*cmd));
	if (++nvmeq->sq_tail == nvmeq->q_depth)
		nvmeq->sq_tail = 0;
	nvme_write_sq_db(nvmeq, write_sq);
	spin_unlock(&nvmeq->sq_lock);
}

static void nvme_commit_rqs(struct blk_mq_hw_ctx *hctx)
{
	struct nvme_queue *nvmeq = hctx->driver_data;

	spin_lock(&nvmeq->sq_lock);
	if (nvmeq->sq_tail != nvmeq->last_sq_tail)
		nvme_write_sq_db(nvmeq, true);
	spin_unlock(&nvmeq->sq_lock);
}

static void **nvme_pci_iod_list(struct request *req)
{
	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
	return (void **)(iod->sg + blk_rq_nr_phys_segments(req));
}

static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req)
{
	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
	int nseg = blk_rq_nr_phys_segments(req);
	unsigned int avg_seg_size;

	avg_seg_size = DIV_ROUND_UP(blk_rq_payload_bytes(req), nseg);

	if (!(dev->ctrl.sgls & ((1 << 0) | (1 << 1))))
		return false;
	if (!iod->nvmeq->qid)
		return false;
	if (!sgl_threshold || avg_seg_size < sgl_threshold)
		return false;
	return true;
}
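
/*
 * Worked example for nvme_pci_use_sgls() with the default sgl_threshold
 * of 32 KiB: a 512 KiB request split into 128 physical segments averages
 * 4 KiB per segment and stays on PRPs, while the same 512 KiB in 4
 * segments averages 128 KiB and is mapped with SGLs -- provided the
 * controller advertises SGL support and this is not the admin queue
 * (qid 0), which must always use PRPs.
 */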

static void nvme_free_prps(struct nvme_dev *dev, struct request *req)
{
	const int last_prp = NVME_CTRL_PAGE_SIZE / sizeof(__le64) - 1;
	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
	dma_addr_t dma_addr = iod->first_dma;
	int i;

	for (i = 0; i < iod->npages; i++) {
		__le64 *prp_list = nvme_pci_iod_list(req)[i];
		dma_addr_t next_dma_addr = le64_to_cpu(prp_list[last_prp]);

		dma_pool_free(dev->prp_page_pool, prp_list, dma_addr);
		dma_addr = next_dma_addr;
	}
}

static void nvme_free_sgls(struct nvme_dev *dev, struct request *req)
{
	const int last_sg = SGES_PER_PAGE - 1;
	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
	dma_addr_t dma_addr = iod->first_dma;
	int i;

	for (i = 0; i < iod->npages; i++) {
		struct nvme_sgl_desc *sg_list = nvme_pci_iod_list(req)[i];
		dma_addr_t next_dma_addr = le64_to_cpu((sg_list[last_sg]).addr);

		dma_pool_free(dev->prp_page_pool, sg_list, dma_addr);
		dma_addr = next_dma_addr;
	}
}

static void nvme_unmap_sg(struct nvme_dev *dev, struct request *req)
{
	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);

	if (is_pci_p2pdma_page(sg_page(iod->sg)))
		pci_p2pdma_unmap_sg(dev->dev, iod->sg, iod->nents,
				    rq_dma_dir(req));
	else
		dma_unmap_sg(dev->dev, iod->sg, iod->nents, rq_dma_dir(req));
}

static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
{
	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);

	if (iod->dma_len) {
		dma_unmap_page(dev->dev, iod->first_dma, iod->dma_len,
			       rq_dma_dir(req));
		return;
	}

	WARN_ON_ONCE(!iod->nents);

	nvme_unmap_sg(dev, req);
	if (iod->npages == 0)
		dma_pool_free(dev->prp_small_pool, nvme_pci_iod_list(req)[0],
			      iod->first_dma);
	else if (iod->use_sgl)
		nvme_free_sgls(dev, req);
	else
		nvme_free_prps(dev, req);
	mempool_free(iod->sg, dev->iod_mempool);
}

static void nvme_print_sgl(struct scatterlist *sgl, int nents)
{
	int i;
	struct scatterlist *sg;

	for_each_sg(sgl, sg, nents, i) {
		dma_addr_t phys = sg_phys(sg);
		pr_warn("sg[%d] phys_addr:%pad offset:%d length:%d "
			"dma_address:%pad dma_length:%d\n",
			i, &phys, sg->offset, sg->length, &sg_dma_address(sg),
			sg_dma_len(sg));
	}
}

static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev,
		struct request *req, struct nvme_rw_command *cmnd)
{
	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
	struct dma_pool *pool;
	int length = blk_rq_payload_bytes(req);
	struct scatterlist *sg = iod->sg;
	int dma_len = sg_dma_len(sg);
	u64 dma_addr = sg_dma_address(sg);
	int offset = dma_addr & (NVME_CTRL_PAGE_SIZE - 1);
	__le64 *prp_list;
	void **list = nvme_pci_iod_list(req);
	dma_addr_t prp_dma;
	int nprps, i;

	length -= (NVME_CTRL_PAGE_SIZE - offset);
	if (length <= 0) {
		iod->first_dma = 0;
		goto done;
	}

	dma_len -= (NVME_CTRL_PAGE_SIZE - offset);
	if (dma_len) {
		dma_addr += (NVME_CTRL_PAGE_SIZE - offset);
	} else {
		sg = sg_next(sg);
		dma_addr = sg_dma_address(sg);
		dma_len = sg_dma_len(sg);
	}

	if (length <= NVME_CTRL_PAGE_SIZE) {
		iod->first_dma = dma_addr;
		goto done;
	}

	nprps = DIV_ROUND_UP(length, NVME_CTRL_PAGE_SIZE);
	if (nprps <= (256 / 8)) {
		pool = dev->prp_small_pool;
		iod->npages = 0;
	} else {
		pool = dev->prp_page_pool;
		iod->npages = 1;
	}

	prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &prp_dma);
	if (!prp_list) {
		iod->first_dma = dma_addr;
		iod->npages = -1;
		return BLK_STS_RESOURCE;
	}
	list[0] = prp_list;
	iod->first_dma = prp_dma;
	i = 0;
	for (;;) {
		if (i == NVME_CTRL_PAGE_SIZE >> 3) {
			__le64 *old_prp_list = prp_list;
			prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &prp_dma);
			if (!prp_list)
				goto free_prps;
			list[iod->npages++] = prp_list;
			prp_list[0] = old_prp_list[i - 1];
			old_prp_list[i - 1] = cpu_to_le64(prp_dma);
			i = 1;
		}
		prp_list[i++] = cpu_to_le64(dma_addr);
		dma_len -= NVME_CTRL_PAGE_SIZE;
		dma_addr += NVME_CTRL_PAGE_SIZE;
		length -= NVME_CTRL_PAGE_SIZE;
		if (length <= 0)
			break;
		if (dma_len > 0)
			continue;
		if (unlikely(dma_len < 0))
			goto bad_sgl;
		sg = sg_next(sg);
		dma_addr = sg_dma_address(sg);
		dma_len = sg_dma_len(sg);
	}
done:
	cmnd->dptr.prp1 = cpu_to_le64(sg_dma_address(iod->sg));
	cmnd->dptr.prp2 = cpu_to_le64(iod->first_dma);
	return BLK_STS_OK;
free_prps:
	nvme_free_prps(dev, req);
	return BLK_STS_RESOURCE;
bad_sgl:
	WARN(DO_ONCE(nvme_print_sgl, iod->sg, iod->nents),
	     "Invalid SGL for payload:%d nents:%d\n",
	     blk_rq_payload_bytes(req), iod->nents);
	return BLK_STS_IOERR;
}
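
/*
 * Worked example (assuming NVME_CTRL_PAGE_SIZE == 4096 and a
 * page-aligned mapping): an 8 KiB transfer needs no PRP list at all --
 * PRP1 covers the first page and PRP2 points directly at the second.
 * A 16 KiB transfer leaves 12 KiB after PRP1, i.e. nprps == 3 <= 32,
 * so a 256-byte chunk from prp_small_pool holds the three list entries
 * and PRP2 points at that list.  Only transfers needing more than 32
 * entries take full pages from prp_page_pool, chained through their
 * last slot as in the loop above.
 */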

static void nvme_pci_sgl_set_data(struct nvme_sgl_desc *sge,
		struct scatterlist *sg)
{
	sge->addr = cpu_to_le64(sg_dma_address(sg));
	sge->length = cpu_to_le32(sg_dma_len(sg));
	sge->type = NVME_SGL_FMT_DATA_DESC << 4;
}

static void nvme_pci_sgl_set_seg(struct nvme_sgl_desc *sge,
		dma_addr_t dma_addr, int entries)
{
	sge->addr = cpu_to_le64(dma_addr);
	if (entries < SGES_PER_PAGE) {
		sge->length = cpu_to_le32(entries * sizeof(*sge));
		sge->type = NVME_SGL_FMT_LAST_SEG_DESC << 4;
	} else {
		sge->length = cpu_to_le32(NVME_CTRL_PAGE_SIZE);
		sge->type = NVME_SGL_FMT_SEG_DESC << 4;
	}
}

static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev,
		struct request *req, struct nvme_rw_command *cmd, int entries)
{
	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
	struct dma_pool *pool;
	struct nvme_sgl_desc *sg_list;
	struct scatterlist *sg = iod->sg;
	dma_addr_t sgl_dma;
	int i = 0;

	/* setting the transfer type as SGL */
	cmd->flags = NVME_CMD_SGL_METABUF;

	if (entries == 1) {
		nvme_pci_sgl_set_data(&cmd->dptr.sgl, sg);
		return BLK_STS_OK;
	}

	if (entries <= (256 / sizeof(struct nvme_sgl_desc))) {
		pool = dev->prp_small_pool;
		iod->npages = 0;
	} else {
		pool = dev->prp_page_pool;
		iod->npages = 1;
	}

	sg_list = dma_pool_alloc(pool, GFP_ATOMIC, &sgl_dma);
	if (!sg_list) {
		iod->npages = -1;
		return BLK_STS_RESOURCE;
	}

	nvme_pci_iod_list(req)[0] = sg_list;
	iod->first_dma = sgl_dma;

	nvme_pci_sgl_set_seg(&cmd->dptr.sgl, sgl_dma, entries);

	do {
		if (i == SGES_PER_PAGE) {
			struct nvme_sgl_desc *old_sg_desc = sg_list;
			struct nvme_sgl_desc *link = &old_sg_desc[i - 1];

			sg_list = dma_pool_alloc(pool, GFP_ATOMIC, &sgl_dma);
			if (!sg_list)
				goto free_sgls;

			i = 0;
			nvme_pci_iod_list(req)[iod->npages++] = sg_list;
			sg_list[i++] = *link;
			nvme_pci_sgl_set_seg(link, sgl_dma, entries);
		}

		nvme_pci_sgl_set_data(&sg_list[i++], sg);
		sg = sg_next(sg);
	} while (--entries > 0);

	return BLK_STS_OK;
free_sgls:
	nvme_free_sgls(dev, req);
	return BLK_STS_RESOURCE;
}
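
/*
 * Worked example (assuming 4 KiB pages, so SGES_PER_PAGE == 256): a
 * request mapped to 300 entries starts with one descriptor page; when
 * the loop above reaches slot 256 it allocates a second page, moves the
 * descriptor in the last slot into it, and rewrites that slot as a
 * segment descriptor pointing at the new page -- leaving 255 data
 * descriptors plus one link in the first page and the remaining 45 in
 * the second.  Requests with at most 16 entries (256 bytes at 16 bytes
 * per descriptor) are served from the small pool instead.
 */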

static blk_status_t nvme_setup_prp_simple(struct nvme_dev *dev,
		struct request *req, struct nvme_rw_command *cmnd,
		struct bio_vec *bv)
{
	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
	unsigned int offset = bv->bv_offset & (NVME_CTRL_PAGE_SIZE - 1);
	unsigned int first_prp_len = NVME_CTRL_PAGE_SIZE - offset;

	iod->first_dma = dma_map_bvec(dev->dev, bv, rq_dma_dir(req), 0);
	if (dma_mapping_error(dev->dev, iod->first_dma))
		return BLK_STS_RESOURCE;
	iod->dma_len = bv->bv_len;

	cmnd->dptr.prp1 = cpu_to_le64(iod->first_dma);
	if (bv->bv_len > first_prp_len)
		cmnd->dptr.prp2 = cpu_to_le64(iod->first_dma + first_prp_len);
	else
		cmnd->dptr.prp2 = 0;
	return BLK_STS_OK;
}
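
/*
 * Worked example (assuming NVME_CTRL_PAGE_SIZE == 4096): a page-aligned
 * 4 KiB bvec is described by PRP1 alone (PRP2 == 0), while a
 * page-aligned 8 KiB bvec sets PRP1 to the mapping and PRP2 to
 * mapping + 4096.  nvme_map_data() below only takes this path when
 * bv_offset + bv_len fits within two controller pages, which is exactly
 * what two bare PRP entries can address.
 */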

static blk_status_t nvme_setup_sgl_simple(struct nvme_dev *dev,
		struct request *req, struct nvme_rw_command *cmnd,
		struct bio_vec *bv)
{
	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);

	iod->first_dma = dma_map_bvec(dev->dev, bv, rq_dma_dir(req), 0);
	if (dma_mapping_error(dev->dev, iod->first_dma))
		return BLK_STS_RESOURCE;
	iod->dma_len = bv->bv_len;

	cmnd->flags = NVME_CMD_SGL_METABUF;
	cmnd->dptr.sgl.addr = cpu_to_le64(iod->first_dma);
	cmnd->dptr.sgl.length = cpu_to_le32(iod->dma_len);
	cmnd->dptr.sgl.type = NVME_SGL_FMT_DATA_DESC << 4;
	return BLK_STS_OK;
}

static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
		struct nvme_command *cmnd)
{
	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
	blk_status_t ret = BLK_STS_RESOURCE;
	int nr_mapped;

	if (blk_rq_nr_phys_segments(req) == 1) {
		struct bio_vec bv = req_bvec(req);

		if (!is_pci_p2pdma_page(bv.bv_page)) {
			if (bv.bv_offset + bv.bv_len <= NVME_CTRL_PAGE_SIZE * 2)
				return nvme_setup_prp_simple(dev, req,
							     &cmnd->rw, &bv);

			if (iod->nvmeq->qid && sgl_threshold &&
			    dev->ctrl.sgls & ((1 << 0) | (1 << 1)))
				return nvme_setup_sgl_simple(dev, req,
							     &cmnd->rw, &bv);
		}
	}

	iod->dma_len = 0;
	iod->sg = mempool_alloc(dev->iod_mempool, GFP_ATOMIC);
	if (!iod->sg)
		return BLK_STS_RESOURCE;
	sg_init_table(iod->sg, blk_rq_nr_phys_segments(req));
	iod->nents = blk_rq_map_sg(req->q, req, iod->sg);
	if (!iod->nents)
		goto out_free_sg;

	if (is_pci_p2pdma_page(sg_page(iod->sg)))
		nr_mapped = pci_p2pdma_map_sg_attrs(dev->dev, iod->sg,
				iod->nents, rq_dma_dir(req), DMA_ATTR_NO_WARN);
	else
		nr_mapped = dma_map_sg_attrs(dev->dev, iod->sg, iod->nents,
					     rq_dma_dir(req), DMA_ATTR_NO_WARN);
	if (!nr_mapped)
		goto out_free_sg;

	iod->use_sgl = nvme_pci_use_sgls(dev, req);
	if (iod->use_sgl)
		ret = nvme_pci_setup_sgls(dev, req, &cmnd->rw, nr_mapped);
	else
		ret = nvme_pci_setup_prps(dev, req, &cmnd->rw);
	if (ret != BLK_STS_OK)
		goto out_unmap_sg;
	return BLK_STS_OK;

out_unmap_sg:
	nvme_unmap_sg(dev, req);
out_free_sg:
	mempool_free(iod->sg, dev->iod_mempool);
	return ret;
}

static blk_status_t nvme_map_metadata(struct nvme_dev *dev, struct request *req,
		struct nvme_command *cmnd)
{
	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);

	iod->meta_dma = dma_map_bvec(dev->dev, rq_integrity_vec(req),
			rq_dma_dir(req), 0);
	if (dma_mapping_error(dev->dev, iod->meta_dma))
		return BLK_STS_IOERR;
	cmnd->rw.metadata = cpu_to_le64(iod->meta_dma);
	return BLK_STS_OK;
}

/*
 * NOTE: ns is NULL when called on the admin queue.
 */
static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
			 const struct blk_mq_queue_data *bd)
{
	struct nvme_ns *ns = hctx->queue->queuedata;
	struct nvme_queue *nvmeq = hctx->driver_data;
	struct nvme_dev *dev = nvmeq->dev;
	struct request *req = bd->rq;
	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
	struct nvme_command *cmnd = &iod->cmd;
	blk_status_t ret;

	iod->aborted = 0;
	iod->npages = -1;
	iod->nents = 0;

	/*
	 * We should not need to do this, but we're still using this to
	 * ensure we can drain requests on a dying queue.
	 */
	if (unlikely(!test_bit(NVMEQ_ENABLED, &nvmeq->flags)))
		return BLK_STS_IOERR;

	ret = nvme_setup_cmd(ns, req, cmnd);
	if (ret)
		return ret;

	if (blk_rq_nr_phys_segments(req)) {
		ret = nvme_map_data(dev, req, cmnd);
		if (ret)
			goto out_free_cmd;
	}

	if (blk_integrity_rq(req)) {
		ret = nvme_map_metadata(dev, req, cmnd);
		if (ret)
			goto out_unmap_data;
	}

	blk_mq_start_request(req);
	nvme_submit_cmd(nvmeq, cmnd, bd->last);
	return BLK_STS_OK;
out_unmap_data:
	if (blk_rq_nr_phys_segments(req))
		nvme_unmap_data(dev, req);
out_free_cmd:
	nvme_cleanup_cmd(req);
	return ret;
}

static void nvme_pci_complete_rq(struct request *req)
{
	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
	struct nvme_dev *dev = iod->nvmeq->dev;

	if (blk_integrity_rq(req))
		dma_unmap_page(dev->dev, iod->meta_dma,
			       rq_integrity_vec(req)->bv_len, rq_dma_dir(req));

	if (blk_rq_nr_phys_segments(req))
		nvme_unmap_data(dev, req);
	nvme_complete_rq(req);
}

/* We read the CQE phase first to check if the rest of the entry is valid */
static inline bool nvme_cqe_pending(struct nvme_queue *nvmeq)
{
	struct nvme_completion *hcqe = &nvmeq->cqes[nvmeq->cq_head];

	return (le16_to_cpu(READ_ONCE(hcqe->status)) & 1) == nvmeq->cq_phase;
}

static inline void nvme_ring_cq_doorbell(struct nvme_queue *nvmeq)
{
	u16 head = nvmeq->cq_head;

	if (nvme_dbbuf_update_and_check_event(head, nvmeq->dbbuf_cq_db,
					      nvmeq->dbbuf_cq_ei))
		writel(head, nvmeq->q_db + nvmeq->dev->db_stride);
}

static inline struct blk_mq_tags *nvme_queue_tagset(struct nvme_queue *nvmeq)
{
	if (!nvmeq->qid)
		return nvmeq->dev->admin_tagset.tags[0];
	return nvmeq->dev->tagset.tags[nvmeq->qid - 1];
}
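
/*
 * Worked example of the phase-bit check above: a freshly created queue
 * starts with cq_phase == 1, and the controller posts its first pass of
 * completions with the status phase bit set, so they compare as
 * pending.  When cq_head wraps, nvme_update_cq_head() below flips
 * cq_phase to 0; entries left over from the first pass still carry
 * phase 1 and no longer match, while the controller's second pass is
 * written with phase 0 and does.
 */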

static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx)
{
	struct nvme_completion *cqe = &nvmeq->cqes[idx];
	__u16 command_id = READ_ONCE(cqe->command_id);
	struct request *req;

	/*
	 * AEN requests are special as they don't time out and can
	 * survive any kind of queue freeze and often don't respond to
	 * aborts.  We don't even bother to allocate a struct request
	 * for them but rather special case them here.
	 */
	if (unlikely(nvme_is_aen_req(nvmeq->qid, command_id))) {
		nvme_complete_async_event(&nvmeq->dev->ctrl,
				cqe->status, &cqe->result);
		return;
	}

	req = nvme_find_rq(nvme_queue_tagset(nvmeq), command_id);
	if (unlikely(!req)) {
		dev_warn(nvmeq->dev->ctrl.device,
			"invalid id %d completed on queue %d\n",
			command_id, le16_to_cpu(cqe->sq_id));
		return;
	}

	trace_nvme_sq(req, cqe->sq_head, nvmeq->sq_tail);
	if (!nvme_try_complete_req(req, cqe->status, cqe->result))
		nvme_pci_complete_rq(req);
}

static inline void nvme_update_cq_head(struct nvme_queue *nvmeq)
{
	u32 tmp = nvmeq->cq_head + 1;

	if (tmp == nvmeq->q_depth) {
		nvmeq->cq_head = 0;
		nvmeq->cq_phase ^= 1;
	} else {
		nvmeq->cq_head = tmp;
	}
}

static inline int nvme_process_cq(struct nvme_queue *nvmeq)
{
	int found = 0;

	while (nvme_cqe_pending(nvmeq)) {
		found++;
		/*
		 * load-load control dependency between phase and the rest of
		 * the cqe requires a full read memory barrier
		 */
		dma_rmb();
		nvme_handle_cqe(nvmeq, nvmeq->cq_head);
		nvme_update_cq_head(nvmeq);
	}

	if (found)
		nvme_ring_cq_doorbell(nvmeq);
	return found;
}

static irqreturn_t nvme_irq(int irq, void *data)
{
	struct nvme_queue *nvmeq = data;
	irqreturn_t ret = IRQ_NONE;

	/*
	 * The rmb/wmb pair ensures we see all updates from a previous run of
	 * the irq handler, even if that was on another CPU.
	 */
	rmb();
	if (nvme_process_cq(nvmeq))
		ret = IRQ_HANDLED;
	wmb();

	return ret;
}

static irqreturn_t nvme_irq_check(int irq, void *data)
{
	struct nvme_queue *nvmeq = data;

	if (nvme_cqe_pending(nvmeq))
		return IRQ_WAKE_THREAD;
	return IRQ_NONE;
}

/*
 * Poll for completions for any interrupt driven queue
 * Can be called from any context.
 */
static void nvme_poll_irqdisable(struct nvme_queue *nvmeq)
{
	struct pci_dev *pdev = to_pci_dev(nvmeq->dev->dev);

	WARN_ON_ONCE(test_bit(NVMEQ_POLLED, &nvmeq->flags));

	disable_irq(pci_irq_vector(pdev, nvmeq->cq_vector));
	nvme_process_cq(nvmeq);
	enable_irq(pci_irq_vector(pdev, nvmeq->cq_vector));
}

static int nvme_poll(struct blk_mq_hw_ctx *hctx)
{
	struct nvme_queue *nvmeq = hctx->driver_data;
	bool found;

	if (!nvme_cqe_pending(nvmeq))
		return 0;

	spin_lock(&nvmeq->cq_poll_lock);
	found = nvme_process_cq(nvmeq);
	spin_unlock(&nvmeq->cq_poll_lock);

	return found;
}

static void nvme_pci_submit_async_event(struct nvme_ctrl *ctrl)
{
	struct nvme_dev *dev = to_nvme_dev(ctrl);
	struct nvme_queue *nvmeq = &dev->queues[0];
	struct nvme_command c;

	memset(&c, 0, sizeof(c));
	c.common.opcode = nvme_admin_async_event;
	c.common.command_id = NVME_AQ_BLK_MQ_DEPTH;
	nvme_submit_cmd(nvmeq, &c, true);
}

static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
{
	struct nvme_command c;

	memset(&c, 0, sizeof(c));
	c.delete_queue.opcode = opcode;
	c.delete_queue.qid = cpu_to_le16(id);

	return nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0);
}
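
/*
 * The create queue commands below encode qsize as q_depth - 1 because
 * the NVMe specification defines the field as a zero-based value: a
 * queue of, say, 1024 slots is reported to the controller as 1023.
 */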
*dev, u16 qid,
11388c2ecf20Sopenharmony_ci		struct nvme_queue *nvmeq, s16 vector)
11398c2ecf20Sopenharmony_ci{
11408c2ecf20Sopenharmony_ci	struct nvme_command c;
11418c2ecf20Sopenharmony_ci	int flags = NVME_QUEUE_PHYS_CONTIG;
11428c2ecf20Sopenharmony_ci
11438c2ecf20Sopenharmony_ci	if (!test_bit(NVMEQ_POLLED, &nvmeq->flags))
11448c2ecf20Sopenharmony_ci		flags |= NVME_CQ_IRQ_ENABLED;
11458c2ecf20Sopenharmony_ci
11468c2ecf20Sopenharmony_ci	/*
11478c2ecf20Sopenharmony_ci	 * Note: we (ab)use the fact that the prp fields survive if no data
11488c2ecf20Sopenharmony_ci	 * is attached to the request.
11498c2ecf20Sopenharmony_ci	 */
11508c2ecf20Sopenharmony_ci	memset(&c, 0, sizeof(c));
11518c2ecf20Sopenharmony_ci	c.create_cq.opcode = nvme_admin_create_cq;
11528c2ecf20Sopenharmony_ci	c.create_cq.prp1 = cpu_to_le64(nvmeq->cq_dma_addr);
11538c2ecf20Sopenharmony_ci	c.create_cq.cqid = cpu_to_le16(qid);
11548c2ecf20Sopenharmony_ci	c.create_cq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
11558c2ecf20Sopenharmony_ci	c.create_cq.cq_flags = cpu_to_le16(flags);
11568c2ecf20Sopenharmony_ci	c.create_cq.irq_vector = cpu_to_le16(vector);
11578c2ecf20Sopenharmony_ci
11588c2ecf20Sopenharmony_ci	return nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0);
11598c2ecf20Sopenharmony_ci}
11608c2ecf20Sopenharmony_ci
11618c2ecf20Sopenharmony_cistatic int adapter_alloc_sq(struct nvme_dev *dev, u16 qid,
11628c2ecf20Sopenharmony_ci		struct nvme_queue *nvmeq)
11638c2ecf20Sopenharmony_ci{
11648c2ecf20Sopenharmony_ci	struct nvme_ctrl *ctrl = &dev->ctrl;
11658c2ecf20Sopenharmony_ci	struct nvme_command c;
11668c2ecf20Sopenharmony_ci	int flags = NVME_QUEUE_PHYS_CONTIG;
11678c2ecf20Sopenharmony_ci
11688c2ecf20Sopenharmony_ci	/*
11698c2ecf20Sopenharmony_ci	 * Some drives have a bug that auto-enables WRR with Urgent priority
11708c2ecf20Sopenharmony_ci	 * arbitration if MEDIUM isn't set. Since the URGENT priority class
11718c2ecf20Sopenharmony_ci	 * is encoded as all zeroes, that would make every queue URGENT.
11728c2ecf20Sopenharmony_ci	 */
11738c2ecf20Sopenharmony_ci	if (ctrl->quirks & NVME_QUIRK_MEDIUM_PRIO_SQ)
11748c2ecf20Sopenharmony_ci		flags |= NVME_SQ_PRIO_MEDIUM;
11758c2ecf20Sopenharmony_ci
11768c2ecf20Sopenharmony_ci	/*
11778c2ecf20Sopenharmony_ci	 * Note: we (ab)use the fact that the prp fields survive if no data
11788c2ecf20Sopenharmony_ci	 * is attached to the request.
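	 * (That is: these admin commands are submitted without a data
	 *  buffer, so nothing overwrites prp1 and it can carry the queue's
	 *  base DMA address instead, exactly as the Create CQ path above
	 *  does with cq_dma_addr.)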
11798c2ecf20Sopenharmony_ci */ 11808c2ecf20Sopenharmony_ci memset(&c, 0, sizeof(c)); 11818c2ecf20Sopenharmony_ci c.create_sq.opcode = nvme_admin_create_sq; 11828c2ecf20Sopenharmony_ci c.create_sq.prp1 = cpu_to_le64(nvmeq->sq_dma_addr); 11838c2ecf20Sopenharmony_ci c.create_sq.sqid = cpu_to_le16(qid); 11848c2ecf20Sopenharmony_ci c.create_sq.qsize = cpu_to_le16(nvmeq->q_depth - 1); 11858c2ecf20Sopenharmony_ci c.create_sq.sq_flags = cpu_to_le16(flags); 11868c2ecf20Sopenharmony_ci c.create_sq.cqid = cpu_to_le16(qid); 11878c2ecf20Sopenharmony_ci 11888c2ecf20Sopenharmony_ci return nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0); 11898c2ecf20Sopenharmony_ci} 11908c2ecf20Sopenharmony_ci 11918c2ecf20Sopenharmony_cistatic int adapter_delete_cq(struct nvme_dev *dev, u16 cqid) 11928c2ecf20Sopenharmony_ci{ 11938c2ecf20Sopenharmony_ci return adapter_delete_queue(dev, nvme_admin_delete_cq, cqid); 11948c2ecf20Sopenharmony_ci} 11958c2ecf20Sopenharmony_ci 11968c2ecf20Sopenharmony_cistatic int adapter_delete_sq(struct nvme_dev *dev, u16 sqid) 11978c2ecf20Sopenharmony_ci{ 11988c2ecf20Sopenharmony_ci return adapter_delete_queue(dev, nvme_admin_delete_sq, sqid); 11998c2ecf20Sopenharmony_ci} 12008c2ecf20Sopenharmony_ci 12018c2ecf20Sopenharmony_cistatic void abort_endio(struct request *req, blk_status_t error) 12028c2ecf20Sopenharmony_ci{ 12038c2ecf20Sopenharmony_ci struct nvme_iod *iod = blk_mq_rq_to_pdu(req); 12048c2ecf20Sopenharmony_ci struct nvme_queue *nvmeq = iod->nvmeq; 12058c2ecf20Sopenharmony_ci 12068c2ecf20Sopenharmony_ci dev_warn(nvmeq->dev->ctrl.device, 12078c2ecf20Sopenharmony_ci "Abort status: 0x%x", nvme_req(req)->status); 12088c2ecf20Sopenharmony_ci atomic_inc(&nvmeq->dev->ctrl.abort_limit); 12098c2ecf20Sopenharmony_ci blk_mq_free_request(req); 12108c2ecf20Sopenharmony_ci} 12118c2ecf20Sopenharmony_ci 12128c2ecf20Sopenharmony_cistatic bool nvme_should_reset(struct nvme_dev *dev, u32 csts) 12138c2ecf20Sopenharmony_ci{ 12148c2ecf20Sopenharmony_ci /* If true, indicates loss of adapter communication, possibly by a 12158c2ecf20Sopenharmony_ci * NVMe Subsystem reset. 12168c2ecf20Sopenharmony_ci */ 12178c2ecf20Sopenharmony_ci bool nssro = dev->subsystem && (csts & NVME_CSTS_NSSRO); 12188c2ecf20Sopenharmony_ci 12198c2ecf20Sopenharmony_ci /* If there is a reset/reinit ongoing, we shouldn't reset again. */ 12208c2ecf20Sopenharmony_ci switch (dev->ctrl.state) { 12218c2ecf20Sopenharmony_ci case NVME_CTRL_RESETTING: 12228c2ecf20Sopenharmony_ci case NVME_CTRL_CONNECTING: 12238c2ecf20Sopenharmony_ci return false; 12248c2ecf20Sopenharmony_ci default: 12258c2ecf20Sopenharmony_ci break; 12268c2ecf20Sopenharmony_ci } 12278c2ecf20Sopenharmony_ci 12288c2ecf20Sopenharmony_ci /* We shouldn't reset unless the controller is on fatal error state 12298c2ecf20Sopenharmony_ci * _or_ if we lost the communication with it. 12308c2ecf20Sopenharmony_ci */ 12318c2ecf20Sopenharmony_ci if (!(csts & NVME_CSTS_CFS) && !nssro) 12328c2ecf20Sopenharmony_ci return false; 12338c2ecf20Sopenharmony_ci 12348c2ecf20Sopenharmony_ci return true; 12358c2ecf20Sopenharmony_ci} 12368c2ecf20Sopenharmony_ci 12378c2ecf20Sopenharmony_cistatic void nvme_warn_reset(struct nvme_dev *dev, u32 csts) 12388c2ecf20Sopenharmony_ci{ 12398c2ecf20Sopenharmony_ci /* Read a config register to help see what died. 
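	 * (PCI_STATUS, read below, carries status bits such as Received
	 *  Master Abort, Received Target Abort and Detected Parity Error,
	 *  which help distinguish a dead link from a wedged controller.)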
*/ 12408c2ecf20Sopenharmony_ci u16 pci_status; 12418c2ecf20Sopenharmony_ci int result; 12428c2ecf20Sopenharmony_ci 12438c2ecf20Sopenharmony_ci result = pci_read_config_word(to_pci_dev(dev->dev), PCI_STATUS, 12448c2ecf20Sopenharmony_ci &pci_status); 12458c2ecf20Sopenharmony_ci if (result == PCIBIOS_SUCCESSFUL) 12468c2ecf20Sopenharmony_ci dev_warn(dev->ctrl.device, 12478c2ecf20Sopenharmony_ci "controller is down; will reset: CSTS=0x%x, PCI_STATUS=0x%hx\n", 12488c2ecf20Sopenharmony_ci csts, pci_status); 12498c2ecf20Sopenharmony_ci else 12508c2ecf20Sopenharmony_ci dev_warn(dev->ctrl.device, 12518c2ecf20Sopenharmony_ci "controller is down; will reset: CSTS=0x%x, PCI_STATUS read failed (%d)\n", 12528c2ecf20Sopenharmony_ci csts, result); 12538c2ecf20Sopenharmony_ci} 12548c2ecf20Sopenharmony_ci 12558c2ecf20Sopenharmony_cistatic enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) 12568c2ecf20Sopenharmony_ci{ 12578c2ecf20Sopenharmony_ci struct nvme_iod *iod = blk_mq_rq_to_pdu(req); 12588c2ecf20Sopenharmony_ci struct nvme_queue *nvmeq = iod->nvmeq; 12598c2ecf20Sopenharmony_ci struct nvme_dev *dev = nvmeq->dev; 12608c2ecf20Sopenharmony_ci struct request *abort_req; 12618c2ecf20Sopenharmony_ci struct nvme_command cmd; 12628c2ecf20Sopenharmony_ci u32 csts = readl(dev->bar + NVME_REG_CSTS); 12638c2ecf20Sopenharmony_ci 12648c2ecf20Sopenharmony_ci /* If PCI error recovery process is happening, we cannot reset or 12658c2ecf20Sopenharmony_ci * the recovery mechanism will surely fail. 12668c2ecf20Sopenharmony_ci */ 12678c2ecf20Sopenharmony_ci mb(); 12688c2ecf20Sopenharmony_ci if (pci_channel_offline(to_pci_dev(dev->dev))) 12698c2ecf20Sopenharmony_ci return BLK_EH_RESET_TIMER; 12708c2ecf20Sopenharmony_ci 12718c2ecf20Sopenharmony_ci /* 12728c2ecf20Sopenharmony_ci * Reset immediately if the controller is failed 12738c2ecf20Sopenharmony_ci */ 12748c2ecf20Sopenharmony_ci if (nvme_should_reset(dev, csts)) { 12758c2ecf20Sopenharmony_ci nvme_warn_reset(dev, csts); 12768c2ecf20Sopenharmony_ci nvme_dev_disable(dev, false); 12778c2ecf20Sopenharmony_ci nvme_reset_ctrl(&dev->ctrl); 12788c2ecf20Sopenharmony_ci return BLK_EH_DONE; 12798c2ecf20Sopenharmony_ci } 12808c2ecf20Sopenharmony_ci 12818c2ecf20Sopenharmony_ci /* 12828c2ecf20Sopenharmony_ci * Did we miss an interrupt? 12838c2ecf20Sopenharmony_ci */ 12848c2ecf20Sopenharmony_ci if (test_bit(NVMEQ_POLLED, &nvmeq->flags)) 12858c2ecf20Sopenharmony_ci nvme_poll(req->mq_hctx); 12868c2ecf20Sopenharmony_ci else 12878c2ecf20Sopenharmony_ci nvme_poll_irqdisable(nvmeq); 12888c2ecf20Sopenharmony_ci 12898c2ecf20Sopenharmony_ci if (blk_mq_rq_state(req) != MQ_RQ_IN_FLIGHT) { 12908c2ecf20Sopenharmony_ci dev_warn(dev->ctrl.device, 12918c2ecf20Sopenharmony_ci "I/O %d QID %d timeout, completion polled\n", 12928c2ecf20Sopenharmony_ci req->tag, nvmeq->qid); 12938c2ecf20Sopenharmony_ci return BLK_EH_DONE; 12948c2ecf20Sopenharmony_ci } 12958c2ecf20Sopenharmony_ci 12968c2ecf20Sopenharmony_ci /* 12978c2ecf20Sopenharmony_ci * Shutdown immediately if controller times out while starting. The 12988c2ecf20Sopenharmony_ci * reset work will see the pci device disabled when it gets the forced 12998c2ecf20Sopenharmony_ci * cancellation error. All outstanding requests are completed on 13008c2ecf20Sopenharmony_ci * shutdown, so we return BLK_EH_DONE. 
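	 * (The switch below implements this: a CONNECTING controller is
	 *  moved to DELETING so a timed-out initialization is not retried
	 *  forever, while one that is already RESETTING simply gets more
	 *  time via BLK_EH_RESET_TIMER.)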
13018c2ecf20Sopenharmony_ci */ 13028c2ecf20Sopenharmony_ci switch (dev->ctrl.state) { 13038c2ecf20Sopenharmony_ci case NVME_CTRL_CONNECTING: 13048c2ecf20Sopenharmony_ci nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING); 13058c2ecf20Sopenharmony_ci fallthrough; 13068c2ecf20Sopenharmony_ci case NVME_CTRL_DELETING: 13078c2ecf20Sopenharmony_ci dev_warn_ratelimited(dev->ctrl.device, 13088c2ecf20Sopenharmony_ci "I/O %d QID %d timeout, disable controller\n", 13098c2ecf20Sopenharmony_ci req->tag, nvmeq->qid); 13108c2ecf20Sopenharmony_ci nvme_req(req)->flags |= NVME_REQ_CANCELLED; 13118c2ecf20Sopenharmony_ci nvme_dev_disable(dev, true); 13128c2ecf20Sopenharmony_ci return BLK_EH_DONE; 13138c2ecf20Sopenharmony_ci case NVME_CTRL_RESETTING: 13148c2ecf20Sopenharmony_ci return BLK_EH_RESET_TIMER; 13158c2ecf20Sopenharmony_ci default: 13168c2ecf20Sopenharmony_ci break; 13178c2ecf20Sopenharmony_ci } 13188c2ecf20Sopenharmony_ci 13198c2ecf20Sopenharmony_ci /* 13208c2ecf20Sopenharmony_ci * Shutdown the controller immediately and schedule a reset if the 13218c2ecf20Sopenharmony_ci * command was already aborted once before and still hasn't been 13228c2ecf20Sopenharmony_ci * returned to the driver, or if this is the admin queue. 13238c2ecf20Sopenharmony_ci */ 13248c2ecf20Sopenharmony_ci if (!nvmeq->qid || iod->aborted) { 13258c2ecf20Sopenharmony_ci dev_warn(dev->ctrl.device, 13268c2ecf20Sopenharmony_ci "I/O %d QID %d timeout, reset controller\n", 13278c2ecf20Sopenharmony_ci req->tag, nvmeq->qid); 13288c2ecf20Sopenharmony_ci nvme_req(req)->flags |= NVME_REQ_CANCELLED; 13298c2ecf20Sopenharmony_ci nvme_dev_disable(dev, false); 13308c2ecf20Sopenharmony_ci nvme_reset_ctrl(&dev->ctrl); 13318c2ecf20Sopenharmony_ci 13328c2ecf20Sopenharmony_ci return BLK_EH_DONE; 13338c2ecf20Sopenharmony_ci } 13348c2ecf20Sopenharmony_ci 13358c2ecf20Sopenharmony_ci if (atomic_dec_return(&dev->ctrl.abort_limit) < 0) { 13368c2ecf20Sopenharmony_ci atomic_inc(&dev->ctrl.abort_limit); 13378c2ecf20Sopenharmony_ci return BLK_EH_RESET_TIMER; 13388c2ecf20Sopenharmony_ci } 13398c2ecf20Sopenharmony_ci iod->aborted = 1; 13408c2ecf20Sopenharmony_ci 13418c2ecf20Sopenharmony_ci memset(&cmd, 0, sizeof(cmd)); 13428c2ecf20Sopenharmony_ci cmd.abort.opcode = nvme_admin_abort_cmd; 13438c2ecf20Sopenharmony_ci cmd.abort.cid = nvme_cid(req); 13448c2ecf20Sopenharmony_ci cmd.abort.sqid = cpu_to_le16(nvmeq->qid); 13458c2ecf20Sopenharmony_ci 13468c2ecf20Sopenharmony_ci dev_warn(nvmeq->dev->ctrl.device, 13478c2ecf20Sopenharmony_ci "I/O %d QID %d timeout, aborting\n", 13488c2ecf20Sopenharmony_ci req->tag, nvmeq->qid); 13498c2ecf20Sopenharmony_ci 13508c2ecf20Sopenharmony_ci abort_req = nvme_alloc_request(dev->ctrl.admin_q, &cmd, 13518c2ecf20Sopenharmony_ci BLK_MQ_REQ_NOWAIT); 13528c2ecf20Sopenharmony_ci if (IS_ERR(abort_req)) { 13538c2ecf20Sopenharmony_ci atomic_inc(&dev->ctrl.abort_limit); 13548c2ecf20Sopenharmony_ci return BLK_EH_RESET_TIMER; 13558c2ecf20Sopenharmony_ci } 13568c2ecf20Sopenharmony_ci 13578c2ecf20Sopenharmony_ci abort_req->end_io_data = NULL; 13588c2ecf20Sopenharmony_ci blk_execute_rq_nowait(abort_req->q, NULL, abort_req, 0, abort_endio); 13598c2ecf20Sopenharmony_ci 13608c2ecf20Sopenharmony_ci /* 13618c2ecf20Sopenharmony_ci * The aborted req will be completed on receiving the abort req. 13628c2ecf20Sopenharmony_ci * We enable the timer again. If hit twice, it'll cause a device reset, 13638c2ecf20Sopenharmony_ci * as the device then is in a faulty state. 
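	 * (Abort is best effort by design: the NVMe specification allows
	 *  the controller to complete the Abort command without actually
	 *  aborting anything, which is why a second timeout of the same
	 *  request escalates to a controller reset above.)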
13648c2ecf20Sopenharmony_ci */ 13658c2ecf20Sopenharmony_ci return BLK_EH_RESET_TIMER; 13668c2ecf20Sopenharmony_ci} 13678c2ecf20Sopenharmony_ci 13688c2ecf20Sopenharmony_cistatic void nvme_free_queue(struct nvme_queue *nvmeq) 13698c2ecf20Sopenharmony_ci{ 13708c2ecf20Sopenharmony_ci dma_free_coherent(nvmeq->dev->dev, CQ_SIZE(nvmeq), 13718c2ecf20Sopenharmony_ci (void *)nvmeq->cqes, nvmeq->cq_dma_addr); 13728c2ecf20Sopenharmony_ci if (!nvmeq->sq_cmds) 13738c2ecf20Sopenharmony_ci return; 13748c2ecf20Sopenharmony_ci 13758c2ecf20Sopenharmony_ci if (test_and_clear_bit(NVMEQ_SQ_CMB, &nvmeq->flags)) { 13768c2ecf20Sopenharmony_ci pci_free_p2pmem(to_pci_dev(nvmeq->dev->dev), 13778c2ecf20Sopenharmony_ci nvmeq->sq_cmds, SQ_SIZE(nvmeq)); 13788c2ecf20Sopenharmony_ci } else { 13798c2ecf20Sopenharmony_ci dma_free_coherent(nvmeq->dev->dev, SQ_SIZE(nvmeq), 13808c2ecf20Sopenharmony_ci nvmeq->sq_cmds, nvmeq->sq_dma_addr); 13818c2ecf20Sopenharmony_ci } 13828c2ecf20Sopenharmony_ci} 13838c2ecf20Sopenharmony_ci 13848c2ecf20Sopenharmony_cistatic void nvme_free_queues(struct nvme_dev *dev, int lowest) 13858c2ecf20Sopenharmony_ci{ 13868c2ecf20Sopenharmony_ci int i; 13878c2ecf20Sopenharmony_ci 13888c2ecf20Sopenharmony_ci for (i = dev->ctrl.queue_count - 1; i >= lowest; i--) { 13898c2ecf20Sopenharmony_ci dev->ctrl.queue_count--; 13908c2ecf20Sopenharmony_ci nvme_free_queue(&dev->queues[i]); 13918c2ecf20Sopenharmony_ci } 13928c2ecf20Sopenharmony_ci} 13938c2ecf20Sopenharmony_ci 13948c2ecf20Sopenharmony_ci/** 13958c2ecf20Sopenharmony_ci * nvme_suspend_queue - put queue into suspended state 13968c2ecf20Sopenharmony_ci * @nvmeq: queue to suspend 13978c2ecf20Sopenharmony_ci */ 13988c2ecf20Sopenharmony_cistatic int nvme_suspend_queue(struct nvme_queue *nvmeq) 13998c2ecf20Sopenharmony_ci{ 14008c2ecf20Sopenharmony_ci if (!test_and_clear_bit(NVMEQ_ENABLED, &nvmeq->flags)) 14018c2ecf20Sopenharmony_ci return 1; 14028c2ecf20Sopenharmony_ci 14038c2ecf20Sopenharmony_ci /* ensure that nvme_queue_rq() sees NVMEQ_ENABLED cleared */ 14048c2ecf20Sopenharmony_ci mb(); 14058c2ecf20Sopenharmony_ci 14068c2ecf20Sopenharmony_ci nvmeq->dev->online_queues--; 14078c2ecf20Sopenharmony_ci if (!nvmeq->qid && nvmeq->dev->ctrl.admin_q) 14088c2ecf20Sopenharmony_ci nvme_stop_admin_queue(&nvmeq->dev->ctrl); 14098c2ecf20Sopenharmony_ci if (!test_and_clear_bit(NVMEQ_POLLED, &nvmeq->flags)) 14108c2ecf20Sopenharmony_ci pci_free_irq(to_pci_dev(nvmeq->dev->dev), nvmeq->cq_vector, nvmeq); 14118c2ecf20Sopenharmony_ci return 0; 14128c2ecf20Sopenharmony_ci} 14138c2ecf20Sopenharmony_ci 14148c2ecf20Sopenharmony_cistatic void nvme_suspend_io_queues(struct nvme_dev *dev) 14158c2ecf20Sopenharmony_ci{ 14168c2ecf20Sopenharmony_ci int i; 14178c2ecf20Sopenharmony_ci 14188c2ecf20Sopenharmony_ci for (i = dev->ctrl.queue_count - 1; i > 0; i--) 14198c2ecf20Sopenharmony_ci nvme_suspend_queue(&dev->queues[i]); 14208c2ecf20Sopenharmony_ci} 14218c2ecf20Sopenharmony_ci 14228c2ecf20Sopenharmony_cistatic void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown) 14238c2ecf20Sopenharmony_ci{ 14248c2ecf20Sopenharmony_ci struct nvme_queue *nvmeq = &dev->queues[0]; 14258c2ecf20Sopenharmony_ci 14268c2ecf20Sopenharmony_ci if (shutdown) 14278c2ecf20Sopenharmony_ci nvme_shutdown_ctrl(&dev->ctrl); 14288c2ecf20Sopenharmony_ci else 14298c2ecf20Sopenharmony_ci nvme_disable_ctrl(&dev->ctrl); 14308c2ecf20Sopenharmony_ci 14318c2ecf20Sopenharmony_ci nvme_poll_irqdisable(nvmeq); 14328c2ecf20Sopenharmony_ci} 14338c2ecf20Sopenharmony_ci 14348c2ecf20Sopenharmony_ci/* 14358c2ecf20Sopenharmony_ci * 
Called only on a device that has been disabled and after all other threads 14368c2ecf20Sopenharmony_ci * that can check this device's completion queues have synced, except 14378c2ecf20Sopenharmony_ci * nvme_poll(). This is the last chance for the driver to see a natural 14388c2ecf20Sopenharmony_ci * completion before nvme_cancel_request() terminates all incomplete requests. 14398c2ecf20Sopenharmony_ci */ 14408c2ecf20Sopenharmony_cistatic void nvme_reap_pending_cqes(struct nvme_dev *dev) 14418c2ecf20Sopenharmony_ci{ 14428c2ecf20Sopenharmony_ci int i; 14438c2ecf20Sopenharmony_ci 14448c2ecf20Sopenharmony_ci for (i = dev->ctrl.queue_count - 1; i > 0; i--) { 14458c2ecf20Sopenharmony_ci spin_lock(&dev->queues[i].cq_poll_lock); 14468c2ecf20Sopenharmony_ci nvme_process_cq(&dev->queues[i]); 14478c2ecf20Sopenharmony_ci spin_unlock(&dev->queues[i].cq_poll_lock); 14488c2ecf20Sopenharmony_ci } 14498c2ecf20Sopenharmony_ci} 14508c2ecf20Sopenharmony_ci 14518c2ecf20Sopenharmony_cistatic int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues, 14528c2ecf20Sopenharmony_ci int entry_size) 14538c2ecf20Sopenharmony_ci{ 14548c2ecf20Sopenharmony_ci int q_depth = dev->q_depth; 14558c2ecf20Sopenharmony_ci unsigned q_size_aligned = roundup(q_depth * entry_size, 14568c2ecf20Sopenharmony_ci NVME_CTRL_PAGE_SIZE); 14578c2ecf20Sopenharmony_ci 14588c2ecf20Sopenharmony_ci if (q_size_aligned * nr_io_queues > dev->cmb_size) { 14598c2ecf20Sopenharmony_ci u64 mem_per_q = div_u64(dev->cmb_size, nr_io_queues); 14608c2ecf20Sopenharmony_ci 14618c2ecf20Sopenharmony_ci mem_per_q = round_down(mem_per_q, NVME_CTRL_PAGE_SIZE); 14628c2ecf20Sopenharmony_ci q_depth = div_u64(mem_per_q, entry_size); 14638c2ecf20Sopenharmony_ci 14648c2ecf20Sopenharmony_ci /* 14658c2ecf20Sopenharmony_ci * Ensure the reduced q_depth is above some threshold where it 14668c2ecf20Sopenharmony_ci * would be better to map queues in system memory with the 14678c2ecf20Sopenharmony_ci * original depth 14688c2ecf20Sopenharmony_ci */ 14698c2ecf20Sopenharmony_ci if (q_depth < 64) 14708c2ecf20Sopenharmony_ci return -ENOMEM; 14718c2ecf20Sopenharmony_ci } 14728c2ecf20Sopenharmony_ci 14738c2ecf20Sopenharmony_ci return q_depth; 14748c2ecf20Sopenharmony_ci} 14758c2ecf20Sopenharmony_ci 14768c2ecf20Sopenharmony_cistatic int nvme_alloc_sq_cmds(struct nvme_dev *dev, struct nvme_queue *nvmeq, 14778c2ecf20Sopenharmony_ci int qid) 14788c2ecf20Sopenharmony_ci{ 14798c2ecf20Sopenharmony_ci struct pci_dev *pdev = to_pci_dev(dev->dev); 14808c2ecf20Sopenharmony_ci 14818c2ecf20Sopenharmony_ci if (qid && dev->cmb_use_sqes && (dev->cmbsz & NVME_CMBSZ_SQS)) { 14828c2ecf20Sopenharmony_ci nvmeq->sq_cmds = pci_alloc_p2pmem(pdev, SQ_SIZE(nvmeq)); 14838c2ecf20Sopenharmony_ci if (nvmeq->sq_cmds) { 14848c2ecf20Sopenharmony_ci nvmeq->sq_dma_addr = pci_p2pmem_virt_to_bus(pdev, 14858c2ecf20Sopenharmony_ci nvmeq->sq_cmds); 14868c2ecf20Sopenharmony_ci if (nvmeq->sq_dma_addr) { 14878c2ecf20Sopenharmony_ci set_bit(NVMEQ_SQ_CMB, &nvmeq->flags); 14888c2ecf20Sopenharmony_ci return 0; 14898c2ecf20Sopenharmony_ci } 14908c2ecf20Sopenharmony_ci 14918c2ecf20Sopenharmony_ci pci_free_p2pmem(pdev, nvmeq->sq_cmds, SQ_SIZE(nvmeq)); 14928c2ecf20Sopenharmony_ci } 14938c2ecf20Sopenharmony_ci } 14948c2ecf20Sopenharmony_ci 14958c2ecf20Sopenharmony_ci nvmeq->sq_cmds = dma_alloc_coherent(dev->dev, SQ_SIZE(nvmeq), 14968c2ecf20Sopenharmony_ci &nvmeq->sq_dma_addr, GFP_KERNEL); 14978c2ecf20Sopenharmony_ci if (!nvmeq->sq_cmds) 14988c2ecf20Sopenharmony_ci return -ENOMEM; 14998c2ecf20Sopenharmony_ci return 0; 
15008c2ecf20Sopenharmony_ci} 15018c2ecf20Sopenharmony_ci 15028c2ecf20Sopenharmony_cistatic int nvme_alloc_queue(struct nvme_dev *dev, int qid, int depth) 15038c2ecf20Sopenharmony_ci{ 15048c2ecf20Sopenharmony_ci struct nvme_queue *nvmeq = &dev->queues[qid]; 15058c2ecf20Sopenharmony_ci 15068c2ecf20Sopenharmony_ci if (dev->ctrl.queue_count > qid) 15078c2ecf20Sopenharmony_ci return 0; 15088c2ecf20Sopenharmony_ci 15098c2ecf20Sopenharmony_ci nvmeq->sqes = qid ? dev->io_sqes : NVME_ADM_SQES; 15108c2ecf20Sopenharmony_ci nvmeq->q_depth = depth; 15118c2ecf20Sopenharmony_ci nvmeq->cqes = dma_alloc_coherent(dev->dev, CQ_SIZE(nvmeq), 15128c2ecf20Sopenharmony_ci &nvmeq->cq_dma_addr, GFP_KERNEL); 15138c2ecf20Sopenharmony_ci if (!nvmeq->cqes) 15148c2ecf20Sopenharmony_ci goto free_nvmeq; 15158c2ecf20Sopenharmony_ci 15168c2ecf20Sopenharmony_ci if (nvme_alloc_sq_cmds(dev, nvmeq, qid)) 15178c2ecf20Sopenharmony_ci goto free_cqdma; 15188c2ecf20Sopenharmony_ci 15198c2ecf20Sopenharmony_ci nvmeq->dev = dev; 15208c2ecf20Sopenharmony_ci spin_lock_init(&nvmeq->sq_lock); 15218c2ecf20Sopenharmony_ci spin_lock_init(&nvmeq->cq_poll_lock); 15228c2ecf20Sopenharmony_ci nvmeq->cq_head = 0; 15238c2ecf20Sopenharmony_ci nvmeq->cq_phase = 1; 15248c2ecf20Sopenharmony_ci nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride]; 15258c2ecf20Sopenharmony_ci nvmeq->qid = qid; 15268c2ecf20Sopenharmony_ci dev->ctrl.queue_count++; 15278c2ecf20Sopenharmony_ci 15288c2ecf20Sopenharmony_ci return 0; 15298c2ecf20Sopenharmony_ci 15308c2ecf20Sopenharmony_ci free_cqdma: 15318c2ecf20Sopenharmony_ci dma_free_coherent(dev->dev, CQ_SIZE(nvmeq), (void *)nvmeq->cqes, 15328c2ecf20Sopenharmony_ci nvmeq->cq_dma_addr); 15338c2ecf20Sopenharmony_ci free_nvmeq: 15348c2ecf20Sopenharmony_ci return -ENOMEM; 15358c2ecf20Sopenharmony_ci} 15368c2ecf20Sopenharmony_ci 15378c2ecf20Sopenharmony_cistatic int queue_request_irq(struct nvme_queue *nvmeq) 15388c2ecf20Sopenharmony_ci{ 15398c2ecf20Sopenharmony_ci struct pci_dev *pdev = to_pci_dev(nvmeq->dev->dev); 15408c2ecf20Sopenharmony_ci int nr = nvmeq->dev->ctrl.instance; 15418c2ecf20Sopenharmony_ci 15428c2ecf20Sopenharmony_ci if (use_threaded_interrupts) { 15438c2ecf20Sopenharmony_ci return pci_request_irq(pdev, nvmeq->cq_vector, nvme_irq_check, 15448c2ecf20Sopenharmony_ci nvme_irq, nvmeq, "nvme%dq%d", nr, nvmeq->qid); 15458c2ecf20Sopenharmony_ci } else { 15468c2ecf20Sopenharmony_ci return pci_request_irq(pdev, nvmeq->cq_vector, nvme_irq, 15478c2ecf20Sopenharmony_ci NULL, nvmeq, "nvme%dq%d", nr, nvmeq->qid); 15488c2ecf20Sopenharmony_ci } 15498c2ecf20Sopenharmony_ci} 15508c2ecf20Sopenharmony_ci 15518c2ecf20Sopenharmony_cistatic void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid) 15528c2ecf20Sopenharmony_ci{ 15538c2ecf20Sopenharmony_ci struct nvme_dev *dev = nvmeq->dev; 15548c2ecf20Sopenharmony_ci 15558c2ecf20Sopenharmony_ci nvmeq->sq_tail = 0; 15568c2ecf20Sopenharmony_ci nvmeq->last_sq_tail = 0; 15578c2ecf20Sopenharmony_ci nvmeq->cq_head = 0; 15588c2ecf20Sopenharmony_ci nvmeq->cq_phase = 1; 15598c2ecf20Sopenharmony_ci nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride]; 15608c2ecf20Sopenharmony_ci memset((void *)nvmeq->cqes, 0, CQ_SIZE(nvmeq)); 15618c2ecf20Sopenharmony_ci nvme_dbbuf_init(dev, nvmeq, qid); 15628c2ecf20Sopenharmony_ci dev->online_queues++; 15638c2ecf20Sopenharmony_ci wmb(); /* ensure the first interrupt sees the initialization */ 15648c2ecf20Sopenharmony_ci} 15658c2ecf20Sopenharmony_ci 15668c2ecf20Sopenharmony_cistatic int nvme_create_queue(struct nvme_queue *nvmeq, int qid, bool polled) 
15678c2ecf20Sopenharmony_ci{ 15688c2ecf20Sopenharmony_ci struct nvme_dev *dev = nvmeq->dev; 15698c2ecf20Sopenharmony_ci int result; 15708c2ecf20Sopenharmony_ci u16 vector = 0; 15718c2ecf20Sopenharmony_ci 15728c2ecf20Sopenharmony_ci clear_bit(NVMEQ_DELETE_ERROR, &nvmeq->flags); 15738c2ecf20Sopenharmony_ci 15748c2ecf20Sopenharmony_ci /* 15758c2ecf20Sopenharmony_ci * A queue's vector matches the queue identifier unless the controller 15768c2ecf20Sopenharmony_ci * has only one vector available. 15778c2ecf20Sopenharmony_ci */ 15788c2ecf20Sopenharmony_ci if (!polled) 15798c2ecf20Sopenharmony_ci vector = dev->num_vecs == 1 ? 0 : qid; 15808c2ecf20Sopenharmony_ci else 15818c2ecf20Sopenharmony_ci set_bit(NVMEQ_POLLED, &nvmeq->flags); 15828c2ecf20Sopenharmony_ci 15838c2ecf20Sopenharmony_ci result = adapter_alloc_cq(dev, qid, nvmeq, vector); 15848c2ecf20Sopenharmony_ci if (result) 15858c2ecf20Sopenharmony_ci return result; 15868c2ecf20Sopenharmony_ci 15878c2ecf20Sopenharmony_ci result = adapter_alloc_sq(dev, qid, nvmeq); 15888c2ecf20Sopenharmony_ci if (result < 0) 15898c2ecf20Sopenharmony_ci return result; 15908c2ecf20Sopenharmony_ci if (result) 15918c2ecf20Sopenharmony_ci goto release_cq; 15928c2ecf20Sopenharmony_ci 15938c2ecf20Sopenharmony_ci nvmeq->cq_vector = vector; 15948c2ecf20Sopenharmony_ci nvme_init_queue(nvmeq, qid); 15958c2ecf20Sopenharmony_ci 15968c2ecf20Sopenharmony_ci if (!polled) { 15978c2ecf20Sopenharmony_ci result = queue_request_irq(nvmeq); 15988c2ecf20Sopenharmony_ci if (result < 0) 15998c2ecf20Sopenharmony_ci goto release_sq; 16008c2ecf20Sopenharmony_ci } 16018c2ecf20Sopenharmony_ci 16028c2ecf20Sopenharmony_ci set_bit(NVMEQ_ENABLED, &nvmeq->flags); 16038c2ecf20Sopenharmony_ci return result; 16048c2ecf20Sopenharmony_ci 16058c2ecf20Sopenharmony_cirelease_sq: 16068c2ecf20Sopenharmony_ci dev->online_queues--; 16078c2ecf20Sopenharmony_ci adapter_delete_sq(dev, qid); 16088c2ecf20Sopenharmony_cirelease_cq: 16098c2ecf20Sopenharmony_ci adapter_delete_cq(dev, qid); 16108c2ecf20Sopenharmony_ci return result; 16118c2ecf20Sopenharmony_ci} 16128c2ecf20Sopenharmony_ci 16138c2ecf20Sopenharmony_cistatic const struct blk_mq_ops nvme_mq_admin_ops = { 16148c2ecf20Sopenharmony_ci .queue_rq = nvme_queue_rq, 16158c2ecf20Sopenharmony_ci .complete = nvme_pci_complete_rq, 16168c2ecf20Sopenharmony_ci .init_hctx = nvme_admin_init_hctx, 16178c2ecf20Sopenharmony_ci .init_request = nvme_init_request, 16188c2ecf20Sopenharmony_ci .timeout = nvme_timeout, 16198c2ecf20Sopenharmony_ci}; 16208c2ecf20Sopenharmony_ci 16218c2ecf20Sopenharmony_cistatic const struct blk_mq_ops nvme_mq_ops = { 16228c2ecf20Sopenharmony_ci .queue_rq = nvme_queue_rq, 16238c2ecf20Sopenharmony_ci .complete = nvme_pci_complete_rq, 16248c2ecf20Sopenharmony_ci .commit_rqs = nvme_commit_rqs, 16258c2ecf20Sopenharmony_ci .init_hctx = nvme_init_hctx, 16268c2ecf20Sopenharmony_ci .init_request = nvme_init_request, 16278c2ecf20Sopenharmony_ci .map_queues = nvme_pci_map_queues, 16288c2ecf20Sopenharmony_ci .timeout = nvme_timeout, 16298c2ecf20Sopenharmony_ci .poll = nvme_poll, 16308c2ecf20Sopenharmony_ci}; 16318c2ecf20Sopenharmony_ci 16328c2ecf20Sopenharmony_cistatic void nvme_dev_remove_admin(struct nvme_dev *dev) 16338c2ecf20Sopenharmony_ci{ 16348c2ecf20Sopenharmony_ci if (dev->ctrl.admin_q && !blk_queue_dying(dev->ctrl.admin_q)) { 16358c2ecf20Sopenharmony_ci /* 16368c2ecf20Sopenharmony_ci * If the controller was reset during removal, it's possible 16378c2ecf20Sopenharmony_ci * user requests may be waiting on a stopped queue. 
Start the 16388c2ecf20Sopenharmony_ci * queue to flush these to completion. 16398c2ecf20Sopenharmony_ci */ 16408c2ecf20Sopenharmony_ci nvme_start_admin_queue(&dev->ctrl); 16418c2ecf20Sopenharmony_ci blk_cleanup_queue(dev->ctrl.admin_q); 16428c2ecf20Sopenharmony_ci blk_mq_free_tag_set(&dev->admin_tagset); 16438c2ecf20Sopenharmony_ci } 16448c2ecf20Sopenharmony_ci} 16458c2ecf20Sopenharmony_ci 16468c2ecf20Sopenharmony_cistatic int nvme_alloc_admin_tags(struct nvme_dev *dev) 16478c2ecf20Sopenharmony_ci{ 16488c2ecf20Sopenharmony_ci if (!dev->ctrl.admin_q) { 16498c2ecf20Sopenharmony_ci dev->admin_tagset.ops = &nvme_mq_admin_ops; 16508c2ecf20Sopenharmony_ci dev->admin_tagset.nr_hw_queues = 1; 16518c2ecf20Sopenharmony_ci 16528c2ecf20Sopenharmony_ci dev->admin_tagset.queue_depth = NVME_AQ_MQ_TAG_DEPTH; 16538c2ecf20Sopenharmony_ci dev->admin_tagset.timeout = ADMIN_TIMEOUT; 16548c2ecf20Sopenharmony_ci dev->admin_tagset.numa_node = dev->ctrl.numa_node; 16558c2ecf20Sopenharmony_ci dev->admin_tagset.cmd_size = sizeof(struct nvme_iod); 16568c2ecf20Sopenharmony_ci dev->admin_tagset.flags = BLK_MQ_F_NO_SCHED; 16578c2ecf20Sopenharmony_ci dev->admin_tagset.driver_data = dev; 16588c2ecf20Sopenharmony_ci 16598c2ecf20Sopenharmony_ci if (blk_mq_alloc_tag_set(&dev->admin_tagset)) 16608c2ecf20Sopenharmony_ci return -ENOMEM; 16618c2ecf20Sopenharmony_ci dev->ctrl.admin_tagset = &dev->admin_tagset; 16628c2ecf20Sopenharmony_ci 16638c2ecf20Sopenharmony_ci dev->ctrl.admin_q = blk_mq_init_queue(&dev->admin_tagset); 16648c2ecf20Sopenharmony_ci if (IS_ERR(dev->ctrl.admin_q)) { 16658c2ecf20Sopenharmony_ci blk_mq_free_tag_set(&dev->admin_tagset); 16668c2ecf20Sopenharmony_ci dev->ctrl.admin_q = NULL; 16678c2ecf20Sopenharmony_ci return -ENOMEM; 16688c2ecf20Sopenharmony_ci } 16698c2ecf20Sopenharmony_ci if (!blk_get_queue(dev->ctrl.admin_q)) { 16708c2ecf20Sopenharmony_ci nvme_dev_remove_admin(dev); 16718c2ecf20Sopenharmony_ci dev->ctrl.admin_q = NULL; 16728c2ecf20Sopenharmony_ci return -ENODEV; 16738c2ecf20Sopenharmony_ci } 16748c2ecf20Sopenharmony_ci } else 16758c2ecf20Sopenharmony_ci nvme_start_admin_queue(&dev->ctrl); 16768c2ecf20Sopenharmony_ci 16778c2ecf20Sopenharmony_ci return 0; 16788c2ecf20Sopenharmony_ci} 16798c2ecf20Sopenharmony_ci 16808c2ecf20Sopenharmony_cistatic unsigned long db_bar_size(struct nvme_dev *dev, unsigned nr_io_queues) 16818c2ecf20Sopenharmony_ci{ 16828c2ecf20Sopenharmony_ci return NVME_REG_DBS + ((nr_io_queues + 1) * 8 * dev->db_stride); 16838c2ecf20Sopenharmony_ci} 16848c2ecf20Sopenharmony_ci 16858c2ecf20Sopenharmony_cistatic int nvme_remap_bar(struct nvme_dev *dev, unsigned long size) 16868c2ecf20Sopenharmony_ci{ 16878c2ecf20Sopenharmony_ci struct pci_dev *pdev = to_pci_dev(dev->dev); 16888c2ecf20Sopenharmony_ci 16898c2ecf20Sopenharmony_ci if (size <= dev->bar_mapped_size) 16908c2ecf20Sopenharmony_ci return 0; 16918c2ecf20Sopenharmony_ci if (size > pci_resource_len(pdev, 0)) 16928c2ecf20Sopenharmony_ci return -ENOMEM; 16938c2ecf20Sopenharmony_ci if (dev->bar) 16948c2ecf20Sopenharmony_ci iounmap(dev->bar); 16958c2ecf20Sopenharmony_ci dev->bar = ioremap(pci_resource_start(pdev, 0), size); 16968c2ecf20Sopenharmony_ci if (!dev->bar) { 16978c2ecf20Sopenharmony_ci dev->bar_mapped_size = 0; 16988c2ecf20Sopenharmony_ci return -ENOMEM; 16998c2ecf20Sopenharmony_ci } 17008c2ecf20Sopenharmony_ci dev->bar_mapped_size = size; 17018c2ecf20Sopenharmony_ci dev->dbs = dev->bar + NVME_REG_DBS; 17028c2ecf20Sopenharmony_ci 17038c2ecf20Sopenharmony_ci return 0; 17048c2ecf20Sopenharmony_ci} 17058c2ecf20Sopenharmony_ci 
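/*
 * Illustrative sizing example (not part of the driver logic): with
 * CAP.DSTRD == 0 the doorbell stride is 1, so each queue owns a pair of
 * 32-bit doorbells (SQ tail, CQ head) starting at NVME_REG_DBS (0x1000):
 *
 *	db_bar_size(dev, 16) = 0x1000 + (16 + 1) * 8 * 1 = 4232 bytes
 *
 * i.e. room for the admin queue plus 16 I/O queue pairs.
 * nvme_setup_io_queues() below keeps shrinking nr_io_queues until
 * nvme_remap_bar() succeeds (or gives up with -ENOMEM).
 */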
17068c2ecf20Sopenharmony_cistatic int nvme_pci_configure_admin_queue(struct nvme_dev *dev) 17078c2ecf20Sopenharmony_ci{ 17088c2ecf20Sopenharmony_ci int result; 17098c2ecf20Sopenharmony_ci u32 aqa; 17108c2ecf20Sopenharmony_ci struct nvme_queue *nvmeq; 17118c2ecf20Sopenharmony_ci 17128c2ecf20Sopenharmony_ci result = nvme_remap_bar(dev, db_bar_size(dev, 0)); 17138c2ecf20Sopenharmony_ci if (result < 0) 17148c2ecf20Sopenharmony_ci return result; 17158c2ecf20Sopenharmony_ci 17168c2ecf20Sopenharmony_ci dev->subsystem = readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 1, 0) ? 17178c2ecf20Sopenharmony_ci NVME_CAP_NSSRC(dev->ctrl.cap) : 0; 17188c2ecf20Sopenharmony_ci 17198c2ecf20Sopenharmony_ci if (dev->subsystem && 17208c2ecf20Sopenharmony_ci (readl(dev->bar + NVME_REG_CSTS) & NVME_CSTS_NSSRO)) 17218c2ecf20Sopenharmony_ci writel(NVME_CSTS_NSSRO, dev->bar + NVME_REG_CSTS); 17228c2ecf20Sopenharmony_ci 17238c2ecf20Sopenharmony_ci result = nvme_disable_ctrl(&dev->ctrl); 17248c2ecf20Sopenharmony_ci if (result < 0) 17258c2ecf20Sopenharmony_ci return result; 17268c2ecf20Sopenharmony_ci 17278c2ecf20Sopenharmony_ci result = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH); 17288c2ecf20Sopenharmony_ci if (result) 17298c2ecf20Sopenharmony_ci return result; 17308c2ecf20Sopenharmony_ci 17318c2ecf20Sopenharmony_ci dev->ctrl.numa_node = dev_to_node(dev->dev); 17328c2ecf20Sopenharmony_ci 17338c2ecf20Sopenharmony_ci nvmeq = &dev->queues[0]; 17348c2ecf20Sopenharmony_ci aqa = nvmeq->q_depth - 1; 17358c2ecf20Sopenharmony_ci aqa |= aqa << 16; 17368c2ecf20Sopenharmony_ci 17378c2ecf20Sopenharmony_ci writel(aqa, dev->bar + NVME_REG_AQA); 17388c2ecf20Sopenharmony_ci lo_hi_writeq(nvmeq->sq_dma_addr, dev->bar + NVME_REG_ASQ); 17398c2ecf20Sopenharmony_ci lo_hi_writeq(nvmeq->cq_dma_addr, dev->bar + NVME_REG_ACQ); 17408c2ecf20Sopenharmony_ci 17418c2ecf20Sopenharmony_ci result = nvme_enable_ctrl(&dev->ctrl); 17428c2ecf20Sopenharmony_ci if (result) 17438c2ecf20Sopenharmony_ci return result; 17448c2ecf20Sopenharmony_ci 17458c2ecf20Sopenharmony_ci nvmeq->cq_vector = 0; 17468c2ecf20Sopenharmony_ci nvme_init_queue(nvmeq, 0); 17478c2ecf20Sopenharmony_ci result = queue_request_irq(nvmeq); 17488c2ecf20Sopenharmony_ci if (result) { 17498c2ecf20Sopenharmony_ci dev->online_queues--; 17508c2ecf20Sopenharmony_ci return result; 17518c2ecf20Sopenharmony_ci } 17528c2ecf20Sopenharmony_ci 17538c2ecf20Sopenharmony_ci set_bit(NVMEQ_ENABLED, &nvmeq->flags); 17548c2ecf20Sopenharmony_ci return result; 17558c2ecf20Sopenharmony_ci} 17568c2ecf20Sopenharmony_ci 17578c2ecf20Sopenharmony_cistatic int nvme_create_io_queues(struct nvme_dev *dev) 17588c2ecf20Sopenharmony_ci{ 17598c2ecf20Sopenharmony_ci unsigned i, max, rw_queues; 17608c2ecf20Sopenharmony_ci int ret = 0; 17618c2ecf20Sopenharmony_ci 17628c2ecf20Sopenharmony_ci for (i = dev->ctrl.queue_count; i <= dev->max_qid; i++) { 17638c2ecf20Sopenharmony_ci if (nvme_alloc_queue(dev, i, dev->q_depth)) { 17648c2ecf20Sopenharmony_ci ret = -ENOMEM; 17658c2ecf20Sopenharmony_ci break; 17668c2ecf20Sopenharmony_ci } 17678c2ecf20Sopenharmony_ci } 17688c2ecf20Sopenharmony_ci 17698c2ecf20Sopenharmony_ci max = min(dev->max_qid, dev->ctrl.queue_count - 1); 17708c2ecf20Sopenharmony_ci if (max != 1 && dev->io_queues[HCTX_TYPE_POLL]) { 17718c2ecf20Sopenharmony_ci rw_queues = dev->io_queues[HCTX_TYPE_DEFAULT] + 17728c2ecf20Sopenharmony_ci dev->io_queues[HCTX_TYPE_READ]; 17738c2ecf20Sopenharmony_ci } else { 17748c2ecf20Sopenharmony_ci rw_queues = max; 17758c2ecf20Sopenharmony_ci } 17768c2ecf20Sopenharmony_ci 17778c2ecf20Sopenharmony_ci for 
(i = dev->online_queues; i <= max; i++) { 17788c2ecf20Sopenharmony_ci bool polled = i > rw_queues; 17798c2ecf20Sopenharmony_ci 17808c2ecf20Sopenharmony_ci ret = nvme_create_queue(&dev->queues[i], i, polled); 17818c2ecf20Sopenharmony_ci if (ret) 17828c2ecf20Sopenharmony_ci break; 17838c2ecf20Sopenharmony_ci } 17848c2ecf20Sopenharmony_ci 17858c2ecf20Sopenharmony_ci /* 17868c2ecf20Sopenharmony_ci * Ignore failing Create SQ/CQ commands, we can continue with less 17878c2ecf20Sopenharmony_ci * than the desired amount of queues, and even a controller without 17888c2ecf20Sopenharmony_ci * I/O queues can still be used to issue admin commands. This might 17898c2ecf20Sopenharmony_ci * be useful to upgrade a buggy firmware for example. 17908c2ecf20Sopenharmony_ci */ 17918c2ecf20Sopenharmony_ci return ret >= 0 ? 0 : ret; 17928c2ecf20Sopenharmony_ci} 17938c2ecf20Sopenharmony_ci 17948c2ecf20Sopenharmony_cistatic ssize_t nvme_cmb_show(struct device *dev, 17958c2ecf20Sopenharmony_ci struct device_attribute *attr, 17968c2ecf20Sopenharmony_ci char *buf) 17978c2ecf20Sopenharmony_ci{ 17988c2ecf20Sopenharmony_ci struct nvme_dev *ndev = to_nvme_dev(dev_get_drvdata(dev)); 17998c2ecf20Sopenharmony_ci 18008c2ecf20Sopenharmony_ci return scnprintf(buf, PAGE_SIZE, "cmbloc : x%08x\ncmbsz : x%08x\n", 18018c2ecf20Sopenharmony_ci ndev->cmbloc, ndev->cmbsz); 18028c2ecf20Sopenharmony_ci} 18038c2ecf20Sopenharmony_cistatic DEVICE_ATTR(cmb, S_IRUGO, nvme_cmb_show, NULL); 18048c2ecf20Sopenharmony_ci 18058c2ecf20Sopenharmony_cistatic u64 nvme_cmb_size_unit(struct nvme_dev *dev) 18068c2ecf20Sopenharmony_ci{ 18078c2ecf20Sopenharmony_ci u8 szu = (dev->cmbsz >> NVME_CMBSZ_SZU_SHIFT) & NVME_CMBSZ_SZU_MASK; 18088c2ecf20Sopenharmony_ci 18098c2ecf20Sopenharmony_ci return 1ULL << (12 + 4 * szu); 18108c2ecf20Sopenharmony_ci} 18118c2ecf20Sopenharmony_ci 18128c2ecf20Sopenharmony_cistatic u32 nvme_cmb_size(struct nvme_dev *dev) 18138c2ecf20Sopenharmony_ci{ 18148c2ecf20Sopenharmony_ci return (dev->cmbsz >> NVME_CMBSZ_SZ_SHIFT) & NVME_CMBSZ_SZ_MASK; 18158c2ecf20Sopenharmony_ci} 18168c2ecf20Sopenharmony_ci 18178c2ecf20Sopenharmony_cistatic void nvme_map_cmb(struct nvme_dev *dev) 18188c2ecf20Sopenharmony_ci{ 18198c2ecf20Sopenharmony_ci u64 size, offset; 18208c2ecf20Sopenharmony_ci resource_size_t bar_size; 18218c2ecf20Sopenharmony_ci struct pci_dev *pdev = to_pci_dev(dev->dev); 18228c2ecf20Sopenharmony_ci int bar; 18238c2ecf20Sopenharmony_ci 18248c2ecf20Sopenharmony_ci if (dev->cmb_size) 18258c2ecf20Sopenharmony_ci return; 18268c2ecf20Sopenharmony_ci 18278c2ecf20Sopenharmony_ci if (NVME_CAP_CMBS(dev->ctrl.cap)) 18288c2ecf20Sopenharmony_ci writel(NVME_CMBMSC_CRE, dev->bar + NVME_REG_CMBMSC); 18298c2ecf20Sopenharmony_ci 18308c2ecf20Sopenharmony_ci dev->cmbsz = readl(dev->bar + NVME_REG_CMBSZ); 18318c2ecf20Sopenharmony_ci if (!dev->cmbsz) 18328c2ecf20Sopenharmony_ci return; 18338c2ecf20Sopenharmony_ci dev->cmbloc = readl(dev->bar + NVME_REG_CMBLOC); 18348c2ecf20Sopenharmony_ci 18358c2ecf20Sopenharmony_ci size = nvme_cmb_size_unit(dev) * nvme_cmb_size(dev); 18368c2ecf20Sopenharmony_ci offset = nvme_cmb_size_unit(dev) * NVME_CMB_OFST(dev->cmbloc); 18378c2ecf20Sopenharmony_ci bar = NVME_CMB_BIR(dev->cmbloc); 18388c2ecf20Sopenharmony_ci bar_size = pci_resource_len(pdev, bar); 18398c2ecf20Sopenharmony_ci 18408c2ecf20Sopenharmony_ci if (offset > bar_size) 18418c2ecf20Sopenharmony_ci return; 18428c2ecf20Sopenharmony_ci 18438c2ecf20Sopenharmony_ci /* 18448c2ecf20Sopenharmony_ci * Tell the controller about the host side address mapping the CMB, 
18458c2ecf20Sopenharmony_ci * and enable CMB decoding for the NVMe 1.4+ scheme: 18468c2ecf20Sopenharmony_ci */ 18478c2ecf20Sopenharmony_ci if (NVME_CAP_CMBS(dev->ctrl.cap)) { 18488c2ecf20Sopenharmony_ci hi_lo_writeq(NVME_CMBMSC_CRE | NVME_CMBMSC_CMSE | 18498c2ecf20Sopenharmony_ci (pci_bus_address(pdev, bar) + offset), 18508c2ecf20Sopenharmony_ci dev->bar + NVME_REG_CMBMSC); 18518c2ecf20Sopenharmony_ci } 18528c2ecf20Sopenharmony_ci 18538c2ecf20Sopenharmony_ci /* 18548c2ecf20Sopenharmony_ci * Controllers may support a CMB size larger than their BAR, 18558c2ecf20Sopenharmony_ci * for example, due to being behind a bridge. Reduce the CMB to 18568c2ecf20Sopenharmony_ci * the reported size of the BAR 18578c2ecf20Sopenharmony_ci */ 18588c2ecf20Sopenharmony_ci if (size > bar_size - offset) 18598c2ecf20Sopenharmony_ci size = bar_size - offset; 18608c2ecf20Sopenharmony_ci 18618c2ecf20Sopenharmony_ci if (pci_p2pdma_add_resource(pdev, bar, size, offset)) { 18628c2ecf20Sopenharmony_ci dev_warn(dev->ctrl.device, 18638c2ecf20Sopenharmony_ci "failed to register the CMB\n"); 18648c2ecf20Sopenharmony_ci return; 18658c2ecf20Sopenharmony_ci } 18668c2ecf20Sopenharmony_ci 18678c2ecf20Sopenharmony_ci dev->cmb_size = size; 18688c2ecf20Sopenharmony_ci dev->cmb_use_sqes = use_cmb_sqes && (dev->cmbsz & NVME_CMBSZ_SQS); 18698c2ecf20Sopenharmony_ci 18708c2ecf20Sopenharmony_ci if ((dev->cmbsz & (NVME_CMBSZ_WDS | NVME_CMBSZ_RDS)) == 18718c2ecf20Sopenharmony_ci (NVME_CMBSZ_WDS | NVME_CMBSZ_RDS)) 18728c2ecf20Sopenharmony_ci pci_p2pmem_publish(pdev, true); 18738c2ecf20Sopenharmony_ci 18748c2ecf20Sopenharmony_ci if (sysfs_add_file_to_group(&dev->ctrl.device->kobj, 18758c2ecf20Sopenharmony_ci &dev_attr_cmb.attr, NULL)) 18768c2ecf20Sopenharmony_ci dev_warn(dev->ctrl.device, 18778c2ecf20Sopenharmony_ci "failed to add sysfs attribute for CMB\n"); 18788c2ecf20Sopenharmony_ci} 18798c2ecf20Sopenharmony_ci 18808c2ecf20Sopenharmony_cistatic inline void nvme_release_cmb(struct nvme_dev *dev) 18818c2ecf20Sopenharmony_ci{ 18828c2ecf20Sopenharmony_ci if (dev->cmb_size) { 18838c2ecf20Sopenharmony_ci sysfs_remove_file_from_group(&dev->ctrl.device->kobj, 18848c2ecf20Sopenharmony_ci &dev_attr_cmb.attr, NULL); 18858c2ecf20Sopenharmony_ci dev->cmb_size = 0; 18868c2ecf20Sopenharmony_ci } 18878c2ecf20Sopenharmony_ci} 18888c2ecf20Sopenharmony_ci 18898c2ecf20Sopenharmony_cistatic int nvme_set_host_mem(struct nvme_dev *dev, u32 bits) 18908c2ecf20Sopenharmony_ci{ 18918c2ecf20Sopenharmony_ci u32 host_mem_size = dev->host_mem_size >> NVME_CTRL_PAGE_SHIFT; 18928c2ecf20Sopenharmony_ci u64 dma_addr = dev->host_mem_descs_dma; 18938c2ecf20Sopenharmony_ci struct nvme_command c; 18948c2ecf20Sopenharmony_ci int ret; 18958c2ecf20Sopenharmony_ci 18968c2ecf20Sopenharmony_ci memset(&c, 0, sizeof(c)); 18978c2ecf20Sopenharmony_ci c.features.opcode = nvme_admin_set_features; 18988c2ecf20Sopenharmony_ci c.features.fid = cpu_to_le32(NVME_FEAT_HOST_MEM_BUF); 18998c2ecf20Sopenharmony_ci c.features.dword11 = cpu_to_le32(bits); 19008c2ecf20Sopenharmony_ci c.features.dword12 = cpu_to_le32(host_mem_size); 19018c2ecf20Sopenharmony_ci c.features.dword13 = cpu_to_le32(lower_32_bits(dma_addr)); 19028c2ecf20Sopenharmony_ci c.features.dword14 = cpu_to_le32(upper_32_bits(dma_addr)); 19038c2ecf20Sopenharmony_ci c.features.dword15 = cpu_to_le32(dev->nr_host_mem_descs); 19048c2ecf20Sopenharmony_ci 19058c2ecf20Sopenharmony_ci ret = nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0); 19068c2ecf20Sopenharmony_ci if (ret) { 19078c2ecf20Sopenharmony_ci dev_warn(dev->ctrl.device, 
19088c2ecf20Sopenharmony_ci "failed to set host mem (err %d, flags %#x).\n", 19098c2ecf20Sopenharmony_ci ret, bits); 19108c2ecf20Sopenharmony_ci } 19118c2ecf20Sopenharmony_ci return ret; 19128c2ecf20Sopenharmony_ci} 19138c2ecf20Sopenharmony_ci 19148c2ecf20Sopenharmony_cistatic void nvme_free_host_mem(struct nvme_dev *dev) 19158c2ecf20Sopenharmony_ci{ 19168c2ecf20Sopenharmony_ci int i; 19178c2ecf20Sopenharmony_ci 19188c2ecf20Sopenharmony_ci for (i = 0; i < dev->nr_host_mem_descs; i++) { 19198c2ecf20Sopenharmony_ci struct nvme_host_mem_buf_desc *desc = &dev->host_mem_descs[i]; 19208c2ecf20Sopenharmony_ci size_t size = le32_to_cpu(desc->size) * NVME_CTRL_PAGE_SIZE; 19218c2ecf20Sopenharmony_ci 19228c2ecf20Sopenharmony_ci dma_free_attrs(dev->dev, size, dev->host_mem_desc_bufs[i], 19238c2ecf20Sopenharmony_ci le64_to_cpu(desc->addr), 19248c2ecf20Sopenharmony_ci DMA_ATTR_NO_KERNEL_MAPPING | DMA_ATTR_NO_WARN); 19258c2ecf20Sopenharmony_ci } 19268c2ecf20Sopenharmony_ci 19278c2ecf20Sopenharmony_ci kfree(dev->host_mem_desc_bufs); 19288c2ecf20Sopenharmony_ci dev->host_mem_desc_bufs = NULL; 19298c2ecf20Sopenharmony_ci dma_free_coherent(dev->dev, 19308c2ecf20Sopenharmony_ci dev->nr_host_mem_descs * sizeof(*dev->host_mem_descs), 19318c2ecf20Sopenharmony_ci dev->host_mem_descs, dev->host_mem_descs_dma); 19328c2ecf20Sopenharmony_ci dev->host_mem_descs = NULL; 19338c2ecf20Sopenharmony_ci dev->nr_host_mem_descs = 0; 19348c2ecf20Sopenharmony_ci} 19358c2ecf20Sopenharmony_ci 19368c2ecf20Sopenharmony_cistatic int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred, 19378c2ecf20Sopenharmony_ci u32 chunk_size) 19388c2ecf20Sopenharmony_ci{ 19398c2ecf20Sopenharmony_ci struct nvme_host_mem_buf_desc *descs; 19408c2ecf20Sopenharmony_ci u32 max_entries, len; 19418c2ecf20Sopenharmony_ci dma_addr_t descs_dma; 19428c2ecf20Sopenharmony_ci int i = 0; 19438c2ecf20Sopenharmony_ci void **bufs; 19448c2ecf20Sopenharmony_ci u64 size, tmp; 19458c2ecf20Sopenharmony_ci 19468c2ecf20Sopenharmony_ci tmp = (preferred + chunk_size - 1); 19478c2ecf20Sopenharmony_ci do_div(tmp, chunk_size); 19488c2ecf20Sopenharmony_ci max_entries = tmp; 19498c2ecf20Sopenharmony_ci 19508c2ecf20Sopenharmony_ci if (dev->ctrl.hmmaxd && dev->ctrl.hmmaxd < max_entries) 19518c2ecf20Sopenharmony_ci max_entries = dev->ctrl.hmmaxd; 19528c2ecf20Sopenharmony_ci 19538c2ecf20Sopenharmony_ci descs = dma_alloc_coherent(dev->dev, max_entries * sizeof(*descs), 19548c2ecf20Sopenharmony_ci &descs_dma, GFP_KERNEL); 19558c2ecf20Sopenharmony_ci if (!descs) 19568c2ecf20Sopenharmony_ci goto out; 19578c2ecf20Sopenharmony_ci 19588c2ecf20Sopenharmony_ci bufs = kcalloc(max_entries, sizeof(*bufs), GFP_KERNEL); 19598c2ecf20Sopenharmony_ci if (!bufs) 19608c2ecf20Sopenharmony_ci goto out_free_descs; 19618c2ecf20Sopenharmony_ci 19628c2ecf20Sopenharmony_ci for (size = 0; size < preferred && i < max_entries; size += len) { 19638c2ecf20Sopenharmony_ci dma_addr_t dma_addr; 19648c2ecf20Sopenharmony_ci 19658c2ecf20Sopenharmony_ci len = min_t(u64, chunk_size, preferred - size); 19668c2ecf20Sopenharmony_ci bufs[i] = dma_alloc_attrs(dev->dev, len, &dma_addr, GFP_KERNEL, 19678c2ecf20Sopenharmony_ci DMA_ATTR_NO_KERNEL_MAPPING | DMA_ATTR_NO_WARN); 19688c2ecf20Sopenharmony_ci if (!bufs[i]) 19698c2ecf20Sopenharmony_ci break; 19708c2ecf20Sopenharmony_ci 19718c2ecf20Sopenharmony_ci descs[i].addr = cpu_to_le64(dma_addr); 19728c2ecf20Sopenharmony_ci descs[i].size = cpu_to_le32(len / NVME_CTRL_PAGE_SIZE); 19738c2ecf20Sopenharmony_ci i++; 19748c2ecf20Sopenharmony_ci } 19758c2ecf20Sopenharmony_ci 
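	/*
	 * Illustrative note: each descriptor above advertises its chunk as
	 * (addr, size in controller pages); with NVME_CTRL_PAGE_SIZE == 4096
	 * a 2 MiB chunk is recorded as size = 512. A partial allocation is
	 * kept here; nvme_alloc_host_mem() frees it again if it does not
	 * reach the controller's minimum.
	 */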
19768c2ecf20Sopenharmony_ci if (!size) 19778c2ecf20Sopenharmony_ci goto out_free_bufs; 19788c2ecf20Sopenharmony_ci 19798c2ecf20Sopenharmony_ci dev->nr_host_mem_descs = i; 19808c2ecf20Sopenharmony_ci dev->host_mem_size = size; 19818c2ecf20Sopenharmony_ci dev->host_mem_descs = descs; 19828c2ecf20Sopenharmony_ci dev->host_mem_descs_dma = descs_dma; 19838c2ecf20Sopenharmony_ci dev->host_mem_desc_bufs = bufs; 19848c2ecf20Sopenharmony_ci return 0; 19858c2ecf20Sopenharmony_ci 19868c2ecf20Sopenharmony_ciout_free_bufs: 19878c2ecf20Sopenharmony_ci while (--i >= 0) { 19888c2ecf20Sopenharmony_ci size_t size = le32_to_cpu(descs[i].size) * NVME_CTRL_PAGE_SIZE; 19898c2ecf20Sopenharmony_ci 19908c2ecf20Sopenharmony_ci dma_free_attrs(dev->dev, size, bufs[i], 19918c2ecf20Sopenharmony_ci le64_to_cpu(descs[i].addr), 19928c2ecf20Sopenharmony_ci DMA_ATTR_NO_KERNEL_MAPPING | DMA_ATTR_NO_WARN); 19938c2ecf20Sopenharmony_ci } 19948c2ecf20Sopenharmony_ci 19958c2ecf20Sopenharmony_ci kfree(bufs); 19968c2ecf20Sopenharmony_ciout_free_descs: 19978c2ecf20Sopenharmony_ci dma_free_coherent(dev->dev, max_entries * sizeof(*descs), descs, 19988c2ecf20Sopenharmony_ci descs_dma); 19998c2ecf20Sopenharmony_ciout: 20008c2ecf20Sopenharmony_ci dev->host_mem_descs = NULL; 20018c2ecf20Sopenharmony_ci return -ENOMEM; 20028c2ecf20Sopenharmony_ci} 20038c2ecf20Sopenharmony_ci 20048c2ecf20Sopenharmony_cistatic int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred) 20058c2ecf20Sopenharmony_ci{ 20068c2ecf20Sopenharmony_ci u64 min_chunk = min_t(u64, preferred, PAGE_SIZE * MAX_ORDER_NR_PAGES); 20078c2ecf20Sopenharmony_ci u64 hmminds = max_t(u32, dev->ctrl.hmminds * 4096, PAGE_SIZE * 2); 20088c2ecf20Sopenharmony_ci u64 chunk_size; 20098c2ecf20Sopenharmony_ci 20108c2ecf20Sopenharmony_ci /* start big and work our way down */ 20118c2ecf20Sopenharmony_ci for (chunk_size = min_chunk; chunk_size >= hmminds; chunk_size /= 2) { 20128c2ecf20Sopenharmony_ci if (!__nvme_alloc_host_mem(dev, preferred, chunk_size)) { 20138c2ecf20Sopenharmony_ci if (!min || dev->host_mem_size >= min) 20148c2ecf20Sopenharmony_ci return 0; 20158c2ecf20Sopenharmony_ci nvme_free_host_mem(dev); 20168c2ecf20Sopenharmony_ci } 20178c2ecf20Sopenharmony_ci } 20188c2ecf20Sopenharmony_ci 20198c2ecf20Sopenharmony_ci return -ENOMEM; 20208c2ecf20Sopenharmony_ci} 20218c2ecf20Sopenharmony_ci 20228c2ecf20Sopenharmony_cistatic int nvme_setup_host_mem(struct nvme_dev *dev) 20238c2ecf20Sopenharmony_ci{ 20248c2ecf20Sopenharmony_ci u64 max = (u64)max_host_mem_size_mb * SZ_1M; 20258c2ecf20Sopenharmony_ci u64 preferred = (u64)dev->ctrl.hmpre * 4096; 20268c2ecf20Sopenharmony_ci u64 min = (u64)dev->ctrl.hmmin * 4096; 20278c2ecf20Sopenharmony_ci u32 enable_bits = NVME_HOST_MEM_ENABLE; 20288c2ecf20Sopenharmony_ci int ret; 20298c2ecf20Sopenharmony_ci 20308c2ecf20Sopenharmony_ci preferred = min(preferred, max); 20318c2ecf20Sopenharmony_ci if (min > max) { 20328c2ecf20Sopenharmony_ci dev_warn(dev->ctrl.device, 20338c2ecf20Sopenharmony_ci "min host memory (%lld MiB) above limit (%d MiB).\n", 20348c2ecf20Sopenharmony_ci min >> ilog2(SZ_1M), max_host_mem_size_mb); 20358c2ecf20Sopenharmony_ci nvme_free_host_mem(dev); 20368c2ecf20Sopenharmony_ci return 0; 20378c2ecf20Sopenharmony_ci } 20388c2ecf20Sopenharmony_ci 20398c2ecf20Sopenharmony_ci /* 20408c2ecf20Sopenharmony_ci * If we already have a buffer allocated check if we can reuse it. 
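	 * A descriptor set surviving from an earlier reset is kept if it
	 * still covers the controller's minimum; NVME_HOST_MEM_RETURN then
	 * tells the device that previously allocated memory is being handed
	 * back rather than newly provided.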
20418c2ecf20Sopenharmony_ci	 */
20428c2ecf20Sopenharmony_ci	if (dev->host_mem_descs) {
20438c2ecf20Sopenharmony_ci		if (dev->host_mem_size >= min)
20448c2ecf20Sopenharmony_ci			enable_bits |= NVME_HOST_MEM_RETURN;
20458c2ecf20Sopenharmony_ci		else
20468c2ecf20Sopenharmony_ci			nvme_free_host_mem(dev);
20478c2ecf20Sopenharmony_ci	}
20488c2ecf20Sopenharmony_ci
20498c2ecf20Sopenharmony_ci	if (!dev->host_mem_descs) {
20508c2ecf20Sopenharmony_ci		if (nvme_alloc_host_mem(dev, min, preferred)) {
20518c2ecf20Sopenharmony_ci			dev_warn(dev->ctrl.device,
20528c2ecf20Sopenharmony_ci				 "failed to allocate host memory buffer.\n");
20538c2ecf20Sopenharmony_ci			return 0; /* controller must work without HMB */
20548c2ecf20Sopenharmony_ci		}
20558c2ecf20Sopenharmony_ci
20568c2ecf20Sopenharmony_ci		dev_info(dev->ctrl.device,
20578c2ecf20Sopenharmony_ci			"allocated %lld MiB host memory buffer.\n",
20588c2ecf20Sopenharmony_ci			dev->host_mem_size >> ilog2(SZ_1M));
20598c2ecf20Sopenharmony_ci	}
20608c2ecf20Sopenharmony_ci
20618c2ecf20Sopenharmony_ci	ret = nvme_set_host_mem(dev, enable_bits);
20628c2ecf20Sopenharmony_ci	if (ret)
20638c2ecf20Sopenharmony_ci		nvme_free_host_mem(dev);
20648c2ecf20Sopenharmony_ci	return ret;
20658c2ecf20Sopenharmony_ci}
20668c2ecf20Sopenharmony_ci
20678c2ecf20Sopenharmony_ci/*
20688c2ecf20Sopenharmony_ci * nirqs is the number of interrupts available for write and read
20698c2ecf20Sopenharmony_ci * queues. The core already reserved an interrupt for the admin queue.
20708c2ecf20Sopenharmony_ci */
20718c2ecf20Sopenharmony_cistatic void nvme_calc_irq_sets(struct irq_affinity *affd, unsigned int nrirqs)
20728c2ecf20Sopenharmony_ci{
20738c2ecf20Sopenharmony_ci	struct nvme_dev *dev = affd->priv;
20748c2ecf20Sopenharmony_ci	unsigned int nr_read_queues, nr_write_queues = dev->nr_write_queues;
20758c2ecf20Sopenharmony_ci
20768c2ecf20Sopenharmony_ci	/*
20778c2ecf20Sopenharmony_ci	 * If there is no interrupt available for queues, ensure that
20788c2ecf20Sopenharmony_ci	 * the default queue is set to 1. The affinity set size is
20798c2ecf20Sopenharmony_ci	 * also set to one, but the irq core ignores it for this case.
20808c2ecf20Sopenharmony_ci	 *
20818c2ecf20Sopenharmony_ci	 * If only one interrupt is available or 'write_queues' == 0, combine
20828c2ecf20Sopenharmony_ci	 * write and read queues.
20838c2ecf20Sopenharmony_ci	 *
20848c2ecf20Sopenharmony_ci	 * If 'write_queues' > 0, ensure it leaves room for at least one read
20858c2ecf20Sopenharmony_ci	 * queue.
20868c2ecf20Sopenharmony_ci	 */
20878c2ecf20Sopenharmony_ci	if (!nrirqs) {
20888c2ecf20Sopenharmony_ci		nrirqs = 1;
20898c2ecf20Sopenharmony_ci		nr_read_queues = 0;
20908c2ecf20Sopenharmony_ci	} else if (nrirqs == 1 || !nr_write_queues) {
20918c2ecf20Sopenharmony_ci		nr_read_queues = 0;
20928c2ecf20Sopenharmony_ci	} else if (nr_write_queues >= nrirqs) {
20938c2ecf20Sopenharmony_ci		nr_read_queues = 1;
20948c2ecf20Sopenharmony_ci	} else {
20958c2ecf20Sopenharmony_ci		nr_read_queues = nrirqs - nr_write_queues;
20968c2ecf20Sopenharmony_ci	}
20978c2ecf20Sopenharmony_ci
20988c2ecf20Sopenharmony_ci	dev->io_queues[HCTX_TYPE_DEFAULT] = nrirqs - nr_read_queues;
20998c2ecf20Sopenharmony_ci	affd->set_size[HCTX_TYPE_DEFAULT] = nrirqs - nr_read_queues;
21008c2ecf20Sopenharmony_ci	dev->io_queues[HCTX_TYPE_READ] = nr_read_queues;
21018c2ecf20Sopenharmony_ci	affd->set_size[HCTX_TYPE_READ] = nr_read_queues;
21028c2ecf20Sopenharmony_ci	affd->nr_sets = nr_read_queues ?
2 : 1; 21038c2ecf20Sopenharmony_ci} 21048c2ecf20Sopenharmony_ci 21058c2ecf20Sopenharmony_cistatic int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues) 21068c2ecf20Sopenharmony_ci{ 21078c2ecf20Sopenharmony_ci struct pci_dev *pdev = to_pci_dev(dev->dev); 21088c2ecf20Sopenharmony_ci struct irq_affinity affd = { 21098c2ecf20Sopenharmony_ci .pre_vectors = 1, 21108c2ecf20Sopenharmony_ci .calc_sets = nvme_calc_irq_sets, 21118c2ecf20Sopenharmony_ci .priv = dev, 21128c2ecf20Sopenharmony_ci }; 21138c2ecf20Sopenharmony_ci unsigned int irq_queues, poll_queues; 21148c2ecf20Sopenharmony_ci 21158c2ecf20Sopenharmony_ci /* 21168c2ecf20Sopenharmony_ci * Poll queues don't need interrupts, but we need at least one I/O queue 21178c2ecf20Sopenharmony_ci * left over for non-polled I/O. 21188c2ecf20Sopenharmony_ci */ 21198c2ecf20Sopenharmony_ci poll_queues = min(dev->nr_poll_queues, nr_io_queues - 1); 21208c2ecf20Sopenharmony_ci dev->io_queues[HCTX_TYPE_POLL] = poll_queues; 21218c2ecf20Sopenharmony_ci 21228c2ecf20Sopenharmony_ci /* 21238c2ecf20Sopenharmony_ci * Initialize for the single interrupt case, will be updated in 21248c2ecf20Sopenharmony_ci * nvme_calc_irq_sets(). 21258c2ecf20Sopenharmony_ci */ 21268c2ecf20Sopenharmony_ci dev->io_queues[HCTX_TYPE_DEFAULT] = 1; 21278c2ecf20Sopenharmony_ci dev->io_queues[HCTX_TYPE_READ] = 0; 21288c2ecf20Sopenharmony_ci 21298c2ecf20Sopenharmony_ci /* 21308c2ecf20Sopenharmony_ci * We need interrupts for the admin queue and each non-polled I/O queue, 21318c2ecf20Sopenharmony_ci * but some Apple controllers require all queues to use the first 21328c2ecf20Sopenharmony_ci * vector. 21338c2ecf20Sopenharmony_ci */ 21348c2ecf20Sopenharmony_ci irq_queues = 1; 21358c2ecf20Sopenharmony_ci if (!(dev->ctrl.quirks & NVME_QUIRK_SINGLE_VECTOR)) 21368c2ecf20Sopenharmony_ci irq_queues += (nr_io_queues - poll_queues); 21378c2ecf20Sopenharmony_ci return pci_alloc_irq_vectors_affinity(pdev, 1, irq_queues, 21388c2ecf20Sopenharmony_ci PCI_IRQ_ALL_TYPES | PCI_IRQ_AFFINITY, &affd); 21398c2ecf20Sopenharmony_ci} 21408c2ecf20Sopenharmony_ci 21418c2ecf20Sopenharmony_cistatic void nvme_disable_io_queues(struct nvme_dev *dev) 21428c2ecf20Sopenharmony_ci{ 21438c2ecf20Sopenharmony_ci if (__nvme_disable_io_queues(dev, nvme_admin_delete_sq)) 21448c2ecf20Sopenharmony_ci __nvme_disable_io_queues(dev, nvme_admin_delete_cq); 21458c2ecf20Sopenharmony_ci} 21468c2ecf20Sopenharmony_ci 21478c2ecf20Sopenharmony_cistatic unsigned int nvme_max_io_queues(struct nvme_dev *dev) 21488c2ecf20Sopenharmony_ci{ 21498c2ecf20Sopenharmony_ci return num_possible_cpus() + dev->nr_write_queues + dev->nr_poll_queues; 21508c2ecf20Sopenharmony_ci} 21518c2ecf20Sopenharmony_ci 21528c2ecf20Sopenharmony_cistatic int nvme_setup_io_queues(struct nvme_dev *dev) 21538c2ecf20Sopenharmony_ci{ 21548c2ecf20Sopenharmony_ci struct nvme_queue *adminq = &dev->queues[0]; 21558c2ecf20Sopenharmony_ci struct pci_dev *pdev = to_pci_dev(dev->dev); 21568c2ecf20Sopenharmony_ci unsigned int nr_io_queues; 21578c2ecf20Sopenharmony_ci unsigned long size; 21588c2ecf20Sopenharmony_ci int result; 21598c2ecf20Sopenharmony_ci 21608c2ecf20Sopenharmony_ci /* 21618c2ecf20Sopenharmony_ci * Sample the module parameters once at reset time so that we have 21628c2ecf20Sopenharmony_ci * stable values to work with. 
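	 * (Both knobs are writable through sysfs, so an updated write_queues
	 *  or poll_queues value only takes effect at the next controller
	 *  reset, when this function runs again.)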
static int nvme_setup_io_queues(struct nvme_dev *dev)
{
	struct nvme_queue *adminq = &dev->queues[0];
	struct pci_dev *pdev = to_pci_dev(dev->dev);
	unsigned int nr_io_queues;
	unsigned long size;
	int result;

	/*
	 * Sample the module parameters once at reset time so that we have
	 * stable values to work with.
	 */
	dev->nr_write_queues = write_queues;
	dev->nr_poll_queues = poll_queues;

	/*
	 * If tags are shared with the admin queue (Apple bug), then
	 * make sure we only use one IO queue.
	 */
	if (dev->ctrl.quirks & NVME_QUIRK_SHARED_TAGS)
		nr_io_queues = 1;
	else
		nr_io_queues = min(nvme_max_io_queues(dev),
				   dev->nr_allocated_queues - 1);

	result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues);
	if (result < 0)
		return result;

	if (nr_io_queues == 0)
		return 0;

	clear_bit(NVMEQ_ENABLED, &adminq->flags);

	if (dev->cmb_use_sqes) {
		result = nvme_cmb_qdepth(dev, nr_io_queues,
				sizeof(struct nvme_command));
		if (result > 0)
			dev->q_depth = result;
		else
			dev->cmb_use_sqes = false;
	}

	do {
		size = db_bar_size(dev, nr_io_queues);
		result = nvme_remap_bar(dev, size);
		if (!result)
			break;
		if (!--nr_io_queues)
			return -ENOMEM;
	} while (1);
	adminq->q_db = dev->dbs;

 retry:
	/* Deregister the admin queue's interrupt */
	pci_free_irq(pdev, 0, adminq);

	/*
	 * If we enabled MSI-X early because INTx is not supported, disable it
	 * again before setting up the full range we need.
	 */
	pci_free_irq_vectors(pdev);

	result = nvme_setup_irqs(dev, nr_io_queues);
	if (result <= 0)
		return -EIO;

	dev->num_vecs = result;
	result = max(result - 1, 1);
	dev->max_qid = result + dev->io_queues[HCTX_TYPE_POLL];

	/*
	 * Should investigate if there's a performance win from allocating
	 * more queues than interrupt vectors; it might allow the submission
	 * path to scale better, even if the receive path is limited by the
	 * number of interrupts.
	 */
	result = queue_request_irq(adminq);
	if (result)
		return result;
	set_bit(NVMEQ_ENABLED, &adminq->flags);

	result = nvme_create_io_queues(dev);
	if (result || dev->online_queues < 2)
		return result;

	if (dev->online_queues - 1 < dev->max_qid) {
		nr_io_queues = dev->online_queues - 1;
		nvme_disable_io_queues(dev);
		nvme_suspend_io_queues(dev);
		goto retry;
	}
	dev_info(dev->ctrl.device, "%d/%d/%d default/read/poll queues\n",
					dev->io_queues[HCTX_TYPE_DEFAULT],
					dev->io_queues[HCTX_TYPE_READ],
					dev->io_queues[HCTX_TYPE_POLL]);
	return 0;
}

static void nvme_del_queue_end(struct request *req, blk_status_t error)
{
	struct nvme_queue *nvmeq = req->end_io_data;

	blk_mq_free_request(req);
	complete(&nvmeq->delete_done);
}

static void nvme_del_cq_end(struct request *req, blk_status_t error)
{
	struct nvme_queue *nvmeq = req->end_io_data;

	if (error)
		set_bit(NVMEQ_DELETE_ERROR, &nvmeq->flags);

	nvme_del_queue_end(req, error);
}

static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode)
{
	struct request_queue *q = nvmeq->dev->ctrl.admin_q;
	struct request *req;
	struct nvme_command cmd;

	memset(&cmd, 0, sizeof(cmd));
	cmd.delete_queue.opcode = opcode;
	cmd.delete_queue.qid = cpu_to_le16(nvmeq->qid);

	req = nvme_alloc_request(q, &cmd, BLK_MQ_REQ_NOWAIT);
	if (IS_ERR(req))
		return PTR_ERR(req);

	req->end_io_data = nvmeq;

	init_completion(&nvmeq->delete_done);
	blk_execute_rq_nowait(q, NULL, req, false,
			opcode == nvme_admin_delete_cq ?
				nvme_del_cq_end : nvme_del_queue_end);
	return 0;
}

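/*
 * Issue delete commands for all online I/O queues of the given opcode and
 * wait for the completions.  Returns false if a deletion timed out, true
 * otherwise.
 */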
static bool __nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode)
{
	int nr_queues = dev->online_queues - 1, sent = 0;
	unsigned long timeout;

 retry:
	timeout = ADMIN_TIMEOUT;
	while (nr_queues > 0) {
		if (nvme_delete_queue(&dev->queues[nr_queues], opcode))
			break;
		nr_queues--;
		sent++;
	}
	while (sent) {
		struct nvme_queue *nvmeq = &dev->queues[nr_queues + sent];

		timeout = wait_for_completion_io_timeout(&nvmeq->delete_done,
				timeout);
		if (timeout == 0)
			return false;

		sent--;
		if (nr_queues)
			goto retry;
	}
	return true;
}

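/*
 * Register the I/O tag set with blk-mq once the number of online queues is
 * known.  On the first successful reset this allocates the tag set; on
 * later resets it only updates the hardware queue count and frees queues
 * that are no longer usable.
 */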
static void nvme_dev_add(struct nvme_dev *dev)
{
	int ret;

	if (!dev->ctrl.tagset) {
		dev->tagset.ops = &nvme_mq_ops;
		dev->tagset.nr_hw_queues = dev->online_queues - 1;
		dev->tagset.nr_maps = 2; /* default + read */
		if (dev->io_queues[HCTX_TYPE_POLL])
			dev->tagset.nr_maps++;
		dev->tagset.timeout = NVME_IO_TIMEOUT;
		dev->tagset.numa_node = dev->ctrl.numa_node;
		dev->tagset.queue_depth = min_t(unsigned int, dev->q_depth,
						BLK_MQ_MAX_DEPTH) - 1;
		dev->tagset.cmd_size = sizeof(struct nvme_iod);
		dev->tagset.flags = BLK_MQ_F_SHOULD_MERGE;
		dev->tagset.driver_data = dev;

		/*
		 * Some Apple controllers require tags to be unique across
		 * the admin and IO queues, so reserve the first 32 tags of
		 * the IO queue.
		 */
		if (dev->ctrl.quirks & NVME_QUIRK_SHARED_TAGS)
			dev->tagset.reserved_tags = NVME_AQ_DEPTH;

		ret = blk_mq_alloc_tag_set(&dev->tagset);
		if (ret) {
			dev_warn(dev->ctrl.device,
				"IO queues tagset allocation failed %d\n", ret);
			return;
		}
		dev->ctrl.tagset = &dev->tagset;
	} else {
		blk_mq_update_nr_hw_queues(&dev->tagset, dev->online_queues - 1);

		/* Free previously allocated queues that are no longer usable */
		nvme_free_queues(dev, dev->online_queues);
	}

	nvme_dbbuf_set(dev);
}

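/*
 * Enable the PCI device, set up the DMA mask and a single bootstrap
 * interrupt vector, read the controller capabilities and apply device
 * specific queue depth and SQE size quirks, then map the controller
 * memory buffer if one is present.
 */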
static int nvme_pci_enable(struct nvme_dev *dev)
{
	int result = -ENOMEM;
	struct pci_dev *pdev = to_pci_dev(dev->dev);

	if (pci_enable_device_mem(pdev))
		return result;

	pci_set_master(pdev);

	if (dma_set_mask_and_coherent(dev->dev, DMA_BIT_MASK(64)))
		goto disable;

	if (readl(dev->bar + NVME_REG_CSTS) == -1) {
		result = -ENODEV;
		goto disable;
	}

	/*
	 * Some devices and/or platforms don't advertise or work with INTx
	 * interrupts. Pre-enable a single MSIX or MSI vec for setup. We'll
	 * adjust this later.
	 */
	result = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES);
	if (result < 0)
		return result;

	dev->ctrl.cap = lo_hi_readq(dev->bar + NVME_REG_CAP);

	dev->q_depth = min_t(u32, NVME_CAP_MQES(dev->ctrl.cap) + 1,
				io_queue_depth);
	dev->ctrl.sqsize = dev->q_depth - 1; /* 0's based queue depth */
	dev->db_stride = 1 << NVME_CAP_STRIDE(dev->ctrl.cap);
	dev->dbs = dev->bar + 4096;

	/*
	 * Some Apple controllers require a non-standard SQE size.
	 * Interestingly they also seem to ignore the CC:IOSQES register
	 * so we don't bother updating it here.
	 */
	if (dev->ctrl.quirks & NVME_QUIRK_128_BYTES_SQES)
		dev->io_sqes = 7;
	else
		dev->io_sqes = NVME_NVM_IOSQES;

	/*
	 * Temporary fix for the Apple controller found in the MacBook8,1 and
	 * some MacBook7,1 to avoid controller resets and data loss.
	 */
	if (pdev->vendor == PCI_VENDOR_ID_APPLE && pdev->device == 0x2001) {
		dev->q_depth = 2;
		dev_warn(dev->ctrl.device, "detected Apple NVMe controller, "
			"set queue depth=%u to work around controller resets\n",
			dev->q_depth);
	} else if (pdev->vendor == PCI_VENDOR_ID_SAMSUNG &&
		   (pdev->device == 0xa821 || pdev->device == 0xa822) &&
		   NVME_CAP_MQES(dev->ctrl.cap) == 0) {
		dev->q_depth = 64;
		dev_err(dev->ctrl.device, "detected PM1725 NVMe controller, "
			"set queue depth=%u\n", dev->q_depth);
	}

	/*
	 * Controllers with the shared tags quirk need the IO queue to be big
	 * enough so that we get 32 tags for the admin queue.
	 */
	if ((dev->ctrl.quirks & NVME_QUIRK_SHARED_TAGS) &&
	    (dev->q_depth < (NVME_AQ_DEPTH + 2))) {
		dev->q_depth = NVME_AQ_DEPTH + 2;
		dev_warn(dev->ctrl.device, "IO queue depth clamped to %d\n",
			 dev->q_depth);
	}

	nvme_map_cmb(dev);

	pci_enable_pcie_error_reporting(pdev);
	pci_save_state(pdev);
	return 0;

 disable:
	pci_disable_device(pdev);
	return result;
}

static void nvme_dev_unmap(struct nvme_dev *dev)
{
	if (dev->bar)
		iounmap(dev->bar);
	pci_release_mem_regions(to_pci_dev(dev->dev));
}

static void nvme_pci_disable(struct nvme_dev *dev)
{
	struct pci_dev *pdev = to_pci_dev(dev->dev);

	pci_free_irq_vectors(pdev);

	if (pci_is_enabled(pdev)) {
		pci_disable_pcie_error_reporting(pdev);
		pci_disable_device(pdev);
	}
}

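/*
 * Shut the controller down.  With shutdown == true a clean shutdown is
 * requested and in-flight requests get a chance to drain; otherwise the
 * controller is disabled immediately, e.g. before a reset.  All remaining
 * requests are cancelled so that blk-mq cannot deadlock.
 */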
static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
{
	bool dead = true, freeze = false;
	struct pci_dev *pdev = to_pci_dev(dev->dev);

	mutex_lock(&dev->shutdown_lock);
	if (pci_is_enabled(pdev)) {
		u32 csts = readl(dev->bar + NVME_REG_CSTS);

		if (dev->ctrl.state == NVME_CTRL_LIVE ||
		    dev->ctrl.state == NVME_CTRL_RESETTING) {
			freeze = true;
			nvme_start_freeze(&dev->ctrl);
		}
		dead = !!((csts & NVME_CSTS_CFS) || !(csts & NVME_CSTS_RDY) ||
			pdev->error_state != pci_channel_io_normal);
	}

	/*
	 * Give the controller a chance to complete all entered requests if
	 * doing a safe shutdown.
	 */
	if (!dead && shutdown && freeze)
		nvme_wait_freeze_timeout(&dev->ctrl, NVME_IO_TIMEOUT);

	nvme_stop_queues(&dev->ctrl);

	if (!dead && dev->ctrl.queue_count > 0) {
		nvme_disable_io_queues(dev);
		nvme_disable_admin_queue(dev, shutdown);
	}
	nvme_suspend_io_queues(dev);
	nvme_suspend_queue(&dev->queues[0]);
	nvme_pci_disable(dev);
	nvme_reap_pending_cqes(dev);

	blk_mq_tagset_busy_iter(&dev->tagset, nvme_cancel_request, &dev->ctrl);
	blk_mq_tagset_busy_iter(&dev->admin_tagset, nvme_cancel_request, &dev->ctrl);
	blk_mq_tagset_wait_completed_request(&dev->tagset);
	blk_mq_tagset_wait_completed_request(&dev->admin_tagset);

	/*
	 * The driver will not be starting up queues again if shutting down so
	 * must flush all entered requests to their failed completion to avoid
	 * deadlocking blk-mq hot-cpu notifier.
	 */
	if (shutdown) {
		nvme_start_queues(&dev->ctrl);
		if (dev->ctrl.admin_q && !blk_queue_dying(dev->ctrl.admin_q))
			nvme_start_admin_queue(&dev->ctrl);
	}
	mutex_unlock(&dev->shutdown_lock);
}

static int nvme_disable_prepare_reset(struct nvme_dev *dev, bool shutdown)
{
	if (!nvme_wait_reset(&dev->ctrl))
		return -EBUSY;
	nvme_dev_disable(dev, shutdown);
	return 0;
}

static int nvme_setup_prp_pools(struct nvme_dev *dev)
{
	dev->prp_page_pool = dma_pool_create("prp list page", dev->dev,
						NVME_CTRL_PAGE_SIZE,
						NVME_CTRL_PAGE_SIZE, 0);
	if (!dev->prp_page_pool)
		return -ENOMEM;

	/* Optimisation for I/Os between 4k and 128k */
	dev->prp_small_pool = dma_pool_create("prp list 256", dev->dev,
						256, 256, 0);
	if (!dev->prp_small_pool) {
		dma_pool_destroy(dev->prp_page_pool);
		return -ENOMEM;
	}
	return 0;
}

static void nvme_release_prp_pools(struct nvme_dev *dev)
{
	dma_pool_destroy(dev->prp_page_pool);
	dma_pool_destroy(dev->prp_small_pool);
}

static int nvme_pci_alloc_iod_mempool(struct nvme_dev *dev)
{
	size_t npages = max(nvme_pci_npages_prp(), nvme_pci_npages_sgl());
	size_t alloc_size = sizeof(__le64 *) * npages +
			    sizeof(struct scatterlist) * NVME_MAX_SEGS;

	WARN_ON_ONCE(alloc_size > PAGE_SIZE);
	dev->iod_mempool = mempool_create_node(1,
			mempool_kmalloc, mempool_kfree,
			(void *)alloc_size, GFP_KERNEL,
			dev_to_node(dev->dev));
	if (!dev->iod_mempool)
		return -ENOMEM;
	return 0;
}

static void nvme_free_tagset(struct nvme_dev *dev)
{
	if (dev->tagset.tags)
		blk_mq_free_tag_set(&dev->tagset);
	dev->ctrl.tagset = NULL;
}

/* pairs with nvme_pci_alloc_dev */
static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl)
{
	struct nvme_dev *dev = to_nvme_dev(ctrl);

	nvme_dbbuf_dma_free(dev);
	nvme_free_tagset(dev);
	if (dev->ctrl.admin_q)
		blk_put_queue(dev->ctrl.admin_q);
	free_opal_dev(dev->ctrl.opal_dev);
	mempool_destroy(dev->iod_mempool);
	put_device(dev->dev);
	kfree(dev->queues);
	kfree(dev);
}

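/*
 * Tear down a controller that failed to (re)initialize: disable it, fail
 * all outstanding I/O and hand the final driver unbinding off to
 * remove_work.
 */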
static void nvme_remove_dead_ctrl(struct nvme_dev *dev)
{
	/*
	 * Set state to deleting now to avoid blocking nvme_wait_reset(), which
	 * may be holding this pci_dev's device lock.
	 */
	nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
	nvme_get_ctrl(&dev->ctrl);
	nvme_dev_disable(dev, false);
	nvme_kill_queues(&dev->ctrl);
	if (!queue_work(nvme_wq, &dev->remove_work))
		nvme_put_ctrl(&dev->ctrl);
}

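/*
 * Main controller (re)initialization work: shut down a live controller,
 * re-enable the PCI device and admin queue, rediscover the controller's
 * capabilities and features, bring up the I/O queues and finally mark the
 * controller live.  Any failure funnels into nvme_remove_dead_ctrl().
 */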
static void nvme_reset_work(struct work_struct *work)
{
	struct nvme_dev *dev =
		container_of(work, struct nvme_dev, ctrl.reset_work);
	bool was_suspend = !!(dev->ctrl.ctrl_config & NVME_CC_SHN_NORMAL);
	int result;

	if (dev->ctrl.state != NVME_CTRL_RESETTING) {
		dev_warn(dev->ctrl.device, "ctrl state %d is not RESETTING\n",
			 dev->ctrl.state);
		result = -ENODEV;
		goto out;
	}

	/*
	 * If we're called to reset a live controller, first shut it down
	 * before moving on.
	 */
	if (dev->ctrl.ctrl_config & NVME_CC_ENABLE)
		nvme_dev_disable(dev, false);
	nvme_sync_queues(&dev->ctrl);

	mutex_lock(&dev->shutdown_lock);
	result = nvme_pci_enable(dev);
	if (result)
		goto out_unlock;

	result = nvme_pci_configure_admin_queue(dev);
	if (result)
		goto out_unlock;

	result = nvme_alloc_admin_tags(dev);
	if (result)
		goto out_unlock;

	dma_set_min_align_mask(dev->dev, NVME_CTRL_PAGE_SIZE - 1);

	/*
	 * Limit the max command size to prevent iod->sg allocations going
	 * over a single page.
	 */
	dev->ctrl.max_hw_sectors = min_t(u32,
		NVME_MAX_KB_SZ << 1, dma_max_mapping_size(dev->dev) >> 9);
	dev->ctrl.max_segments = NVME_MAX_SEGS;

	/*
	 * Don't limit the IOMMU merged segment size.
	 */
	dma_set_max_seg_size(dev->dev, 0xffffffff);

	mutex_unlock(&dev->shutdown_lock);

	/*
	 * Mark the controller as CONNECTING (a state introduced by the
	 * nvme-fc/rdma transports) for the initializing procedure here.
	 */
	if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_CONNECTING)) {
		dev_warn(dev->ctrl.device,
			"failed to mark controller CONNECTING\n");
		result = -EBUSY;
		goto out;
	}

	/*
	 * We do not support an SGL for metadata (yet), so we are limited to a
	 * single integrity segment for the separate metadata pointer.
	 */
	dev->ctrl.max_integrity_segments = 1;

	result = nvme_init_identify(&dev->ctrl);
	if (result)
		goto out;

	if (dev->ctrl.oacs & NVME_CTRL_OACS_SEC_SUPP) {
		if (!dev->ctrl.opal_dev)
			dev->ctrl.opal_dev =
				init_opal_dev(&dev->ctrl, &nvme_sec_submit);
		else if (was_suspend)
			opal_unlock_from_suspend(dev->ctrl.opal_dev);
	} else {
		free_opal_dev(dev->ctrl.opal_dev);
		dev->ctrl.opal_dev = NULL;
	}

	if (dev->ctrl.oacs & NVME_CTRL_OACS_DBBUF_SUPP) {
		result = nvme_dbbuf_dma_alloc(dev);
		if (result)
			dev_warn(dev->dev,
				 "unable to allocate dma for dbbuf\n");
	}

	if (dev->ctrl.hmpre) {
		result = nvme_setup_host_mem(dev);
		if (result < 0)
			goto out;
	}

	result = nvme_setup_io_queues(dev);
	if (result)
		goto out;

	/*
	 * Keep the controller around but remove all namespaces if we don't
	 * have any working I/O queue.
	 */
	if (dev->online_queues < 2) {
		dev_warn(dev->ctrl.device, "IO queues not created\n");
		nvme_kill_queues(&dev->ctrl);
		nvme_remove_namespaces(&dev->ctrl);
		nvme_free_tagset(dev);
	} else {
		nvme_start_queues(&dev->ctrl);
		nvme_wait_freeze(&dev->ctrl);
		nvme_dev_add(dev);
		nvme_unfreeze(&dev->ctrl);
	}

	/*
	 * If only the admin queue is live, keep it for further investigation
	 * or recovery.
	 */
	if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_LIVE)) {
		dev_warn(dev->ctrl.device,
			"failed to mark controller live state\n");
		result = -ENODEV;
		goto out;
	}

	nvme_start_ctrl(&dev->ctrl);
	return;

 out_unlock:
	mutex_unlock(&dev->shutdown_lock);
 out:
	if (result)
		dev_warn(dev->ctrl.device,
			 "Removing after probe failure status: %d\n", result);
	nvme_remove_dead_ctrl(dev);
}

static void nvme_remove_dead_ctrl_work(struct work_struct *work)
{
	struct nvme_dev *dev = container_of(work, struct nvme_dev, remove_work);
	struct pci_dev *pdev = to_pci_dev(dev->dev);

	if (pci_get_drvdata(pdev))
		device_release_driver(&pdev->dev);
	nvme_put_ctrl(&dev->ctrl);
}

static int nvme_pci_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val)
{
	*val = readl(to_nvme_dev(ctrl)->bar + off);
	return 0;
}

static int nvme_pci_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val)
{
	writel(val, to_nvme_dev(ctrl)->bar + off);
	return 0;
}

static int nvme_pci_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val)
{
	*val = lo_hi_readq(to_nvme_dev(ctrl)->bar + off);
	return 0;
}

static int nvme_pci_get_address(struct nvme_ctrl *ctrl, char *buf, int size)
{
	struct pci_dev *pdev = to_pci_dev(to_nvme_dev(ctrl)->dev);

	return snprintf(buf, size, "%s\n", dev_name(&pdev->dev));
}

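/*
 * Transport callbacks the NVMe core uses to access controller registers
 * and to release this PCI transport's controller instance.
 */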
static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
	.name			= "pcie",
	.module			= THIS_MODULE,
	.flags			= NVME_F_METADATA_SUPPORTED |
				  NVME_F_PCI_P2PDMA,
	.reg_read32		= nvme_pci_reg_read32,
	.reg_write32		= nvme_pci_reg_write32,
	.reg_read64		= nvme_pci_reg_read64,
	.free_ctrl		= nvme_pci_free_ctrl,
	.submit_async_event	= nvme_pci_submit_async_event,
	.get_address		= nvme_pci_get_address,
};

static int nvme_dev_map(struct nvme_dev *dev)
{
	struct pci_dev *pdev = to_pci_dev(dev->dev);

	if (pci_request_mem_regions(pdev, "nvme"))
		return -ENODEV;

	if (nvme_remap_bar(dev, NVME_REG_DBS + 4096))
		goto release;

	return 0;
  release:
	pci_release_mem_regions(pdev);
	return -ENODEV;
}

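/*
 * Quirks that depend on the combination of device and platform, detected
 * by DMI matching rather than by PCI IDs alone.
 */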
static unsigned long check_vendor_combination_bug(struct pci_dev *pdev)
{
	if (pdev->vendor == 0x144d && pdev->device == 0xa802) {
		/*
		 * Several Samsung devices seem to drop off the PCIe bus
		 * randomly when APST is on and uses the deepest sleep state.
		 * This has been observed on a Samsung "SM951 NVMe SAMSUNG
		 * 256GB", a "PM951 NVMe SAMSUNG 512GB", and a "Samsung SSD
		 * 950 PRO 256GB", but it seems to be restricted to two Dell
		 * laptops.
		 */
		if (dmi_match(DMI_SYS_VENDOR, "Dell Inc.") &&
		    (dmi_match(DMI_PRODUCT_NAME, "XPS 15 9550") ||
		     dmi_match(DMI_PRODUCT_NAME, "Precision 5510")))
			return NVME_QUIRK_NO_DEEPEST_PS;
	} else if (pdev->vendor == 0x144d && pdev->device == 0xa804) {
		/*
		 * Samsung SSD 960 EVO drops off the PCIe bus after system
		 * suspend on a Ryzen board, ASUS PRIME B350M-A, as well as
		 * within a few minutes after bootup on a Coffee Lake board -
		 * ASUS PRIME Z370-A.
		 */
		if (dmi_match(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC.") &&
		    (dmi_match(DMI_BOARD_NAME, "PRIME B350M-A") ||
		     dmi_match(DMI_BOARD_NAME, "PRIME Z370-A")))
			return NVME_QUIRK_NO_APST;
	} else if ((pdev->vendor == 0x144d && (pdev->device == 0xa801 ||
		    pdev->device == 0xa808 || pdev->device == 0xa809)) ||
		   (pdev->vendor == 0x1e0f && pdev->device == 0x0001)) {
		/*
		 * Force host managed nvme power settings for lowest idle
		 * power with quick resume latency on Samsung and Toshiba
		 * SSDs, based on suspend behavior on a Coffee Lake board
		 * (LENOVO C640).
		 */
		if ((dmi_match(DMI_BOARD_VENDOR, "LENOVO")) &&
		     dmi_match(DMI_BOARD_NAME, "LNVNB161216"))
			return NVME_QUIRK_SIMPLE_SUSPEND;
	}

	return 0;
}

static void nvme_async_probe(void *data, async_cookie_t cookie)
{
	struct nvme_dev *dev = data;

	flush_work(&dev->ctrl.reset_work);
	flush_work(&dev->ctrl.scan_work);
	nvme_put_ctrl(&dev->ctrl);
}

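/*
 * Allocate and minimally initialize an nvme_dev instance for probing.
 * Everything done here is undone by nvme_pci_free_ctrl() once the last
 * controller reference is dropped.
 */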
static struct nvme_dev *nvme_pci_alloc_dev(struct pci_dev *pdev,
		const struct pci_device_id *id)
{
	unsigned long quirks = id->driver_data;
	int node = dev_to_node(&pdev->dev);
	struct nvme_dev *dev;
	int ret = -ENOMEM;

	dev = kzalloc_node(sizeof(*dev), GFP_KERNEL, node);
	if (!dev)
		return ERR_PTR(-ENOMEM);
	INIT_WORK(&dev->ctrl.reset_work, nvme_reset_work);
	INIT_WORK(&dev->remove_work, nvme_remove_dead_ctrl_work);
	mutex_init(&dev->shutdown_lock);

	dev->nr_write_queues = write_queues;
	dev->nr_poll_queues = poll_queues;
	dev->nr_allocated_queues = nvme_max_io_queues(dev) + 1;
	dev->queues = kcalloc_node(dev->nr_allocated_queues,
			sizeof(struct nvme_queue), GFP_KERNEL, node);
	if (!dev->queues)
		goto out_free_dev;

	dev->dev = get_device(&pdev->dev);

	quirks |= check_vendor_combination_bug(pdev);
	if (!noacpi && acpi_storage_d3(&pdev->dev)) {
		/*
		 * Some systems use a BIOS work around to ask for D3 on
		 * platforms that support kernel managed suspend.
		 */
		dev_info(&pdev->dev,
			 "platform quirk: setting simple suspend\n");
		quirks |= NVME_QUIRK_SIMPLE_SUSPEND;
	}
	ret = nvme_init_ctrl(&dev->ctrl, &pdev->dev, &nvme_pci_ctrl_ops,
			quirks);
	if (ret)
		goto out_put_device;
	return dev;

out_put_device:
	put_device(dev->dev);
	kfree(dev->queues);
out_free_dev:
	kfree(dev);
	return ERR_PTR(ret);
}

static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	struct nvme_dev *dev;
	int result = -ENOMEM;

	dev = nvme_pci_alloc_dev(pdev, id);
	if (IS_ERR(dev))
		return PTR_ERR(dev);

	result = nvme_dev_map(dev);
	if (result)
		goto out_uninit_ctrl;

	result = nvme_setup_prp_pools(dev);
	if (result)
		goto out_dev_unmap;

	result = nvme_pci_alloc_iod_mempool(dev);
	if (result)
		goto out_release_prp_pools;

	dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev));
	pci_set_drvdata(pdev, dev);

	nvme_reset_ctrl(&dev->ctrl);
	async_schedule(nvme_async_probe, dev);
	return 0;

out_release_prp_pools:
	nvme_release_prp_pools(dev);
out_dev_unmap:
	nvme_dev_unmap(dev);
out_uninit_ctrl:
	nvme_uninit_ctrl(&dev->ctrl);
	return result;
}

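/*
 * PCI reset hooks: quiesce the controller before a function level reset
 * and schedule a controller reset once the reset is done.
 */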
static void nvme_reset_prepare(struct pci_dev *pdev)
{
	struct nvme_dev *dev = pci_get_drvdata(pdev);

	/*
	 * We don't need to check the return value from waiting for the reset
	 * state as pci_dev device lock is held, making it impossible to race
	 * with ->remove().
	 */
	nvme_disable_prepare_reset(dev, false);
	nvme_sync_queues(&dev->ctrl);
}

static void nvme_reset_done(struct pci_dev *pdev)
{
	struct nvme_dev *dev = pci_get_drvdata(pdev);

	if (!nvme_try_sched_reset(&dev->ctrl))
		flush_work(&dev->ctrl.reset_work);
}

static void nvme_shutdown(struct pci_dev *pdev)
{
	struct nvme_dev *dev = pci_get_drvdata(pdev);

	nvme_disable_prepare_reset(dev, true);
}

/*
 * The driver's remove may be called on a device in a partially initialized
 * state. This function must not have any dependencies on the device state in
 * order to proceed.
 */
static void nvme_remove(struct pci_dev *pdev)
{
	struct nvme_dev *dev = pci_get_drvdata(pdev);

	nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DELETING);
	pci_set_drvdata(pdev, NULL);

	if (!pci_device_is_present(pdev)) {
		nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_DEAD);
		nvme_dev_disable(dev, true);
	}

	flush_work(&dev->ctrl.reset_work);
	nvme_stop_ctrl(&dev->ctrl);
	nvme_remove_namespaces(&dev->ctrl);
	nvme_dev_disable(dev, true);
	nvme_release_cmb(dev);
	nvme_free_host_mem(dev);
	nvme_dev_remove_admin(dev);
	nvme_free_queues(dev, 0);
	nvme_release_prp_pools(dev);
	nvme_dev_unmap(dev);
	nvme_uninit_ctrl(&dev->ctrl);
}

#ifdef CONFIG_PM_SLEEP
static int nvme_get_power_state(struct nvme_ctrl *ctrl, u32 *ps)
{
	return nvme_get_features(ctrl, NVME_FEAT_POWER_MGMT, 0, NULL, 0, ps);
}

static int nvme_set_power_state(struct nvme_ctrl *ctrl, u32 ps)
{
	return nvme_set_features(ctrl, NVME_FEAT_POWER_MGMT, ps, NULL, 0, NULL);
}

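/*
 * Resume counterpart of nvme_suspend(): restore the power state saved at
 * suspend time if one was used, and fall back to a full controller reset
 * if that is not possible.
 */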
static int nvme_resume(struct device *dev)
{
	struct nvme_dev *ndev = pci_get_drvdata(to_pci_dev(dev));
	struct nvme_ctrl *ctrl = &ndev->ctrl;

	if (ndev->last_ps == U32_MAX ||
	    nvme_set_power_state(ctrl, ndev->last_ps) != 0)
		return nvme_try_sched_reset(&ndev->ctrl);
	return 0;
}

static int nvme_suspend(struct device *dev)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	struct nvme_dev *ndev = pci_get_drvdata(pdev);
	struct nvme_ctrl *ctrl = &ndev->ctrl;
	int ret = -EBUSY;

	ndev->last_ps = U32_MAX;

	/*
	 * The platform does not remove power for a kernel managed suspend so
	 * use host managed nvme power settings for lowest idle power if
	 * possible. This should have quicker resume latency than a full device
	 * shutdown.  But if the firmware is involved after the suspend or the
	 * device does not support any non-default power states, shut down the
	 * device fully.
	 *
	 * If ASPM is not enabled for the device, shut down the device and allow
	 * the PCI bus layer to put it into D3 in order to take the PCIe link
	 * down, so as to allow the platform to achieve its minimum low-power
	 * state (which may not be possible if the link is up).
	 *
	 * If a host memory buffer is enabled, shut down the device as the NVMe
	 * specification allows the device to access the host memory buffer in
	 * host DRAM from all power states, but hosts will fail access to DRAM
	 * during S3.
	 */
	if (pm_suspend_via_firmware() || !ctrl->npss ||
	    !pcie_aspm_enabled(pdev) ||
	    ndev->nr_host_mem_descs ||
	    (ndev->ctrl.quirks & NVME_QUIRK_SIMPLE_SUSPEND))
		return nvme_disable_prepare_reset(ndev, true);

	nvme_start_freeze(ctrl);
	nvme_wait_freeze(ctrl);
	nvme_sync_queues(ctrl);

	if (ctrl->state != NVME_CTRL_LIVE)
		goto unfreeze;

	ret = nvme_get_power_state(ctrl, &ndev->last_ps);
	if (ret < 0)
		goto unfreeze;

	/*
	 * A saved state prevents pci pm from generically controlling the
	 * device's power. If we're using protocol specific settings, we don't
	 * want pci interfering.
	 */
	pci_save_state(pdev);

	ret = nvme_set_power_state(ctrl, ctrl->npss);
	if (ret < 0)
		goto unfreeze;

	if (ret) {
		/* discard the saved state */
		pci_load_saved_state(pdev, NULL);

		/*
		 * Clearing npss forces a controller reset on resume. The
		 * correct value will be rediscovered then.
		 */
		ret = nvme_disable_prepare_reset(ndev, true);
		ctrl->npss = 0;
	}
unfreeze:
	nvme_unfreeze(ctrl);
	return ret;
}

static int nvme_simple_suspend(struct device *dev)
{
	struct nvme_dev *ndev = pci_get_drvdata(to_pci_dev(dev));

	return nvme_disable_prepare_reset(ndev, true);
}

static int nvme_simple_resume(struct device *dev)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	struct nvme_dev *ndev = pci_get_drvdata(pdev);

	return nvme_try_sched_reset(&ndev->ctrl);
}

static const struct dev_pm_ops nvme_dev_pm_ops = {
	.suspend	= nvme_suspend,
	.resume		= nvme_resume,
	.freeze		= nvme_simple_suspend,
	.thaw		= nvme_simple_resume,
	.poweroff	= nvme_simple_suspend,
	.restore	= nvme_simple_resume,
};
#endif /* CONFIG_PM_SLEEP */

static pci_ers_result_t nvme_error_detected(struct pci_dev *pdev,
						pci_channel_state_t state)
{
	struct nvme_dev *dev = pci_get_drvdata(pdev);

	/*
	 * A frozen channel requires a reset. When detected, this method will
	 * shutdown the controller to quiesce. The controller will be restarted
	 * after the slot reset through the driver's slot_reset callback.
	 */
	switch (state) {
	case pci_channel_io_normal:
		return PCI_ERS_RESULT_CAN_RECOVER;
	case pci_channel_io_frozen:
		dev_warn(dev->ctrl.device,
			"frozen state error detected, reset controller\n");
		nvme_dev_disable(dev, false);
		return PCI_ERS_RESULT_NEED_RESET;
	case pci_channel_io_perm_failure:
		dev_warn(dev->ctrl.device,
			"failure state error detected, request disconnect\n");
		return PCI_ERS_RESULT_DISCONNECT;
	}
	return PCI_ERS_RESULT_NEED_RESET;
}

static pci_ers_result_t nvme_slot_reset(struct pci_dev *pdev)
{
	struct nvme_dev *dev = pci_get_drvdata(pdev);

	dev_info(dev->ctrl.device, "restart after slot reset\n");
	pci_restore_state(pdev);
	nvme_reset_ctrl(&dev->ctrl);
	return PCI_ERS_RESULT_RECOVERED;
}

static void nvme_error_resume(struct pci_dev *pdev)
{
	struct nvme_dev *dev = pci_get_drvdata(pdev);

	flush_work(&dev->ctrl.reset_work);
}

static const struct pci_error_handlers nvme_err_handler = {
	.error_detected	= nvme_error_detected,
	.slot_reset	= nvme_slot_reset,
	.resume		= nvme_error_resume,
	.reset_prepare	= nvme_reset_prepare,
	.reset_done	= nvme_reset_done,
};

static const struct pci_device_id nvme_id_table[] = {
	{ PCI_VDEVICE(INTEL, 0x0953),	/* Intel 750/P3500/P3600/P3700 */
		.driver_data = NVME_QUIRK_STRIPE_SIZE |
				NVME_QUIRK_DEALLOCATE_ZEROES, },
	{ PCI_VDEVICE(INTEL, 0x0a53),	/* Intel P3520 */
		.driver_data = NVME_QUIRK_STRIPE_SIZE |
				NVME_QUIRK_DEALLOCATE_ZEROES, },
	{ PCI_VDEVICE(INTEL, 0x0a54),	/* Intel P4500/P4600 */
		.driver_data = NVME_QUIRK_STRIPE_SIZE |
				NVME_QUIRK_DEALLOCATE_ZEROES |
				NVME_QUIRK_IGNORE_DEV_SUBNQN |
				NVME_QUIRK_BOGUS_NID, },
	{ PCI_VDEVICE(INTEL, 0x0a55),	/* Dell Express Flash P4600 */
		.driver_data = NVME_QUIRK_STRIPE_SIZE |
				NVME_QUIRK_DEALLOCATE_ZEROES, },
	{ PCI_VDEVICE(INTEL, 0xf1a5),	/* Intel 600P/P3100 */
		.driver_data = NVME_QUIRK_NO_DEEPEST_PS |
				NVME_QUIRK_MEDIUM_PRIO_SQ |
				NVME_QUIRK_NO_TEMP_THRESH_CHANGE |
				NVME_QUIRK_DISABLE_WRITE_ZEROES, },
	{ PCI_VDEVICE(INTEL, 0xf1a6),	/* Intel 760p/Pro 7600p */
		.driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, },
	{ PCI_VDEVICE(INTEL, 0x5845),	/* Qemu emulated controller */
		.driver_data = NVME_QUIRK_IDENTIFY_CNS |
				NVME_QUIRK_DISABLE_WRITE_ZEROES |
				NVME_QUIRK_BOGUS_NID, },
	{ PCI_VDEVICE(REDHAT, 0x0010),	/* Qemu emulated controller */
		.driver_data = NVME_QUIRK_BOGUS_NID, },
	{ PCI_DEVICE(0x126f, 0x2263),	/* Silicon Motion unidentified */
		.driver_data = NVME_QUIRK_NO_NS_DESC_LIST, },
	{ PCI_DEVICE(0x1bb1, 0x0100),	/* Seagate Nytro Flash Storage */
		.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY |
				NVME_QUIRK_NO_NS_DESC_LIST, },
	{ PCI_DEVICE(0x1c58, 0x0003),	/* HGST adapter */
		.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
	{ PCI_DEVICE(0x1c58, 0x0023),	/* WDC SN200 adapter */
		.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
	{ PCI_DEVICE(0x1c5f, 0x0540),	/* Memblaze Pblaze4 adapter */
		.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
	{ PCI_DEVICE(0x144d, 0xa821),	/* Samsung PM1725 */
		.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
	{ PCI_DEVICE(0x144d, 0xa822),	/* Samsung PM1725a */
		.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY |
				NVME_QUIRK_DISABLE_WRITE_ZEROES |
				NVME_QUIRK_IGNORE_DEV_SUBNQN, },
	{ PCI_DEVICE(0x1987, 0x5016),	/* Phison E16 */
		.driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN |
				NVME_QUIRK_BOGUS_NID, },
	{ PCI_DEVICE(0x1b4b, 0x1092),	/* Lexar 256 GB SSD */
		.driver_data = NVME_QUIRK_NO_NS_DESC_LIST |
				NVME_QUIRK_IGNORE_DEV_SUBNQN, },
	{ PCI_DEVICE(0x1d1d, 0x1f1f),	/* LightNVM qemu device */
		.driver_data = NVME_QUIRK_LIGHTNVM, },
	{ PCI_DEVICE(0x1d1d, 0x2807),	/* CNEX WL */
		.driver_data = NVME_QUIRK_LIGHTNVM, },
	{ PCI_DEVICE(0x1d1d, 0x2601),	/* CNEX Granby */
		.driver_data = NVME_QUIRK_LIGHTNVM, },
	{ PCI_DEVICE(0x10ec, 0x5762),	/* ADATA SX6000LNP */
		.driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN |
				NVME_QUIRK_BOGUS_NID, },
	{ PCI_DEVICE(0x1cc1, 0x8201),	/* ADATA SX8200PNP 512GB */
		.driver_data = NVME_QUIRK_NO_DEEPEST_PS |
				NVME_QUIRK_IGNORE_DEV_SUBNQN, },
	{ PCI_DEVICE(0x1344, 0x5407),	/* Micron Technology Inc NVMe SSD */
		.driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN },
	{ PCI_DEVICE(0x1344, 0x6001),	/* Micron Nitro NVMe */
		.driver_data = NVME_QUIRK_BOGUS_NID, },
	{ PCI_DEVICE(0x1c5c, 0x1504),	/* SK Hynix PC400 */
		.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
	{ PCI_DEVICE(0x15b7, 0x2001),	/* Sandisk Skyhawk */
		.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
	{ PCI_DEVICE(0x2646, 0x2262),	/* KINGSTON SKC2000 NVMe SSD */
		.driver_data = NVME_QUIRK_NO_DEEPEST_PS, },
	{ PCI_DEVICE(0x2646, 0x2263),	/* KINGSTON A2000 NVMe SSD */
		.driver_data = NVME_QUIRK_NO_DEEPEST_PS, },
	{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001),
		.driver_data = NVME_QUIRK_SINGLE_VECTOR },
	{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) },
	{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2005),
		.driver_data = NVME_QUIRK_SINGLE_VECTOR |
				NVME_QUIRK_128_BYTES_SQES |
				NVME_QUIRK_SHARED_TAGS |
				NVME_QUIRK_SKIP_CID_GEN },
	{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
	{ 0, }
};
MODULE_DEVICE_TABLE(pci, nvme_id_table);

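/*
 * PCI driver glue: probe/remove, shutdown, power management and PCI error
 * handling for every device matched by nvme_id_table above.
 */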
static struct pci_driver nvme_driver = {
	.name		= "nvme",
	.id_table	= nvme_id_table,
	.probe		= nvme_probe,
	.remove		= nvme_remove,
	.shutdown	= nvme_shutdown,
#ifdef CONFIG_PM_SLEEP
	.driver		= {
		.pm	= &nvme_dev_pm_ops,
	},
#endif
	.sriov_configure = pci_sriov_configure_simple,
	.err_handler	= &nvme_err_handler,
};

static int __init nvme_init(void)
{
	BUILD_BUG_ON(sizeof(struct nvme_create_cq) != 64);
	BUILD_BUG_ON(sizeof(struct nvme_create_sq) != 64);
	BUILD_BUG_ON(sizeof(struct nvme_delete_queue) != 64);
	BUILD_BUG_ON(IRQ_AFFINITY_MAX_SETS < 2);

	return pci_register_driver(&nvme_driver);
}

static void __exit nvme_exit(void)
{
	pci_unregister_driver(&nvme_driver);
	flush_workqueue(nvme_wq);
}

MODULE_AUTHOR("Matthew Wilcox <willy@linux.intel.com>");
MODULE_LICENSE("GPL");
MODULE_VERSION("1.0");
module_init(nvme_init);
module_exit(nvme_exit);