162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * NVMe over Fabrics TCP host. 462306a36Sopenharmony_ci * Copyright (c) 2018 Lightbits Labs. All rights reserved. 562306a36Sopenharmony_ci */ 662306a36Sopenharmony_ci#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 762306a36Sopenharmony_ci#include <linux/module.h> 862306a36Sopenharmony_ci#include <linux/init.h> 962306a36Sopenharmony_ci#include <linux/slab.h> 1062306a36Sopenharmony_ci#include <linux/err.h> 1162306a36Sopenharmony_ci#include <linux/nvme-tcp.h> 1262306a36Sopenharmony_ci#include <net/sock.h> 1362306a36Sopenharmony_ci#include <net/tcp.h> 1462306a36Sopenharmony_ci#include <linux/blk-mq.h> 1562306a36Sopenharmony_ci#include <crypto/hash.h> 1662306a36Sopenharmony_ci#include <net/busy_poll.h> 1762306a36Sopenharmony_ci#include <trace/events/sock.h> 1862306a36Sopenharmony_ci 1962306a36Sopenharmony_ci#include "nvme.h" 2062306a36Sopenharmony_ci#include "fabrics.h" 2162306a36Sopenharmony_ci 2262306a36Sopenharmony_cistruct nvme_tcp_queue; 2362306a36Sopenharmony_ci 2462306a36Sopenharmony_ci/* Define the socket priority to use for connections were it is desirable 2562306a36Sopenharmony_ci * that the NIC consider performing optimized packet processing or filtering. 2662306a36Sopenharmony_ci * A non-zero value being sufficient to indicate general consideration of any 2762306a36Sopenharmony_ci * possible optimization. Making it a module param allows for alternative 2862306a36Sopenharmony_ci * values that may be unique for some NIC implementations. 2962306a36Sopenharmony_ci */ 3062306a36Sopenharmony_cistatic int so_priority; 3162306a36Sopenharmony_cimodule_param(so_priority, int, 0644); 3262306a36Sopenharmony_ciMODULE_PARM_DESC(so_priority, "nvme tcp socket optimize priority"); 3362306a36Sopenharmony_ci 3462306a36Sopenharmony_ci#ifdef CONFIG_DEBUG_LOCK_ALLOC 3562306a36Sopenharmony_ci/* lockdep can detect a circular dependency of the form 3662306a36Sopenharmony_ci * sk_lock -> mmap_lock (page fault) -> fs locks -> sk_lock 3762306a36Sopenharmony_ci * because dependencies are tracked for both nvme-tcp and user contexts. Using 3862306a36Sopenharmony_ci * a separate class prevents lockdep from conflating nvme-tcp socket use with 3962306a36Sopenharmony_ci * user-space socket API use. 4062306a36Sopenharmony_ci */ 4162306a36Sopenharmony_cistatic struct lock_class_key nvme_tcp_sk_key[2]; 4262306a36Sopenharmony_cistatic struct lock_class_key nvme_tcp_slock_key[2]; 4362306a36Sopenharmony_ci 4462306a36Sopenharmony_cistatic void nvme_tcp_reclassify_socket(struct socket *sock) 4562306a36Sopenharmony_ci{ 4662306a36Sopenharmony_ci struct sock *sk = sock->sk; 4762306a36Sopenharmony_ci 4862306a36Sopenharmony_ci if (WARN_ON_ONCE(!sock_allow_reclassification(sk))) 4962306a36Sopenharmony_ci return; 5062306a36Sopenharmony_ci 5162306a36Sopenharmony_ci switch (sk->sk_family) { 5262306a36Sopenharmony_ci case AF_INET: 5362306a36Sopenharmony_ci sock_lock_init_class_and_name(sk, "slock-AF_INET-NVME", 5462306a36Sopenharmony_ci &nvme_tcp_slock_key[0], 5562306a36Sopenharmony_ci "sk_lock-AF_INET-NVME", 5662306a36Sopenharmony_ci &nvme_tcp_sk_key[0]); 5762306a36Sopenharmony_ci break; 5862306a36Sopenharmony_ci case AF_INET6: 5962306a36Sopenharmony_ci sock_lock_init_class_and_name(sk, "slock-AF_INET6-NVME", 6062306a36Sopenharmony_ci &nvme_tcp_slock_key[1], 6162306a36Sopenharmony_ci "sk_lock-AF_INET6-NVME", 6262306a36Sopenharmony_ci &nvme_tcp_sk_key[1]); 6362306a36Sopenharmony_ci break; 6462306a36Sopenharmony_ci default: 6562306a36Sopenharmony_ci WARN_ON_ONCE(1); 6662306a36Sopenharmony_ci } 6762306a36Sopenharmony_ci} 6862306a36Sopenharmony_ci#else 6962306a36Sopenharmony_cistatic void nvme_tcp_reclassify_socket(struct socket *sock) { } 7062306a36Sopenharmony_ci#endif 7162306a36Sopenharmony_ci 7262306a36Sopenharmony_cienum nvme_tcp_send_state { 7362306a36Sopenharmony_ci NVME_TCP_SEND_CMD_PDU = 0, 7462306a36Sopenharmony_ci NVME_TCP_SEND_H2C_PDU, 7562306a36Sopenharmony_ci NVME_TCP_SEND_DATA, 7662306a36Sopenharmony_ci NVME_TCP_SEND_DDGST, 7762306a36Sopenharmony_ci}; 7862306a36Sopenharmony_ci 7962306a36Sopenharmony_cistruct nvme_tcp_request { 8062306a36Sopenharmony_ci struct nvme_request req; 8162306a36Sopenharmony_ci void *pdu; 8262306a36Sopenharmony_ci struct nvme_tcp_queue *queue; 8362306a36Sopenharmony_ci u32 data_len; 8462306a36Sopenharmony_ci u32 pdu_len; 8562306a36Sopenharmony_ci u32 pdu_sent; 8662306a36Sopenharmony_ci u32 h2cdata_left; 8762306a36Sopenharmony_ci u32 h2cdata_offset; 8862306a36Sopenharmony_ci u16 ttag; 8962306a36Sopenharmony_ci __le16 status; 9062306a36Sopenharmony_ci struct list_head entry; 9162306a36Sopenharmony_ci struct llist_node lentry; 9262306a36Sopenharmony_ci __le32 ddgst; 9362306a36Sopenharmony_ci 9462306a36Sopenharmony_ci struct bio *curr_bio; 9562306a36Sopenharmony_ci struct iov_iter iter; 9662306a36Sopenharmony_ci 9762306a36Sopenharmony_ci /* send state */ 9862306a36Sopenharmony_ci size_t offset; 9962306a36Sopenharmony_ci size_t data_sent; 10062306a36Sopenharmony_ci enum nvme_tcp_send_state state; 10162306a36Sopenharmony_ci}; 10262306a36Sopenharmony_ci 10362306a36Sopenharmony_cienum nvme_tcp_queue_flags { 10462306a36Sopenharmony_ci NVME_TCP_Q_ALLOCATED = 0, 10562306a36Sopenharmony_ci NVME_TCP_Q_LIVE = 1, 10662306a36Sopenharmony_ci NVME_TCP_Q_POLLING = 2, 10762306a36Sopenharmony_ci}; 10862306a36Sopenharmony_ci 10962306a36Sopenharmony_cienum nvme_tcp_recv_state { 11062306a36Sopenharmony_ci NVME_TCP_RECV_PDU = 0, 11162306a36Sopenharmony_ci NVME_TCP_RECV_DATA, 11262306a36Sopenharmony_ci NVME_TCP_RECV_DDGST, 11362306a36Sopenharmony_ci}; 11462306a36Sopenharmony_ci 11562306a36Sopenharmony_cistruct nvme_tcp_ctrl; 11662306a36Sopenharmony_cistruct nvme_tcp_queue { 11762306a36Sopenharmony_ci struct socket *sock; 11862306a36Sopenharmony_ci struct work_struct io_work; 11962306a36Sopenharmony_ci int io_cpu; 12062306a36Sopenharmony_ci 12162306a36Sopenharmony_ci struct mutex queue_lock; 12262306a36Sopenharmony_ci struct mutex send_mutex; 12362306a36Sopenharmony_ci struct llist_head req_list; 12462306a36Sopenharmony_ci struct list_head send_list; 12562306a36Sopenharmony_ci 12662306a36Sopenharmony_ci /* recv state */ 12762306a36Sopenharmony_ci void *pdu; 12862306a36Sopenharmony_ci int pdu_remaining; 12962306a36Sopenharmony_ci int pdu_offset; 13062306a36Sopenharmony_ci size_t data_remaining; 13162306a36Sopenharmony_ci size_t ddgst_remaining; 13262306a36Sopenharmony_ci unsigned int nr_cqe; 13362306a36Sopenharmony_ci 13462306a36Sopenharmony_ci /* send state */ 13562306a36Sopenharmony_ci struct nvme_tcp_request *request; 13662306a36Sopenharmony_ci 13762306a36Sopenharmony_ci u32 maxh2cdata; 13862306a36Sopenharmony_ci size_t cmnd_capsule_len; 13962306a36Sopenharmony_ci struct nvme_tcp_ctrl *ctrl; 14062306a36Sopenharmony_ci unsigned long flags; 14162306a36Sopenharmony_ci bool rd_enabled; 14262306a36Sopenharmony_ci 14362306a36Sopenharmony_ci bool hdr_digest; 14462306a36Sopenharmony_ci bool data_digest; 14562306a36Sopenharmony_ci struct ahash_request *rcv_hash; 14662306a36Sopenharmony_ci struct ahash_request *snd_hash; 14762306a36Sopenharmony_ci __le32 exp_ddgst; 14862306a36Sopenharmony_ci __le32 recv_ddgst; 14962306a36Sopenharmony_ci 15062306a36Sopenharmony_ci struct page_frag_cache pf_cache; 15162306a36Sopenharmony_ci 15262306a36Sopenharmony_ci void (*state_change)(struct sock *); 15362306a36Sopenharmony_ci void (*data_ready)(struct sock *); 15462306a36Sopenharmony_ci void (*write_space)(struct sock *); 15562306a36Sopenharmony_ci}; 15662306a36Sopenharmony_ci 15762306a36Sopenharmony_cistruct nvme_tcp_ctrl { 15862306a36Sopenharmony_ci /* read only in the hot path */ 15962306a36Sopenharmony_ci struct nvme_tcp_queue *queues; 16062306a36Sopenharmony_ci struct blk_mq_tag_set tag_set; 16162306a36Sopenharmony_ci 16262306a36Sopenharmony_ci /* other member variables */ 16362306a36Sopenharmony_ci struct list_head list; 16462306a36Sopenharmony_ci struct blk_mq_tag_set admin_tag_set; 16562306a36Sopenharmony_ci struct sockaddr_storage addr; 16662306a36Sopenharmony_ci struct sockaddr_storage src_addr; 16762306a36Sopenharmony_ci struct nvme_ctrl ctrl; 16862306a36Sopenharmony_ci 16962306a36Sopenharmony_ci struct work_struct err_work; 17062306a36Sopenharmony_ci struct delayed_work connect_work; 17162306a36Sopenharmony_ci struct nvme_tcp_request async_req; 17262306a36Sopenharmony_ci u32 io_queues[HCTX_MAX_TYPES]; 17362306a36Sopenharmony_ci}; 17462306a36Sopenharmony_ci 17562306a36Sopenharmony_cistatic LIST_HEAD(nvme_tcp_ctrl_list); 17662306a36Sopenharmony_cistatic DEFINE_MUTEX(nvme_tcp_ctrl_mutex); 17762306a36Sopenharmony_cistatic struct workqueue_struct *nvme_tcp_wq; 17862306a36Sopenharmony_cistatic const struct blk_mq_ops nvme_tcp_mq_ops; 17962306a36Sopenharmony_cistatic const struct blk_mq_ops nvme_tcp_admin_mq_ops; 18062306a36Sopenharmony_cistatic int nvme_tcp_try_send(struct nvme_tcp_queue *queue); 18162306a36Sopenharmony_ci 18262306a36Sopenharmony_cistatic inline struct nvme_tcp_ctrl *to_tcp_ctrl(struct nvme_ctrl *ctrl) 18362306a36Sopenharmony_ci{ 18462306a36Sopenharmony_ci return container_of(ctrl, struct nvme_tcp_ctrl, ctrl); 18562306a36Sopenharmony_ci} 18662306a36Sopenharmony_ci 18762306a36Sopenharmony_cistatic inline int nvme_tcp_queue_id(struct nvme_tcp_queue *queue) 18862306a36Sopenharmony_ci{ 18962306a36Sopenharmony_ci return queue - queue->ctrl->queues; 19062306a36Sopenharmony_ci} 19162306a36Sopenharmony_ci 19262306a36Sopenharmony_cistatic inline struct blk_mq_tags *nvme_tcp_tagset(struct nvme_tcp_queue *queue) 19362306a36Sopenharmony_ci{ 19462306a36Sopenharmony_ci u32 queue_idx = nvme_tcp_queue_id(queue); 19562306a36Sopenharmony_ci 19662306a36Sopenharmony_ci if (queue_idx == 0) 19762306a36Sopenharmony_ci return queue->ctrl->admin_tag_set.tags[queue_idx]; 19862306a36Sopenharmony_ci return queue->ctrl->tag_set.tags[queue_idx - 1]; 19962306a36Sopenharmony_ci} 20062306a36Sopenharmony_ci 20162306a36Sopenharmony_cistatic inline u8 nvme_tcp_hdgst_len(struct nvme_tcp_queue *queue) 20262306a36Sopenharmony_ci{ 20362306a36Sopenharmony_ci return queue->hdr_digest ? NVME_TCP_DIGEST_LENGTH : 0; 20462306a36Sopenharmony_ci} 20562306a36Sopenharmony_ci 20662306a36Sopenharmony_cistatic inline u8 nvme_tcp_ddgst_len(struct nvme_tcp_queue *queue) 20762306a36Sopenharmony_ci{ 20862306a36Sopenharmony_ci return queue->data_digest ? NVME_TCP_DIGEST_LENGTH : 0; 20962306a36Sopenharmony_ci} 21062306a36Sopenharmony_ci 21162306a36Sopenharmony_cistatic inline void *nvme_tcp_req_cmd_pdu(struct nvme_tcp_request *req) 21262306a36Sopenharmony_ci{ 21362306a36Sopenharmony_ci return req->pdu; 21462306a36Sopenharmony_ci} 21562306a36Sopenharmony_ci 21662306a36Sopenharmony_cistatic inline void *nvme_tcp_req_data_pdu(struct nvme_tcp_request *req) 21762306a36Sopenharmony_ci{ 21862306a36Sopenharmony_ci /* use the pdu space in the back for the data pdu */ 21962306a36Sopenharmony_ci return req->pdu + sizeof(struct nvme_tcp_cmd_pdu) - 22062306a36Sopenharmony_ci sizeof(struct nvme_tcp_data_pdu); 22162306a36Sopenharmony_ci} 22262306a36Sopenharmony_ci 22362306a36Sopenharmony_cistatic inline size_t nvme_tcp_inline_data_size(struct nvme_tcp_request *req) 22462306a36Sopenharmony_ci{ 22562306a36Sopenharmony_ci if (nvme_is_fabrics(req->req.cmd)) 22662306a36Sopenharmony_ci return NVME_TCP_ADMIN_CCSZ; 22762306a36Sopenharmony_ci return req->queue->cmnd_capsule_len - sizeof(struct nvme_command); 22862306a36Sopenharmony_ci} 22962306a36Sopenharmony_ci 23062306a36Sopenharmony_cistatic inline bool nvme_tcp_async_req(struct nvme_tcp_request *req) 23162306a36Sopenharmony_ci{ 23262306a36Sopenharmony_ci return req == &req->queue->ctrl->async_req; 23362306a36Sopenharmony_ci} 23462306a36Sopenharmony_ci 23562306a36Sopenharmony_cistatic inline bool nvme_tcp_has_inline_data(struct nvme_tcp_request *req) 23662306a36Sopenharmony_ci{ 23762306a36Sopenharmony_ci struct request *rq; 23862306a36Sopenharmony_ci 23962306a36Sopenharmony_ci if (unlikely(nvme_tcp_async_req(req))) 24062306a36Sopenharmony_ci return false; /* async events don't have a request */ 24162306a36Sopenharmony_ci 24262306a36Sopenharmony_ci rq = blk_mq_rq_from_pdu(req); 24362306a36Sopenharmony_ci 24462306a36Sopenharmony_ci return rq_data_dir(rq) == WRITE && req->data_len && 24562306a36Sopenharmony_ci req->data_len <= nvme_tcp_inline_data_size(req); 24662306a36Sopenharmony_ci} 24762306a36Sopenharmony_ci 24862306a36Sopenharmony_cistatic inline struct page *nvme_tcp_req_cur_page(struct nvme_tcp_request *req) 24962306a36Sopenharmony_ci{ 25062306a36Sopenharmony_ci return req->iter.bvec->bv_page; 25162306a36Sopenharmony_ci} 25262306a36Sopenharmony_ci 25362306a36Sopenharmony_cistatic inline size_t nvme_tcp_req_cur_offset(struct nvme_tcp_request *req) 25462306a36Sopenharmony_ci{ 25562306a36Sopenharmony_ci return req->iter.bvec->bv_offset + req->iter.iov_offset; 25662306a36Sopenharmony_ci} 25762306a36Sopenharmony_ci 25862306a36Sopenharmony_cistatic inline size_t nvme_tcp_req_cur_length(struct nvme_tcp_request *req) 25962306a36Sopenharmony_ci{ 26062306a36Sopenharmony_ci return min_t(size_t, iov_iter_single_seg_count(&req->iter), 26162306a36Sopenharmony_ci req->pdu_len - req->pdu_sent); 26262306a36Sopenharmony_ci} 26362306a36Sopenharmony_ci 26462306a36Sopenharmony_cistatic inline size_t nvme_tcp_pdu_data_left(struct nvme_tcp_request *req) 26562306a36Sopenharmony_ci{ 26662306a36Sopenharmony_ci return rq_data_dir(blk_mq_rq_from_pdu(req)) == WRITE ? 26762306a36Sopenharmony_ci req->pdu_len - req->pdu_sent : 0; 26862306a36Sopenharmony_ci} 26962306a36Sopenharmony_ci 27062306a36Sopenharmony_cistatic inline size_t nvme_tcp_pdu_last_send(struct nvme_tcp_request *req, 27162306a36Sopenharmony_ci int len) 27262306a36Sopenharmony_ci{ 27362306a36Sopenharmony_ci return nvme_tcp_pdu_data_left(req) <= len; 27462306a36Sopenharmony_ci} 27562306a36Sopenharmony_ci 27662306a36Sopenharmony_cistatic void nvme_tcp_init_iter(struct nvme_tcp_request *req, 27762306a36Sopenharmony_ci unsigned int dir) 27862306a36Sopenharmony_ci{ 27962306a36Sopenharmony_ci struct request *rq = blk_mq_rq_from_pdu(req); 28062306a36Sopenharmony_ci struct bio_vec *vec; 28162306a36Sopenharmony_ci unsigned int size; 28262306a36Sopenharmony_ci int nr_bvec; 28362306a36Sopenharmony_ci size_t offset; 28462306a36Sopenharmony_ci 28562306a36Sopenharmony_ci if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) { 28662306a36Sopenharmony_ci vec = &rq->special_vec; 28762306a36Sopenharmony_ci nr_bvec = 1; 28862306a36Sopenharmony_ci size = blk_rq_payload_bytes(rq); 28962306a36Sopenharmony_ci offset = 0; 29062306a36Sopenharmony_ci } else { 29162306a36Sopenharmony_ci struct bio *bio = req->curr_bio; 29262306a36Sopenharmony_ci struct bvec_iter bi; 29362306a36Sopenharmony_ci struct bio_vec bv; 29462306a36Sopenharmony_ci 29562306a36Sopenharmony_ci vec = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter); 29662306a36Sopenharmony_ci nr_bvec = 0; 29762306a36Sopenharmony_ci bio_for_each_bvec(bv, bio, bi) { 29862306a36Sopenharmony_ci nr_bvec++; 29962306a36Sopenharmony_ci } 30062306a36Sopenharmony_ci size = bio->bi_iter.bi_size; 30162306a36Sopenharmony_ci offset = bio->bi_iter.bi_bvec_done; 30262306a36Sopenharmony_ci } 30362306a36Sopenharmony_ci 30462306a36Sopenharmony_ci iov_iter_bvec(&req->iter, dir, vec, nr_bvec, size); 30562306a36Sopenharmony_ci req->iter.iov_offset = offset; 30662306a36Sopenharmony_ci} 30762306a36Sopenharmony_ci 30862306a36Sopenharmony_cistatic inline void nvme_tcp_advance_req(struct nvme_tcp_request *req, 30962306a36Sopenharmony_ci int len) 31062306a36Sopenharmony_ci{ 31162306a36Sopenharmony_ci req->data_sent += len; 31262306a36Sopenharmony_ci req->pdu_sent += len; 31362306a36Sopenharmony_ci iov_iter_advance(&req->iter, len); 31462306a36Sopenharmony_ci if (!iov_iter_count(&req->iter) && 31562306a36Sopenharmony_ci req->data_sent < req->data_len) { 31662306a36Sopenharmony_ci req->curr_bio = req->curr_bio->bi_next; 31762306a36Sopenharmony_ci nvme_tcp_init_iter(req, ITER_SOURCE); 31862306a36Sopenharmony_ci } 31962306a36Sopenharmony_ci} 32062306a36Sopenharmony_ci 32162306a36Sopenharmony_cistatic inline void nvme_tcp_send_all(struct nvme_tcp_queue *queue) 32262306a36Sopenharmony_ci{ 32362306a36Sopenharmony_ci int ret; 32462306a36Sopenharmony_ci 32562306a36Sopenharmony_ci /* drain the send queue as much as we can... */ 32662306a36Sopenharmony_ci do { 32762306a36Sopenharmony_ci ret = nvme_tcp_try_send(queue); 32862306a36Sopenharmony_ci } while (ret > 0); 32962306a36Sopenharmony_ci} 33062306a36Sopenharmony_ci 33162306a36Sopenharmony_cistatic inline bool nvme_tcp_queue_more(struct nvme_tcp_queue *queue) 33262306a36Sopenharmony_ci{ 33362306a36Sopenharmony_ci return !list_empty(&queue->send_list) || 33462306a36Sopenharmony_ci !llist_empty(&queue->req_list); 33562306a36Sopenharmony_ci} 33662306a36Sopenharmony_ci 33762306a36Sopenharmony_cistatic inline void nvme_tcp_queue_request(struct nvme_tcp_request *req, 33862306a36Sopenharmony_ci bool sync, bool last) 33962306a36Sopenharmony_ci{ 34062306a36Sopenharmony_ci struct nvme_tcp_queue *queue = req->queue; 34162306a36Sopenharmony_ci bool empty; 34262306a36Sopenharmony_ci 34362306a36Sopenharmony_ci empty = llist_add(&req->lentry, &queue->req_list) && 34462306a36Sopenharmony_ci list_empty(&queue->send_list) && !queue->request; 34562306a36Sopenharmony_ci 34662306a36Sopenharmony_ci /* 34762306a36Sopenharmony_ci * if we're the first on the send_list and we can try to send 34862306a36Sopenharmony_ci * directly, otherwise queue io_work. Also, only do that if we 34962306a36Sopenharmony_ci * are on the same cpu, so we don't introduce contention. 35062306a36Sopenharmony_ci */ 35162306a36Sopenharmony_ci if (queue->io_cpu == raw_smp_processor_id() && 35262306a36Sopenharmony_ci sync && empty && mutex_trylock(&queue->send_mutex)) { 35362306a36Sopenharmony_ci nvme_tcp_send_all(queue); 35462306a36Sopenharmony_ci mutex_unlock(&queue->send_mutex); 35562306a36Sopenharmony_ci } 35662306a36Sopenharmony_ci 35762306a36Sopenharmony_ci if (last && nvme_tcp_queue_more(queue)) 35862306a36Sopenharmony_ci queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work); 35962306a36Sopenharmony_ci} 36062306a36Sopenharmony_ci 36162306a36Sopenharmony_cistatic void nvme_tcp_process_req_list(struct nvme_tcp_queue *queue) 36262306a36Sopenharmony_ci{ 36362306a36Sopenharmony_ci struct nvme_tcp_request *req; 36462306a36Sopenharmony_ci struct llist_node *node; 36562306a36Sopenharmony_ci 36662306a36Sopenharmony_ci for (node = llist_del_all(&queue->req_list); node; node = node->next) { 36762306a36Sopenharmony_ci req = llist_entry(node, struct nvme_tcp_request, lentry); 36862306a36Sopenharmony_ci list_add(&req->entry, &queue->send_list); 36962306a36Sopenharmony_ci } 37062306a36Sopenharmony_ci} 37162306a36Sopenharmony_ci 37262306a36Sopenharmony_cistatic inline struct nvme_tcp_request * 37362306a36Sopenharmony_cinvme_tcp_fetch_request(struct nvme_tcp_queue *queue) 37462306a36Sopenharmony_ci{ 37562306a36Sopenharmony_ci struct nvme_tcp_request *req; 37662306a36Sopenharmony_ci 37762306a36Sopenharmony_ci req = list_first_entry_or_null(&queue->send_list, 37862306a36Sopenharmony_ci struct nvme_tcp_request, entry); 37962306a36Sopenharmony_ci if (!req) { 38062306a36Sopenharmony_ci nvme_tcp_process_req_list(queue); 38162306a36Sopenharmony_ci req = list_first_entry_or_null(&queue->send_list, 38262306a36Sopenharmony_ci struct nvme_tcp_request, entry); 38362306a36Sopenharmony_ci if (unlikely(!req)) 38462306a36Sopenharmony_ci return NULL; 38562306a36Sopenharmony_ci } 38662306a36Sopenharmony_ci 38762306a36Sopenharmony_ci list_del(&req->entry); 38862306a36Sopenharmony_ci return req; 38962306a36Sopenharmony_ci} 39062306a36Sopenharmony_ci 39162306a36Sopenharmony_cistatic inline void nvme_tcp_ddgst_final(struct ahash_request *hash, 39262306a36Sopenharmony_ci __le32 *dgst) 39362306a36Sopenharmony_ci{ 39462306a36Sopenharmony_ci ahash_request_set_crypt(hash, NULL, (u8 *)dgst, 0); 39562306a36Sopenharmony_ci crypto_ahash_final(hash); 39662306a36Sopenharmony_ci} 39762306a36Sopenharmony_ci 39862306a36Sopenharmony_cistatic inline void nvme_tcp_ddgst_update(struct ahash_request *hash, 39962306a36Sopenharmony_ci struct page *page, off_t off, size_t len) 40062306a36Sopenharmony_ci{ 40162306a36Sopenharmony_ci struct scatterlist sg; 40262306a36Sopenharmony_ci 40362306a36Sopenharmony_ci sg_init_table(&sg, 1); 40462306a36Sopenharmony_ci sg_set_page(&sg, page, len, off); 40562306a36Sopenharmony_ci ahash_request_set_crypt(hash, &sg, NULL, len); 40662306a36Sopenharmony_ci crypto_ahash_update(hash); 40762306a36Sopenharmony_ci} 40862306a36Sopenharmony_ci 40962306a36Sopenharmony_cistatic inline void nvme_tcp_hdgst(struct ahash_request *hash, 41062306a36Sopenharmony_ci void *pdu, size_t len) 41162306a36Sopenharmony_ci{ 41262306a36Sopenharmony_ci struct scatterlist sg; 41362306a36Sopenharmony_ci 41462306a36Sopenharmony_ci sg_init_one(&sg, pdu, len); 41562306a36Sopenharmony_ci ahash_request_set_crypt(hash, &sg, pdu + len, len); 41662306a36Sopenharmony_ci crypto_ahash_digest(hash); 41762306a36Sopenharmony_ci} 41862306a36Sopenharmony_ci 41962306a36Sopenharmony_cistatic int nvme_tcp_verify_hdgst(struct nvme_tcp_queue *queue, 42062306a36Sopenharmony_ci void *pdu, size_t pdu_len) 42162306a36Sopenharmony_ci{ 42262306a36Sopenharmony_ci struct nvme_tcp_hdr *hdr = pdu; 42362306a36Sopenharmony_ci __le32 recv_digest; 42462306a36Sopenharmony_ci __le32 exp_digest; 42562306a36Sopenharmony_ci 42662306a36Sopenharmony_ci if (unlikely(!(hdr->flags & NVME_TCP_F_HDGST))) { 42762306a36Sopenharmony_ci dev_err(queue->ctrl->ctrl.device, 42862306a36Sopenharmony_ci "queue %d: header digest flag is cleared\n", 42962306a36Sopenharmony_ci nvme_tcp_queue_id(queue)); 43062306a36Sopenharmony_ci return -EPROTO; 43162306a36Sopenharmony_ci } 43262306a36Sopenharmony_ci 43362306a36Sopenharmony_ci recv_digest = *(__le32 *)(pdu + hdr->hlen); 43462306a36Sopenharmony_ci nvme_tcp_hdgst(queue->rcv_hash, pdu, pdu_len); 43562306a36Sopenharmony_ci exp_digest = *(__le32 *)(pdu + hdr->hlen); 43662306a36Sopenharmony_ci if (recv_digest != exp_digest) { 43762306a36Sopenharmony_ci dev_err(queue->ctrl->ctrl.device, 43862306a36Sopenharmony_ci "header digest error: recv %#x expected %#x\n", 43962306a36Sopenharmony_ci le32_to_cpu(recv_digest), le32_to_cpu(exp_digest)); 44062306a36Sopenharmony_ci return -EIO; 44162306a36Sopenharmony_ci } 44262306a36Sopenharmony_ci 44362306a36Sopenharmony_ci return 0; 44462306a36Sopenharmony_ci} 44562306a36Sopenharmony_ci 44662306a36Sopenharmony_cistatic int nvme_tcp_check_ddgst(struct nvme_tcp_queue *queue, void *pdu) 44762306a36Sopenharmony_ci{ 44862306a36Sopenharmony_ci struct nvme_tcp_hdr *hdr = pdu; 44962306a36Sopenharmony_ci u8 digest_len = nvme_tcp_hdgst_len(queue); 45062306a36Sopenharmony_ci u32 len; 45162306a36Sopenharmony_ci 45262306a36Sopenharmony_ci len = le32_to_cpu(hdr->plen) - hdr->hlen - 45362306a36Sopenharmony_ci ((hdr->flags & NVME_TCP_F_HDGST) ? digest_len : 0); 45462306a36Sopenharmony_ci 45562306a36Sopenharmony_ci if (unlikely(len && !(hdr->flags & NVME_TCP_F_DDGST))) { 45662306a36Sopenharmony_ci dev_err(queue->ctrl->ctrl.device, 45762306a36Sopenharmony_ci "queue %d: data digest flag is cleared\n", 45862306a36Sopenharmony_ci nvme_tcp_queue_id(queue)); 45962306a36Sopenharmony_ci return -EPROTO; 46062306a36Sopenharmony_ci } 46162306a36Sopenharmony_ci crypto_ahash_init(queue->rcv_hash); 46262306a36Sopenharmony_ci 46362306a36Sopenharmony_ci return 0; 46462306a36Sopenharmony_ci} 46562306a36Sopenharmony_ci 46662306a36Sopenharmony_cistatic void nvme_tcp_exit_request(struct blk_mq_tag_set *set, 46762306a36Sopenharmony_ci struct request *rq, unsigned int hctx_idx) 46862306a36Sopenharmony_ci{ 46962306a36Sopenharmony_ci struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); 47062306a36Sopenharmony_ci 47162306a36Sopenharmony_ci page_frag_free(req->pdu); 47262306a36Sopenharmony_ci} 47362306a36Sopenharmony_ci 47462306a36Sopenharmony_cistatic int nvme_tcp_init_request(struct blk_mq_tag_set *set, 47562306a36Sopenharmony_ci struct request *rq, unsigned int hctx_idx, 47662306a36Sopenharmony_ci unsigned int numa_node) 47762306a36Sopenharmony_ci{ 47862306a36Sopenharmony_ci struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(set->driver_data); 47962306a36Sopenharmony_ci struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); 48062306a36Sopenharmony_ci struct nvme_tcp_cmd_pdu *pdu; 48162306a36Sopenharmony_ci int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0; 48262306a36Sopenharmony_ci struct nvme_tcp_queue *queue = &ctrl->queues[queue_idx]; 48362306a36Sopenharmony_ci u8 hdgst = nvme_tcp_hdgst_len(queue); 48462306a36Sopenharmony_ci 48562306a36Sopenharmony_ci req->pdu = page_frag_alloc(&queue->pf_cache, 48662306a36Sopenharmony_ci sizeof(struct nvme_tcp_cmd_pdu) + hdgst, 48762306a36Sopenharmony_ci GFP_KERNEL | __GFP_ZERO); 48862306a36Sopenharmony_ci if (!req->pdu) 48962306a36Sopenharmony_ci return -ENOMEM; 49062306a36Sopenharmony_ci 49162306a36Sopenharmony_ci pdu = req->pdu; 49262306a36Sopenharmony_ci req->queue = queue; 49362306a36Sopenharmony_ci nvme_req(rq)->ctrl = &ctrl->ctrl; 49462306a36Sopenharmony_ci nvme_req(rq)->cmd = &pdu->cmd; 49562306a36Sopenharmony_ci 49662306a36Sopenharmony_ci return 0; 49762306a36Sopenharmony_ci} 49862306a36Sopenharmony_ci 49962306a36Sopenharmony_cistatic int nvme_tcp_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, 50062306a36Sopenharmony_ci unsigned int hctx_idx) 50162306a36Sopenharmony_ci{ 50262306a36Sopenharmony_ci struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(data); 50362306a36Sopenharmony_ci struct nvme_tcp_queue *queue = &ctrl->queues[hctx_idx + 1]; 50462306a36Sopenharmony_ci 50562306a36Sopenharmony_ci hctx->driver_data = queue; 50662306a36Sopenharmony_ci return 0; 50762306a36Sopenharmony_ci} 50862306a36Sopenharmony_ci 50962306a36Sopenharmony_cistatic int nvme_tcp_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data, 51062306a36Sopenharmony_ci unsigned int hctx_idx) 51162306a36Sopenharmony_ci{ 51262306a36Sopenharmony_ci struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(data); 51362306a36Sopenharmony_ci struct nvme_tcp_queue *queue = &ctrl->queues[0]; 51462306a36Sopenharmony_ci 51562306a36Sopenharmony_ci hctx->driver_data = queue; 51662306a36Sopenharmony_ci return 0; 51762306a36Sopenharmony_ci} 51862306a36Sopenharmony_ci 51962306a36Sopenharmony_cistatic enum nvme_tcp_recv_state 52062306a36Sopenharmony_cinvme_tcp_recv_state(struct nvme_tcp_queue *queue) 52162306a36Sopenharmony_ci{ 52262306a36Sopenharmony_ci return (queue->pdu_remaining) ? NVME_TCP_RECV_PDU : 52362306a36Sopenharmony_ci (queue->ddgst_remaining) ? NVME_TCP_RECV_DDGST : 52462306a36Sopenharmony_ci NVME_TCP_RECV_DATA; 52562306a36Sopenharmony_ci} 52662306a36Sopenharmony_ci 52762306a36Sopenharmony_cistatic void nvme_tcp_init_recv_ctx(struct nvme_tcp_queue *queue) 52862306a36Sopenharmony_ci{ 52962306a36Sopenharmony_ci queue->pdu_remaining = sizeof(struct nvme_tcp_rsp_pdu) + 53062306a36Sopenharmony_ci nvme_tcp_hdgst_len(queue); 53162306a36Sopenharmony_ci queue->pdu_offset = 0; 53262306a36Sopenharmony_ci queue->data_remaining = -1; 53362306a36Sopenharmony_ci queue->ddgst_remaining = 0; 53462306a36Sopenharmony_ci} 53562306a36Sopenharmony_ci 53662306a36Sopenharmony_cistatic void nvme_tcp_error_recovery(struct nvme_ctrl *ctrl) 53762306a36Sopenharmony_ci{ 53862306a36Sopenharmony_ci if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING)) 53962306a36Sopenharmony_ci return; 54062306a36Sopenharmony_ci 54162306a36Sopenharmony_ci dev_warn(ctrl->device, "starting error recovery\n"); 54262306a36Sopenharmony_ci queue_work(nvme_reset_wq, &to_tcp_ctrl(ctrl)->err_work); 54362306a36Sopenharmony_ci} 54462306a36Sopenharmony_ci 54562306a36Sopenharmony_cistatic int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue, 54662306a36Sopenharmony_ci struct nvme_completion *cqe) 54762306a36Sopenharmony_ci{ 54862306a36Sopenharmony_ci struct nvme_tcp_request *req; 54962306a36Sopenharmony_ci struct request *rq; 55062306a36Sopenharmony_ci 55162306a36Sopenharmony_ci rq = nvme_find_rq(nvme_tcp_tagset(queue), cqe->command_id); 55262306a36Sopenharmony_ci if (!rq) { 55362306a36Sopenharmony_ci dev_err(queue->ctrl->ctrl.device, 55462306a36Sopenharmony_ci "got bad cqe.command_id %#x on queue %d\n", 55562306a36Sopenharmony_ci cqe->command_id, nvme_tcp_queue_id(queue)); 55662306a36Sopenharmony_ci nvme_tcp_error_recovery(&queue->ctrl->ctrl); 55762306a36Sopenharmony_ci return -EINVAL; 55862306a36Sopenharmony_ci } 55962306a36Sopenharmony_ci 56062306a36Sopenharmony_ci req = blk_mq_rq_to_pdu(rq); 56162306a36Sopenharmony_ci if (req->status == cpu_to_le16(NVME_SC_SUCCESS)) 56262306a36Sopenharmony_ci req->status = cqe->status; 56362306a36Sopenharmony_ci 56462306a36Sopenharmony_ci if (!nvme_try_complete_req(rq, req->status, cqe->result)) 56562306a36Sopenharmony_ci nvme_complete_rq(rq); 56662306a36Sopenharmony_ci queue->nr_cqe++; 56762306a36Sopenharmony_ci 56862306a36Sopenharmony_ci return 0; 56962306a36Sopenharmony_ci} 57062306a36Sopenharmony_ci 57162306a36Sopenharmony_cistatic int nvme_tcp_handle_c2h_data(struct nvme_tcp_queue *queue, 57262306a36Sopenharmony_ci struct nvme_tcp_data_pdu *pdu) 57362306a36Sopenharmony_ci{ 57462306a36Sopenharmony_ci struct request *rq; 57562306a36Sopenharmony_ci 57662306a36Sopenharmony_ci rq = nvme_find_rq(nvme_tcp_tagset(queue), pdu->command_id); 57762306a36Sopenharmony_ci if (!rq) { 57862306a36Sopenharmony_ci dev_err(queue->ctrl->ctrl.device, 57962306a36Sopenharmony_ci "got bad c2hdata.command_id %#x on queue %d\n", 58062306a36Sopenharmony_ci pdu->command_id, nvme_tcp_queue_id(queue)); 58162306a36Sopenharmony_ci return -ENOENT; 58262306a36Sopenharmony_ci } 58362306a36Sopenharmony_ci 58462306a36Sopenharmony_ci if (!blk_rq_payload_bytes(rq)) { 58562306a36Sopenharmony_ci dev_err(queue->ctrl->ctrl.device, 58662306a36Sopenharmony_ci "queue %d tag %#x unexpected data\n", 58762306a36Sopenharmony_ci nvme_tcp_queue_id(queue), rq->tag); 58862306a36Sopenharmony_ci return -EIO; 58962306a36Sopenharmony_ci } 59062306a36Sopenharmony_ci 59162306a36Sopenharmony_ci queue->data_remaining = le32_to_cpu(pdu->data_length); 59262306a36Sopenharmony_ci 59362306a36Sopenharmony_ci if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS && 59462306a36Sopenharmony_ci unlikely(!(pdu->hdr.flags & NVME_TCP_F_DATA_LAST))) { 59562306a36Sopenharmony_ci dev_err(queue->ctrl->ctrl.device, 59662306a36Sopenharmony_ci "queue %d tag %#x SUCCESS set but not last PDU\n", 59762306a36Sopenharmony_ci nvme_tcp_queue_id(queue), rq->tag); 59862306a36Sopenharmony_ci nvme_tcp_error_recovery(&queue->ctrl->ctrl); 59962306a36Sopenharmony_ci return -EPROTO; 60062306a36Sopenharmony_ci } 60162306a36Sopenharmony_ci 60262306a36Sopenharmony_ci return 0; 60362306a36Sopenharmony_ci} 60462306a36Sopenharmony_ci 60562306a36Sopenharmony_cistatic int nvme_tcp_handle_comp(struct nvme_tcp_queue *queue, 60662306a36Sopenharmony_ci struct nvme_tcp_rsp_pdu *pdu) 60762306a36Sopenharmony_ci{ 60862306a36Sopenharmony_ci struct nvme_completion *cqe = &pdu->cqe; 60962306a36Sopenharmony_ci int ret = 0; 61062306a36Sopenharmony_ci 61162306a36Sopenharmony_ci /* 61262306a36Sopenharmony_ci * AEN requests are special as they don't time out and can 61362306a36Sopenharmony_ci * survive any kind of queue freeze and often don't respond to 61462306a36Sopenharmony_ci * aborts. We don't even bother to allocate a struct request 61562306a36Sopenharmony_ci * for them but rather special case them here. 61662306a36Sopenharmony_ci */ 61762306a36Sopenharmony_ci if (unlikely(nvme_is_aen_req(nvme_tcp_queue_id(queue), 61862306a36Sopenharmony_ci cqe->command_id))) 61962306a36Sopenharmony_ci nvme_complete_async_event(&queue->ctrl->ctrl, cqe->status, 62062306a36Sopenharmony_ci &cqe->result); 62162306a36Sopenharmony_ci else 62262306a36Sopenharmony_ci ret = nvme_tcp_process_nvme_cqe(queue, cqe); 62362306a36Sopenharmony_ci 62462306a36Sopenharmony_ci return ret; 62562306a36Sopenharmony_ci} 62662306a36Sopenharmony_ci 62762306a36Sopenharmony_cistatic void nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req) 62862306a36Sopenharmony_ci{ 62962306a36Sopenharmony_ci struct nvme_tcp_data_pdu *data = nvme_tcp_req_data_pdu(req); 63062306a36Sopenharmony_ci struct nvme_tcp_queue *queue = req->queue; 63162306a36Sopenharmony_ci struct request *rq = blk_mq_rq_from_pdu(req); 63262306a36Sopenharmony_ci u32 h2cdata_sent = req->pdu_len; 63362306a36Sopenharmony_ci u8 hdgst = nvme_tcp_hdgst_len(queue); 63462306a36Sopenharmony_ci u8 ddgst = nvme_tcp_ddgst_len(queue); 63562306a36Sopenharmony_ci 63662306a36Sopenharmony_ci req->state = NVME_TCP_SEND_H2C_PDU; 63762306a36Sopenharmony_ci req->offset = 0; 63862306a36Sopenharmony_ci req->pdu_len = min(req->h2cdata_left, queue->maxh2cdata); 63962306a36Sopenharmony_ci req->pdu_sent = 0; 64062306a36Sopenharmony_ci req->h2cdata_left -= req->pdu_len; 64162306a36Sopenharmony_ci req->h2cdata_offset += h2cdata_sent; 64262306a36Sopenharmony_ci 64362306a36Sopenharmony_ci memset(data, 0, sizeof(*data)); 64462306a36Sopenharmony_ci data->hdr.type = nvme_tcp_h2c_data; 64562306a36Sopenharmony_ci if (!req->h2cdata_left) 64662306a36Sopenharmony_ci data->hdr.flags = NVME_TCP_F_DATA_LAST; 64762306a36Sopenharmony_ci if (queue->hdr_digest) 64862306a36Sopenharmony_ci data->hdr.flags |= NVME_TCP_F_HDGST; 64962306a36Sopenharmony_ci if (queue->data_digest) 65062306a36Sopenharmony_ci data->hdr.flags |= NVME_TCP_F_DDGST; 65162306a36Sopenharmony_ci data->hdr.hlen = sizeof(*data); 65262306a36Sopenharmony_ci data->hdr.pdo = data->hdr.hlen + hdgst; 65362306a36Sopenharmony_ci data->hdr.plen = 65462306a36Sopenharmony_ci cpu_to_le32(data->hdr.hlen + hdgst + req->pdu_len + ddgst); 65562306a36Sopenharmony_ci data->ttag = req->ttag; 65662306a36Sopenharmony_ci data->command_id = nvme_cid(rq); 65762306a36Sopenharmony_ci data->data_offset = cpu_to_le32(req->h2cdata_offset); 65862306a36Sopenharmony_ci data->data_length = cpu_to_le32(req->pdu_len); 65962306a36Sopenharmony_ci} 66062306a36Sopenharmony_ci 66162306a36Sopenharmony_cistatic int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue, 66262306a36Sopenharmony_ci struct nvme_tcp_r2t_pdu *pdu) 66362306a36Sopenharmony_ci{ 66462306a36Sopenharmony_ci struct nvme_tcp_request *req; 66562306a36Sopenharmony_ci struct request *rq; 66662306a36Sopenharmony_ci u32 r2t_length = le32_to_cpu(pdu->r2t_length); 66762306a36Sopenharmony_ci u32 r2t_offset = le32_to_cpu(pdu->r2t_offset); 66862306a36Sopenharmony_ci 66962306a36Sopenharmony_ci rq = nvme_find_rq(nvme_tcp_tagset(queue), pdu->command_id); 67062306a36Sopenharmony_ci if (!rq) { 67162306a36Sopenharmony_ci dev_err(queue->ctrl->ctrl.device, 67262306a36Sopenharmony_ci "got bad r2t.command_id %#x on queue %d\n", 67362306a36Sopenharmony_ci pdu->command_id, nvme_tcp_queue_id(queue)); 67462306a36Sopenharmony_ci return -ENOENT; 67562306a36Sopenharmony_ci } 67662306a36Sopenharmony_ci req = blk_mq_rq_to_pdu(rq); 67762306a36Sopenharmony_ci 67862306a36Sopenharmony_ci if (unlikely(!r2t_length)) { 67962306a36Sopenharmony_ci dev_err(queue->ctrl->ctrl.device, 68062306a36Sopenharmony_ci "req %d r2t len is %u, probably a bug...\n", 68162306a36Sopenharmony_ci rq->tag, r2t_length); 68262306a36Sopenharmony_ci return -EPROTO; 68362306a36Sopenharmony_ci } 68462306a36Sopenharmony_ci 68562306a36Sopenharmony_ci if (unlikely(req->data_sent + r2t_length > req->data_len)) { 68662306a36Sopenharmony_ci dev_err(queue->ctrl->ctrl.device, 68762306a36Sopenharmony_ci "req %d r2t len %u exceeded data len %u (%zu sent)\n", 68862306a36Sopenharmony_ci rq->tag, r2t_length, req->data_len, req->data_sent); 68962306a36Sopenharmony_ci return -EPROTO; 69062306a36Sopenharmony_ci } 69162306a36Sopenharmony_ci 69262306a36Sopenharmony_ci if (unlikely(r2t_offset < req->data_sent)) { 69362306a36Sopenharmony_ci dev_err(queue->ctrl->ctrl.device, 69462306a36Sopenharmony_ci "req %d unexpected r2t offset %u (expected %zu)\n", 69562306a36Sopenharmony_ci rq->tag, r2t_offset, req->data_sent); 69662306a36Sopenharmony_ci return -EPROTO; 69762306a36Sopenharmony_ci } 69862306a36Sopenharmony_ci 69962306a36Sopenharmony_ci req->pdu_len = 0; 70062306a36Sopenharmony_ci req->h2cdata_left = r2t_length; 70162306a36Sopenharmony_ci req->h2cdata_offset = r2t_offset; 70262306a36Sopenharmony_ci req->ttag = pdu->ttag; 70362306a36Sopenharmony_ci 70462306a36Sopenharmony_ci nvme_tcp_setup_h2c_data_pdu(req); 70562306a36Sopenharmony_ci nvme_tcp_queue_request(req, false, true); 70662306a36Sopenharmony_ci 70762306a36Sopenharmony_ci return 0; 70862306a36Sopenharmony_ci} 70962306a36Sopenharmony_ci 71062306a36Sopenharmony_cistatic int nvme_tcp_recv_pdu(struct nvme_tcp_queue *queue, struct sk_buff *skb, 71162306a36Sopenharmony_ci unsigned int *offset, size_t *len) 71262306a36Sopenharmony_ci{ 71362306a36Sopenharmony_ci struct nvme_tcp_hdr *hdr; 71462306a36Sopenharmony_ci char *pdu = queue->pdu; 71562306a36Sopenharmony_ci size_t rcv_len = min_t(size_t, *len, queue->pdu_remaining); 71662306a36Sopenharmony_ci int ret; 71762306a36Sopenharmony_ci 71862306a36Sopenharmony_ci ret = skb_copy_bits(skb, *offset, 71962306a36Sopenharmony_ci &pdu[queue->pdu_offset], rcv_len); 72062306a36Sopenharmony_ci if (unlikely(ret)) 72162306a36Sopenharmony_ci return ret; 72262306a36Sopenharmony_ci 72362306a36Sopenharmony_ci queue->pdu_remaining -= rcv_len; 72462306a36Sopenharmony_ci queue->pdu_offset += rcv_len; 72562306a36Sopenharmony_ci *offset += rcv_len; 72662306a36Sopenharmony_ci *len -= rcv_len; 72762306a36Sopenharmony_ci if (queue->pdu_remaining) 72862306a36Sopenharmony_ci return 0; 72962306a36Sopenharmony_ci 73062306a36Sopenharmony_ci hdr = queue->pdu; 73162306a36Sopenharmony_ci if (queue->hdr_digest) { 73262306a36Sopenharmony_ci ret = nvme_tcp_verify_hdgst(queue, queue->pdu, hdr->hlen); 73362306a36Sopenharmony_ci if (unlikely(ret)) 73462306a36Sopenharmony_ci return ret; 73562306a36Sopenharmony_ci } 73662306a36Sopenharmony_ci 73762306a36Sopenharmony_ci 73862306a36Sopenharmony_ci if (queue->data_digest) { 73962306a36Sopenharmony_ci ret = nvme_tcp_check_ddgst(queue, queue->pdu); 74062306a36Sopenharmony_ci if (unlikely(ret)) 74162306a36Sopenharmony_ci return ret; 74262306a36Sopenharmony_ci } 74362306a36Sopenharmony_ci 74462306a36Sopenharmony_ci switch (hdr->type) { 74562306a36Sopenharmony_ci case nvme_tcp_c2h_data: 74662306a36Sopenharmony_ci return nvme_tcp_handle_c2h_data(queue, (void *)queue->pdu); 74762306a36Sopenharmony_ci case nvme_tcp_rsp: 74862306a36Sopenharmony_ci nvme_tcp_init_recv_ctx(queue); 74962306a36Sopenharmony_ci return nvme_tcp_handle_comp(queue, (void *)queue->pdu); 75062306a36Sopenharmony_ci case nvme_tcp_r2t: 75162306a36Sopenharmony_ci nvme_tcp_init_recv_ctx(queue); 75262306a36Sopenharmony_ci return nvme_tcp_handle_r2t(queue, (void *)queue->pdu); 75362306a36Sopenharmony_ci default: 75462306a36Sopenharmony_ci dev_err(queue->ctrl->ctrl.device, 75562306a36Sopenharmony_ci "unsupported pdu type (%d)\n", hdr->type); 75662306a36Sopenharmony_ci return -EINVAL; 75762306a36Sopenharmony_ci } 75862306a36Sopenharmony_ci} 75962306a36Sopenharmony_ci 76062306a36Sopenharmony_cistatic inline void nvme_tcp_end_request(struct request *rq, u16 status) 76162306a36Sopenharmony_ci{ 76262306a36Sopenharmony_ci union nvme_result res = {}; 76362306a36Sopenharmony_ci 76462306a36Sopenharmony_ci if (!nvme_try_complete_req(rq, cpu_to_le16(status << 1), res)) 76562306a36Sopenharmony_ci nvme_complete_rq(rq); 76662306a36Sopenharmony_ci} 76762306a36Sopenharmony_ci 76862306a36Sopenharmony_cistatic int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb, 76962306a36Sopenharmony_ci unsigned int *offset, size_t *len) 77062306a36Sopenharmony_ci{ 77162306a36Sopenharmony_ci struct nvme_tcp_data_pdu *pdu = (void *)queue->pdu; 77262306a36Sopenharmony_ci struct request *rq = 77362306a36Sopenharmony_ci nvme_cid_to_rq(nvme_tcp_tagset(queue), pdu->command_id); 77462306a36Sopenharmony_ci struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); 77562306a36Sopenharmony_ci 77662306a36Sopenharmony_ci while (true) { 77762306a36Sopenharmony_ci int recv_len, ret; 77862306a36Sopenharmony_ci 77962306a36Sopenharmony_ci recv_len = min_t(size_t, *len, queue->data_remaining); 78062306a36Sopenharmony_ci if (!recv_len) 78162306a36Sopenharmony_ci break; 78262306a36Sopenharmony_ci 78362306a36Sopenharmony_ci if (!iov_iter_count(&req->iter)) { 78462306a36Sopenharmony_ci req->curr_bio = req->curr_bio->bi_next; 78562306a36Sopenharmony_ci 78662306a36Sopenharmony_ci /* 78762306a36Sopenharmony_ci * If we don`t have any bios it means that controller 78862306a36Sopenharmony_ci * sent more data than we requested, hence error 78962306a36Sopenharmony_ci */ 79062306a36Sopenharmony_ci if (!req->curr_bio) { 79162306a36Sopenharmony_ci dev_err(queue->ctrl->ctrl.device, 79262306a36Sopenharmony_ci "queue %d no space in request %#x", 79362306a36Sopenharmony_ci nvme_tcp_queue_id(queue), rq->tag); 79462306a36Sopenharmony_ci nvme_tcp_init_recv_ctx(queue); 79562306a36Sopenharmony_ci return -EIO; 79662306a36Sopenharmony_ci } 79762306a36Sopenharmony_ci nvme_tcp_init_iter(req, ITER_DEST); 79862306a36Sopenharmony_ci } 79962306a36Sopenharmony_ci 80062306a36Sopenharmony_ci /* we can read only from what is left in this bio */ 80162306a36Sopenharmony_ci recv_len = min_t(size_t, recv_len, 80262306a36Sopenharmony_ci iov_iter_count(&req->iter)); 80362306a36Sopenharmony_ci 80462306a36Sopenharmony_ci if (queue->data_digest) 80562306a36Sopenharmony_ci ret = skb_copy_and_hash_datagram_iter(skb, *offset, 80662306a36Sopenharmony_ci &req->iter, recv_len, queue->rcv_hash); 80762306a36Sopenharmony_ci else 80862306a36Sopenharmony_ci ret = skb_copy_datagram_iter(skb, *offset, 80962306a36Sopenharmony_ci &req->iter, recv_len); 81062306a36Sopenharmony_ci if (ret) { 81162306a36Sopenharmony_ci dev_err(queue->ctrl->ctrl.device, 81262306a36Sopenharmony_ci "queue %d failed to copy request %#x data", 81362306a36Sopenharmony_ci nvme_tcp_queue_id(queue), rq->tag); 81462306a36Sopenharmony_ci return ret; 81562306a36Sopenharmony_ci } 81662306a36Sopenharmony_ci 81762306a36Sopenharmony_ci *len -= recv_len; 81862306a36Sopenharmony_ci *offset += recv_len; 81962306a36Sopenharmony_ci queue->data_remaining -= recv_len; 82062306a36Sopenharmony_ci } 82162306a36Sopenharmony_ci 82262306a36Sopenharmony_ci if (!queue->data_remaining) { 82362306a36Sopenharmony_ci if (queue->data_digest) { 82462306a36Sopenharmony_ci nvme_tcp_ddgst_final(queue->rcv_hash, &queue->exp_ddgst); 82562306a36Sopenharmony_ci queue->ddgst_remaining = NVME_TCP_DIGEST_LENGTH; 82662306a36Sopenharmony_ci } else { 82762306a36Sopenharmony_ci if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) { 82862306a36Sopenharmony_ci nvme_tcp_end_request(rq, 82962306a36Sopenharmony_ci le16_to_cpu(req->status)); 83062306a36Sopenharmony_ci queue->nr_cqe++; 83162306a36Sopenharmony_ci } 83262306a36Sopenharmony_ci nvme_tcp_init_recv_ctx(queue); 83362306a36Sopenharmony_ci } 83462306a36Sopenharmony_ci } 83562306a36Sopenharmony_ci 83662306a36Sopenharmony_ci return 0; 83762306a36Sopenharmony_ci} 83862306a36Sopenharmony_ci 83962306a36Sopenharmony_cistatic int nvme_tcp_recv_ddgst(struct nvme_tcp_queue *queue, 84062306a36Sopenharmony_ci struct sk_buff *skb, unsigned int *offset, size_t *len) 84162306a36Sopenharmony_ci{ 84262306a36Sopenharmony_ci struct nvme_tcp_data_pdu *pdu = (void *)queue->pdu; 84362306a36Sopenharmony_ci char *ddgst = (char *)&queue->recv_ddgst; 84462306a36Sopenharmony_ci size_t recv_len = min_t(size_t, *len, queue->ddgst_remaining); 84562306a36Sopenharmony_ci off_t off = NVME_TCP_DIGEST_LENGTH - queue->ddgst_remaining; 84662306a36Sopenharmony_ci int ret; 84762306a36Sopenharmony_ci 84862306a36Sopenharmony_ci ret = skb_copy_bits(skb, *offset, &ddgst[off], recv_len); 84962306a36Sopenharmony_ci if (unlikely(ret)) 85062306a36Sopenharmony_ci return ret; 85162306a36Sopenharmony_ci 85262306a36Sopenharmony_ci queue->ddgst_remaining -= recv_len; 85362306a36Sopenharmony_ci *offset += recv_len; 85462306a36Sopenharmony_ci *len -= recv_len; 85562306a36Sopenharmony_ci if (queue->ddgst_remaining) 85662306a36Sopenharmony_ci return 0; 85762306a36Sopenharmony_ci 85862306a36Sopenharmony_ci if (queue->recv_ddgst != queue->exp_ddgst) { 85962306a36Sopenharmony_ci struct request *rq = nvme_cid_to_rq(nvme_tcp_tagset(queue), 86062306a36Sopenharmony_ci pdu->command_id); 86162306a36Sopenharmony_ci struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); 86262306a36Sopenharmony_ci 86362306a36Sopenharmony_ci req->status = cpu_to_le16(NVME_SC_DATA_XFER_ERROR); 86462306a36Sopenharmony_ci 86562306a36Sopenharmony_ci dev_err(queue->ctrl->ctrl.device, 86662306a36Sopenharmony_ci "data digest error: recv %#x expected %#x\n", 86762306a36Sopenharmony_ci le32_to_cpu(queue->recv_ddgst), 86862306a36Sopenharmony_ci le32_to_cpu(queue->exp_ddgst)); 86962306a36Sopenharmony_ci } 87062306a36Sopenharmony_ci 87162306a36Sopenharmony_ci if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) { 87262306a36Sopenharmony_ci struct request *rq = nvme_cid_to_rq(nvme_tcp_tagset(queue), 87362306a36Sopenharmony_ci pdu->command_id); 87462306a36Sopenharmony_ci struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); 87562306a36Sopenharmony_ci 87662306a36Sopenharmony_ci nvme_tcp_end_request(rq, le16_to_cpu(req->status)); 87762306a36Sopenharmony_ci queue->nr_cqe++; 87862306a36Sopenharmony_ci } 87962306a36Sopenharmony_ci 88062306a36Sopenharmony_ci nvme_tcp_init_recv_ctx(queue); 88162306a36Sopenharmony_ci return 0; 88262306a36Sopenharmony_ci} 88362306a36Sopenharmony_ci 88462306a36Sopenharmony_cistatic int nvme_tcp_recv_skb(read_descriptor_t *desc, struct sk_buff *skb, 88562306a36Sopenharmony_ci unsigned int offset, size_t len) 88662306a36Sopenharmony_ci{ 88762306a36Sopenharmony_ci struct nvme_tcp_queue *queue = desc->arg.data; 88862306a36Sopenharmony_ci size_t consumed = len; 88962306a36Sopenharmony_ci int result; 89062306a36Sopenharmony_ci 89162306a36Sopenharmony_ci if (unlikely(!queue->rd_enabled)) 89262306a36Sopenharmony_ci return -EFAULT; 89362306a36Sopenharmony_ci 89462306a36Sopenharmony_ci while (len) { 89562306a36Sopenharmony_ci switch (nvme_tcp_recv_state(queue)) { 89662306a36Sopenharmony_ci case NVME_TCP_RECV_PDU: 89762306a36Sopenharmony_ci result = nvme_tcp_recv_pdu(queue, skb, &offset, &len); 89862306a36Sopenharmony_ci break; 89962306a36Sopenharmony_ci case NVME_TCP_RECV_DATA: 90062306a36Sopenharmony_ci result = nvme_tcp_recv_data(queue, skb, &offset, &len); 90162306a36Sopenharmony_ci break; 90262306a36Sopenharmony_ci case NVME_TCP_RECV_DDGST: 90362306a36Sopenharmony_ci result = nvme_tcp_recv_ddgst(queue, skb, &offset, &len); 90462306a36Sopenharmony_ci break; 90562306a36Sopenharmony_ci default: 90662306a36Sopenharmony_ci result = -EFAULT; 90762306a36Sopenharmony_ci } 90862306a36Sopenharmony_ci if (result) { 90962306a36Sopenharmony_ci dev_err(queue->ctrl->ctrl.device, 91062306a36Sopenharmony_ci "receive failed: %d\n", result); 91162306a36Sopenharmony_ci queue->rd_enabled = false; 91262306a36Sopenharmony_ci nvme_tcp_error_recovery(&queue->ctrl->ctrl); 91362306a36Sopenharmony_ci return result; 91462306a36Sopenharmony_ci } 91562306a36Sopenharmony_ci } 91662306a36Sopenharmony_ci 91762306a36Sopenharmony_ci return consumed; 91862306a36Sopenharmony_ci} 91962306a36Sopenharmony_ci 92062306a36Sopenharmony_cistatic void nvme_tcp_data_ready(struct sock *sk) 92162306a36Sopenharmony_ci{ 92262306a36Sopenharmony_ci struct nvme_tcp_queue *queue; 92362306a36Sopenharmony_ci 92462306a36Sopenharmony_ci trace_sk_data_ready(sk); 92562306a36Sopenharmony_ci 92662306a36Sopenharmony_ci read_lock_bh(&sk->sk_callback_lock); 92762306a36Sopenharmony_ci queue = sk->sk_user_data; 92862306a36Sopenharmony_ci if (likely(queue && queue->rd_enabled) && 92962306a36Sopenharmony_ci !test_bit(NVME_TCP_Q_POLLING, &queue->flags)) 93062306a36Sopenharmony_ci queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work); 93162306a36Sopenharmony_ci read_unlock_bh(&sk->sk_callback_lock); 93262306a36Sopenharmony_ci} 93362306a36Sopenharmony_ci 93462306a36Sopenharmony_cistatic void nvme_tcp_write_space(struct sock *sk) 93562306a36Sopenharmony_ci{ 93662306a36Sopenharmony_ci struct nvme_tcp_queue *queue; 93762306a36Sopenharmony_ci 93862306a36Sopenharmony_ci read_lock_bh(&sk->sk_callback_lock); 93962306a36Sopenharmony_ci queue = sk->sk_user_data; 94062306a36Sopenharmony_ci if (likely(queue && sk_stream_is_writeable(sk))) { 94162306a36Sopenharmony_ci clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 94262306a36Sopenharmony_ci queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work); 94362306a36Sopenharmony_ci } 94462306a36Sopenharmony_ci read_unlock_bh(&sk->sk_callback_lock); 94562306a36Sopenharmony_ci} 94662306a36Sopenharmony_ci 94762306a36Sopenharmony_cistatic void nvme_tcp_state_change(struct sock *sk) 94862306a36Sopenharmony_ci{ 94962306a36Sopenharmony_ci struct nvme_tcp_queue *queue; 95062306a36Sopenharmony_ci 95162306a36Sopenharmony_ci read_lock_bh(&sk->sk_callback_lock); 95262306a36Sopenharmony_ci queue = sk->sk_user_data; 95362306a36Sopenharmony_ci if (!queue) 95462306a36Sopenharmony_ci goto done; 95562306a36Sopenharmony_ci 95662306a36Sopenharmony_ci switch (sk->sk_state) { 95762306a36Sopenharmony_ci case TCP_CLOSE: 95862306a36Sopenharmony_ci case TCP_CLOSE_WAIT: 95962306a36Sopenharmony_ci case TCP_LAST_ACK: 96062306a36Sopenharmony_ci case TCP_FIN_WAIT1: 96162306a36Sopenharmony_ci case TCP_FIN_WAIT2: 96262306a36Sopenharmony_ci nvme_tcp_error_recovery(&queue->ctrl->ctrl); 96362306a36Sopenharmony_ci break; 96462306a36Sopenharmony_ci default: 96562306a36Sopenharmony_ci dev_info(queue->ctrl->ctrl.device, 96662306a36Sopenharmony_ci "queue %d socket state %d\n", 96762306a36Sopenharmony_ci nvme_tcp_queue_id(queue), sk->sk_state); 96862306a36Sopenharmony_ci } 96962306a36Sopenharmony_ci 97062306a36Sopenharmony_ci queue->state_change(sk); 97162306a36Sopenharmony_cidone: 97262306a36Sopenharmony_ci read_unlock_bh(&sk->sk_callback_lock); 97362306a36Sopenharmony_ci} 97462306a36Sopenharmony_ci 97562306a36Sopenharmony_cistatic inline void nvme_tcp_done_send_req(struct nvme_tcp_queue *queue) 97662306a36Sopenharmony_ci{ 97762306a36Sopenharmony_ci queue->request = NULL; 97862306a36Sopenharmony_ci} 97962306a36Sopenharmony_ci 98062306a36Sopenharmony_cistatic void nvme_tcp_fail_request(struct nvme_tcp_request *req) 98162306a36Sopenharmony_ci{ 98262306a36Sopenharmony_ci if (nvme_tcp_async_req(req)) { 98362306a36Sopenharmony_ci union nvme_result res = {}; 98462306a36Sopenharmony_ci 98562306a36Sopenharmony_ci nvme_complete_async_event(&req->queue->ctrl->ctrl, 98662306a36Sopenharmony_ci cpu_to_le16(NVME_SC_HOST_PATH_ERROR), &res); 98762306a36Sopenharmony_ci } else { 98862306a36Sopenharmony_ci nvme_tcp_end_request(blk_mq_rq_from_pdu(req), 98962306a36Sopenharmony_ci NVME_SC_HOST_PATH_ERROR); 99062306a36Sopenharmony_ci } 99162306a36Sopenharmony_ci} 99262306a36Sopenharmony_ci 99362306a36Sopenharmony_cistatic int nvme_tcp_try_send_data(struct nvme_tcp_request *req) 99462306a36Sopenharmony_ci{ 99562306a36Sopenharmony_ci struct nvme_tcp_queue *queue = req->queue; 99662306a36Sopenharmony_ci int req_data_len = req->data_len; 99762306a36Sopenharmony_ci u32 h2cdata_left = req->h2cdata_left; 99862306a36Sopenharmony_ci 99962306a36Sopenharmony_ci while (true) { 100062306a36Sopenharmony_ci struct bio_vec bvec; 100162306a36Sopenharmony_ci struct msghdr msg = { 100262306a36Sopenharmony_ci .msg_flags = MSG_DONTWAIT | MSG_SPLICE_PAGES, 100362306a36Sopenharmony_ci }; 100462306a36Sopenharmony_ci struct page *page = nvme_tcp_req_cur_page(req); 100562306a36Sopenharmony_ci size_t offset = nvme_tcp_req_cur_offset(req); 100662306a36Sopenharmony_ci size_t len = nvme_tcp_req_cur_length(req); 100762306a36Sopenharmony_ci bool last = nvme_tcp_pdu_last_send(req, len); 100862306a36Sopenharmony_ci int req_data_sent = req->data_sent; 100962306a36Sopenharmony_ci int ret; 101062306a36Sopenharmony_ci 101162306a36Sopenharmony_ci if (last && !queue->data_digest && !nvme_tcp_queue_more(queue)) 101262306a36Sopenharmony_ci msg.msg_flags |= MSG_EOR; 101362306a36Sopenharmony_ci else 101462306a36Sopenharmony_ci msg.msg_flags |= MSG_MORE; 101562306a36Sopenharmony_ci 101662306a36Sopenharmony_ci if (!sendpage_ok(page)) 101762306a36Sopenharmony_ci msg.msg_flags &= ~MSG_SPLICE_PAGES; 101862306a36Sopenharmony_ci 101962306a36Sopenharmony_ci bvec_set_page(&bvec, page, len, offset); 102062306a36Sopenharmony_ci iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, len); 102162306a36Sopenharmony_ci ret = sock_sendmsg(queue->sock, &msg); 102262306a36Sopenharmony_ci if (ret <= 0) 102362306a36Sopenharmony_ci return ret; 102462306a36Sopenharmony_ci 102562306a36Sopenharmony_ci if (queue->data_digest) 102662306a36Sopenharmony_ci nvme_tcp_ddgst_update(queue->snd_hash, page, 102762306a36Sopenharmony_ci offset, ret); 102862306a36Sopenharmony_ci 102962306a36Sopenharmony_ci /* 103062306a36Sopenharmony_ci * update the request iterator except for the last payload send 103162306a36Sopenharmony_ci * in the request where we don't want to modify it as we may 103262306a36Sopenharmony_ci * compete with the RX path completing the request. 103362306a36Sopenharmony_ci */ 103462306a36Sopenharmony_ci if (req_data_sent + ret < req_data_len) 103562306a36Sopenharmony_ci nvme_tcp_advance_req(req, ret); 103662306a36Sopenharmony_ci 103762306a36Sopenharmony_ci /* fully successful last send in current PDU */ 103862306a36Sopenharmony_ci if (last && ret == len) { 103962306a36Sopenharmony_ci if (queue->data_digest) { 104062306a36Sopenharmony_ci nvme_tcp_ddgst_final(queue->snd_hash, 104162306a36Sopenharmony_ci &req->ddgst); 104262306a36Sopenharmony_ci req->state = NVME_TCP_SEND_DDGST; 104362306a36Sopenharmony_ci req->offset = 0; 104462306a36Sopenharmony_ci } else { 104562306a36Sopenharmony_ci if (h2cdata_left) 104662306a36Sopenharmony_ci nvme_tcp_setup_h2c_data_pdu(req); 104762306a36Sopenharmony_ci else 104862306a36Sopenharmony_ci nvme_tcp_done_send_req(queue); 104962306a36Sopenharmony_ci } 105062306a36Sopenharmony_ci return 1; 105162306a36Sopenharmony_ci } 105262306a36Sopenharmony_ci } 105362306a36Sopenharmony_ci return -EAGAIN; 105462306a36Sopenharmony_ci} 105562306a36Sopenharmony_ci 105662306a36Sopenharmony_cistatic int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req) 105762306a36Sopenharmony_ci{ 105862306a36Sopenharmony_ci struct nvme_tcp_queue *queue = req->queue; 105962306a36Sopenharmony_ci struct nvme_tcp_cmd_pdu *pdu = nvme_tcp_req_cmd_pdu(req); 106062306a36Sopenharmony_ci struct bio_vec bvec; 106162306a36Sopenharmony_ci struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_SPLICE_PAGES, }; 106262306a36Sopenharmony_ci bool inline_data = nvme_tcp_has_inline_data(req); 106362306a36Sopenharmony_ci u8 hdgst = nvme_tcp_hdgst_len(queue); 106462306a36Sopenharmony_ci int len = sizeof(*pdu) + hdgst - req->offset; 106562306a36Sopenharmony_ci int ret; 106662306a36Sopenharmony_ci 106762306a36Sopenharmony_ci if (inline_data || nvme_tcp_queue_more(queue)) 106862306a36Sopenharmony_ci msg.msg_flags |= MSG_MORE; 106962306a36Sopenharmony_ci else 107062306a36Sopenharmony_ci msg.msg_flags |= MSG_EOR; 107162306a36Sopenharmony_ci 107262306a36Sopenharmony_ci if (queue->hdr_digest && !req->offset) 107362306a36Sopenharmony_ci nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu)); 107462306a36Sopenharmony_ci 107562306a36Sopenharmony_ci bvec_set_virt(&bvec, (void *)pdu + req->offset, len); 107662306a36Sopenharmony_ci iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, len); 107762306a36Sopenharmony_ci ret = sock_sendmsg(queue->sock, &msg); 107862306a36Sopenharmony_ci if (unlikely(ret <= 0)) 107962306a36Sopenharmony_ci return ret; 108062306a36Sopenharmony_ci 108162306a36Sopenharmony_ci len -= ret; 108262306a36Sopenharmony_ci if (!len) { 108362306a36Sopenharmony_ci if (inline_data) { 108462306a36Sopenharmony_ci req->state = NVME_TCP_SEND_DATA; 108562306a36Sopenharmony_ci if (queue->data_digest) 108662306a36Sopenharmony_ci crypto_ahash_init(queue->snd_hash); 108762306a36Sopenharmony_ci } else { 108862306a36Sopenharmony_ci nvme_tcp_done_send_req(queue); 108962306a36Sopenharmony_ci } 109062306a36Sopenharmony_ci return 1; 109162306a36Sopenharmony_ci } 109262306a36Sopenharmony_ci req->offset += ret; 109362306a36Sopenharmony_ci 109462306a36Sopenharmony_ci return -EAGAIN; 109562306a36Sopenharmony_ci} 109662306a36Sopenharmony_ci 109762306a36Sopenharmony_cistatic int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req) 109862306a36Sopenharmony_ci{ 109962306a36Sopenharmony_ci struct nvme_tcp_queue *queue = req->queue; 110062306a36Sopenharmony_ci struct nvme_tcp_data_pdu *pdu = nvme_tcp_req_data_pdu(req); 110162306a36Sopenharmony_ci struct bio_vec bvec; 110262306a36Sopenharmony_ci struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_MORE, }; 110362306a36Sopenharmony_ci u8 hdgst = nvme_tcp_hdgst_len(queue); 110462306a36Sopenharmony_ci int len = sizeof(*pdu) - req->offset + hdgst; 110562306a36Sopenharmony_ci int ret; 110662306a36Sopenharmony_ci 110762306a36Sopenharmony_ci if (queue->hdr_digest && !req->offset) 110862306a36Sopenharmony_ci nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu)); 110962306a36Sopenharmony_ci 111062306a36Sopenharmony_ci if (!req->h2cdata_left) 111162306a36Sopenharmony_ci msg.msg_flags |= MSG_SPLICE_PAGES; 111262306a36Sopenharmony_ci 111362306a36Sopenharmony_ci bvec_set_virt(&bvec, (void *)pdu + req->offset, len); 111462306a36Sopenharmony_ci iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, len); 111562306a36Sopenharmony_ci ret = sock_sendmsg(queue->sock, &msg); 111662306a36Sopenharmony_ci if (unlikely(ret <= 0)) 111762306a36Sopenharmony_ci return ret; 111862306a36Sopenharmony_ci 111962306a36Sopenharmony_ci len -= ret; 112062306a36Sopenharmony_ci if (!len) { 112162306a36Sopenharmony_ci req->state = NVME_TCP_SEND_DATA; 112262306a36Sopenharmony_ci if (queue->data_digest) 112362306a36Sopenharmony_ci crypto_ahash_init(queue->snd_hash); 112462306a36Sopenharmony_ci return 1; 112562306a36Sopenharmony_ci } 112662306a36Sopenharmony_ci req->offset += ret; 112762306a36Sopenharmony_ci 112862306a36Sopenharmony_ci return -EAGAIN; 112962306a36Sopenharmony_ci} 113062306a36Sopenharmony_ci 113162306a36Sopenharmony_cistatic int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req) 113262306a36Sopenharmony_ci{ 113362306a36Sopenharmony_ci struct nvme_tcp_queue *queue = req->queue; 113462306a36Sopenharmony_ci size_t offset = req->offset; 113562306a36Sopenharmony_ci u32 h2cdata_left = req->h2cdata_left; 113662306a36Sopenharmony_ci int ret; 113762306a36Sopenharmony_ci struct msghdr msg = { .msg_flags = MSG_DONTWAIT }; 113862306a36Sopenharmony_ci struct kvec iov = { 113962306a36Sopenharmony_ci .iov_base = (u8 *)&req->ddgst + req->offset, 114062306a36Sopenharmony_ci .iov_len = NVME_TCP_DIGEST_LENGTH - req->offset 114162306a36Sopenharmony_ci }; 114262306a36Sopenharmony_ci 114362306a36Sopenharmony_ci if (nvme_tcp_queue_more(queue)) 114462306a36Sopenharmony_ci msg.msg_flags |= MSG_MORE; 114562306a36Sopenharmony_ci else 114662306a36Sopenharmony_ci msg.msg_flags |= MSG_EOR; 114762306a36Sopenharmony_ci 114862306a36Sopenharmony_ci ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len); 114962306a36Sopenharmony_ci if (unlikely(ret <= 0)) 115062306a36Sopenharmony_ci return ret; 115162306a36Sopenharmony_ci 115262306a36Sopenharmony_ci if (offset + ret == NVME_TCP_DIGEST_LENGTH) { 115362306a36Sopenharmony_ci if (h2cdata_left) 115462306a36Sopenharmony_ci nvme_tcp_setup_h2c_data_pdu(req); 115562306a36Sopenharmony_ci else 115662306a36Sopenharmony_ci nvme_tcp_done_send_req(queue); 115762306a36Sopenharmony_ci return 1; 115862306a36Sopenharmony_ci } 115962306a36Sopenharmony_ci 116062306a36Sopenharmony_ci req->offset += ret; 116162306a36Sopenharmony_ci return -EAGAIN; 116262306a36Sopenharmony_ci} 116362306a36Sopenharmony_ci 116462306a36Sopenharmony_cistatic int nvme_tcp_try_send(struct nvme_tcp_queue *queue) 116562306a36Sopenharmony_ci{ 116662306a36Sopenharmony_ci struct nvme_tcp_request *req; 116762306a36Sopenharmony_ci unsigned int noreclaim_flag; 116862306a36Sopenharmony_ci int ret = 1; 116962306a36Sopenharmony_ci 117062306a36Sopenharmony_ci if (!queue->request) { 117162306a36Sopenharmony_ci queue->request = nvme_tcp_fetch_request(queue); 117262306a36Sopenharmony_ci if (!queue->request) 117362306a36Sopenharmony_ci return 0; 117462306a36Sopenharmony_ci } 117562306a36Sopenharmony_ci req = queue->request; 117662306a36Sopenharmony_ci 117762306a36Sopenharmony_ci noreclaim_flag = memalloc_noreclaim_save(); 117862306a36Sopenharmony_ci if (req->state == NVME_TCP_SEND_CMD_PDU) { 117962306a36Sopenharmony_ci ret = nvme_tcp_try_send_cmd_pdu(req); 118062306a36Sopenharmony_ci if (ret <= 0) 118162306a36Sopenharmony_ci goto done; 118262306a36Sopenharmony_ci if (!nvme_tcp_has_inline_data(req)) 118362306a36Sopenharmony_ci goto out; 118462306a36Sopenharmony_ci } 118562306a36Sopenharmony_ci 118662306a36Sopenharmony_ci if (req->state == NVME_TCP_SEND_H2C_PDU) { 118762306a36Sopenharmony_ci ret = nvme_tcp_try_send_data_pdu(req); 118862306a36Sopenharmony_ci if (ret <= 0) 118962306a36Sopenharmony_ci goto done; 119062306a36Sopenharmony_ci } 119162306a36Sopenharmony_ci 119262306a36Sopenharmony_ci if (req->state == NVME_TCP_SEND_DATA) { 119362306a36Sopenharmony_ci ret = nvme_tcp_try_send_data(req); 119462306a36Sopenharmony_ci if (ret <= 0) 119562306a36Sopenharmony_ci goto done; 119662306a36Sopenharmony_ci } 119762306a36Sopenharmony_ci 119862306a36Sopenharmony_ci if (req->state == NVME_TCP_SEND_DDGST) 119962306a36Sopenharmony_ci ret = nvme_tcp_try_send_ddgst(req); 120062306a36Sopenharmony_cidone: 120162306a36Sopenharmony_ci if (ret == -EAGAIN) { 120262306a36Sopenharmony_ci ret = 0; 120362306a36Sopenharmony_ci } else if (ret < 0) { 120462306a36Sopenharmony_ci dev_err(queue->ctrl->ctrl.device, 120562306a36Sopenharmony_ci "failed to send request %d\n", ret); 120662306a36Sopenharmony_ci nvme_tcp_fail_request(queue->request); 120762306a36Sopenharmony_ci nvme_tcp_done_send_req(queue); 120862306a36Sopenharmony_ci } 120962306a36Sopenharmony_ciout: 121062306a36Sopenharmony_ci memalloc_noreclaim_restore(noreclaim_flag); 121162306a36Sopenharmony_ci return ret; 121262306a36Sopenharmony_ci} 121362306a36Sopenharmony_ci 121462306a36Sopenharmony_cistatic int nvme_tcp_try_recv(struct nvme_tcp_queue *queue) 121562306a36Sopenharmony_ci{ 121662306a36Sopenharmony_ci struct socket *sock = queue->sock; 121762306a36Sopenharmony_ci struct sock *sk = sock->sk; 121862306a36Sopenharmony_ci read_descriptor_t rd_desc; 121962306a36Sopenharmony_ci int consumed; 122062306a36Sopenharmony_ci 122162306a36Sopenharmony_ci rd_desc.arg.data = queue; 122262306a36Sopenharmony_ci rd_desc.count = 1; 122362306a36Sopenharmony_ci lock_sock(sk); 122462306a36Sopenharmony_ci queue->nr_cqe = 0; 122562306a36Sopenharmony_ci consumed = sock->ops->read_sock(sk, &rd_desc, nvme_tcp_recv_skb); 122662306a36Sopenharmony_ci release_sock(sk); 122762306a36Sopenharmony_ci return consumed; 122862306a36Sopenharmony_ci} 122962306a36Sopenharmony_ci 123062306a36Sopenharmony_cistatic void nvme_tcp_io_work(struct work_struct *w) 123162306a36Sopenharmony_ci{ 123262306a36Sopenharmony_ci struct nvme_tcp_queue *queue = 123362306a36Sopenharmony_ci container_of(w, struct nvme_tcp_queue, io_work); 123462306a36Sopenharmony_ci unsigned long deadline = jiffies + msecs_to_jiffies(1); 123562306a36Sopenharmony_ci 123662306a36Sopenharmony_ci do { 123762306a36Sopenharmony_ci bool pending = false; 123862306a36Sopenharmony_ci int result; 123962306a36Sopenharmony_ci 124062306a36Sopenharmony_ci if (mutex_trylock(&queue->send_mutex)) { 124162306a36Sopenharmony_ci result = nvme_tcp_try_send(queue); 124262306a36Sopenharmony_ci mutex_unlock(&queue->send_mutex); 124362306a36Sopenharmony_ci if (result > 0) 124462306a36Sopenharmony_ci pending = true; 124562306a36Sopenharmony_ci else if (unlikely(result < 0)) 124662306a36Sopenharmony_ci break; 124762306a36Sopenharmony_ci } 124862306a36Sopenharmony_ci 124962306a36Sopenharmony_ci result = nvme_tcp_try_recv(queue); 125062306a36Sopenharmony_ci if (result > 0) 125162306a36Sopenharmony_ci pending = true; 125262306a36Sopenharmony_ci else if (unlikely(result < 0)) 125362306a36Sopenharmony_ci return; 125462306a36Sopenharmony_ci 125562306a36Sopenharmony_ci if (!pending || !queue->rd_enabled) 125662306a36Sopenharmony_ci return; 125762306a36Sopenharmony_ci 125862306a36Sopenharmony_ci } while (!time_after(jiffies, deadline)); /* quota is exhausted */ 125962306a36Sopenharmony_ci 126062306a36Sopenharmony_ci queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work); 126162306a36Sopenharmony_ci} 126262306a36Sopenharmony_ci 126362306a36Sopenharmony_cistatic void nvme_tcp_free_crypto(struct nvme_tcp_queue *queue) 126462306a36Sopenharmony_ci{ 126562306a36Sopenharmony_ci struct crypto_ahash *tfm = crypto_ahash_reqtfm(queue->rcv_hash); 126662306a36Sopenharmony_ci 126762306a36Sopenharmony_ci ahash_request_free(queue->rcv_hash); 126862306a36Sopenharmony_ci ahash_request_free(queue->snd_hash); 126962306a36Sopenharmony_ci crypto_free_ahash(tfm); 127062306a36Sopenharmony_ci} 127162306a36Sopenharmony_ci 127262306a36Sopenharmony_cistatic int nvme_tcp_alloc_crypto(struct nvme_tcp_queue *queue) 127362306a36Sopenharmony_ci{ 127462306a36Sopenharmony_ci struct crypto_ahash *tfm; 127562306a36Sopenharmony_ci 127662306a36Sopenharmony_ci tfm = crypto_alloc_ahash("crc32c", 0, CRYPTO_ALG_ASYNC); 127762306a36Sopenharmony_ci if (IS_ERR(tfm)) 127862306a36Sopenharmony_ci return PTR_ERR(tfm); 127962306a36Sopenharmony_ci 128062306a36Sopenharmony_ci queue->snd_hash = ahash_request_alloc(tfm, GFP_KERNEL); 128162306a36Sopenharmony_ci if (!queue->snd_hash) 128262306a36Sopenharmony_ci goto free_tfm; 128362306a36Sopenharmony_ci ahash_request_set_callback(queue->snd_hash, 0, NULL, NULL); 128462306a36Sopenharmony_ci 128562306a36Sopenharmony_ci queue->rcv_hash = ahash_request_alloc(tfm, GFP_KERNEL); 128662306a36Sopenharmony_ci if (!queue->rcv_hash) 128762306a36Sopenharmony_ci goto free_snd_hash; 128862306a36Sopenharmony_ci ahash_request_set_callback(queue->rcv_hash, 0, NULL, NULL); 128962306a36Sopenharmony_ci 129062306a36Sopenharmony_ci return 0; 129162306a36Sopenharmony_cifree_snd_hash: 129262306a36Sopenharmony_ci ahash_request_free(queue->snd_hash); 129362306a36Sopenharmony_cifree_tfm: 129462306a36Sopenharmony_ci crypto_free_ahash(tfm); 129562306a36Sopenharmony_ci return -ENOMEM; 129662306a36Sopenharmony_ci} 129762306a36Sopenharmony_ci 129862306a36Sopenharmony_cistatic void nvme_tcp_free_async_req(struct nvme_tcp_ctrl *ctrl) 129962306a36Sopenharmony_ci{ 130062306a36Sopenharmony_ci struct nvme_tcp_request *async = &ctrl->async_req; 130162306a36Sopenharmony_ci 130262306a36Sopenharmony_ci page_frag_free(async->pdu); 130362306a36Sopenharmony_ci} 130462306a36Sopenharmony_ci 130562306a36Sopenharmony_cistatic int nvme_tcp_alloc_async_req(struct nvme_tcp_ctrl *ctrl) 130662306a36Sopenharmony_ci{ 130762306a36Sopenharmony_ci struct nvme_tcp_queue *queue = &ctrl->queues[0]; 130862306a36Sopenharmony_ci struct nvme_tcp_request *async = &ctrl->async_req; 130962306a36Sopenharmony_ci u8 hdgst = nvme_tcp_hdgst_len(queue); 131062306a36Sopenharmony_ci 131162306a36Sopenharmony_ci async->pdu = page_frag_alloc(&queue->pf_cache, 131262306a36Sopenharmony_ci sizeof(struct nvme_tcp_cmd_pdu) + hdgst, 131362306a36Sopenharmony_ci GFP_KERNEL | __GFP_ZERO); 131462306a36Sopenharmony_ci if (!async->pdu) 131562306a36Sopenharmony_ci return -ENOMEM; 131662306a36Sopenharmony_ci 131762306a36Sopenharmony_ci async->queue = &ctrl->queues[0]; 131862306a36Sopenharmony_ci return 0; 131962306a36Sopenharmony_ci} 132062306a36Sopenharmony_ci 132162306a36Sopenharmony_cistatic void nvme_tcp_free_queue(struct nvme_ctrl *nctrl, int qid) 132262306a36Sopenharmony_ci{ 132362306a36Sopenharmony_ci struct page *page; 132462306a36Sopenharmony_ci struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); 132562306a36Sopenharmony_ci struct nvme_tcp_queue *queue = &ctrl->queues[qid]; 132662306a36Sopenharmony_ci unsigned int noreclaim_flag; 132762306a36Sopenharmony_ci 132862306a36Sopenharmony_ci if (!test_and_clear_bit(NVME_TCP_Q_ALLOCATED, &queue->flags)) 132962306a36Sopenharmony_ci return; 133062306a36Sopenharmony_ci 133162306a36Sopenharmony_ci if (queue->hdr_digest || queue->data_digest) 133262306a36Sopenharmony_ci nvme_tcp_free_crypto(queue); 133362306a36Sopenharmony_ci 133462306a36Sopenharmony_ci if (queue->pf_cache.va) { 133562306a36Sopenharmony_ci page = virt_to_head_page(queue->pf_cache.va); 133662306a36Sopenharmony_ci __page_frag_cache_drain(page, queue->pf_cache.pagecnt_bias); 133762306a36Sopenharmony_ci queue->pf_cache.va = NULL; 133862306a36Sopenharmony_ci } 133962306a36Sopenharmony_ci 134062306a36Sopenharmony_ci noreclaim_flag = memalloc_noreclaim_save(); 134162306a36Sopenharmony_ci sock_release(queue->sock); 134262306a36Sopenharmony_ci memalloc_noreclaim_restore(noreclaim_flag); 134362306a36Sopenharmony_ci 134462306a36Sopenharmony_ci kfree(queue->pdu); 134562306a36Sopenharmony_ci mutex_destroy(&queue->send_mutex); 134662306a36Sopenharmony_ci mutex_destroy(&queue->queue_lock); 134762306a36Sopenharmony_ci} 134862306a36Sopenharmony_ci 134962306a36Sopenharmony_cistatic int nvme_tcp_init_connection(struct nvme_tcp_queue *queue) 135062306a36Sopenharmony_ci{ 135162306a36Sopenharmony_ci struct nvme_tcp_icreq_pdu *icreq; 135262306a36Sopenharmony_ci struct nvme_tcp_icresp_pdu *icresp; 135362306a36Sopenharmony_ci struct msghdr msg = {}; 135462306a36Sopenharmony_ci struct kvec iov; 135562306a36Sopenharmony_ci bool ctrl_hdgst, ctrl_ddgst; 135662306a36Sopenharmony_ci u32 maxh2cdata; 135762306a36Sopenharmony_ci int ret; 135862306a36Sopenharmony_ci 135962306a36Sopenharmony_ci icreq = kzalloc(sizeof(*icreq), GFP_KERNEL); 136062306a36Sopenharmony_ci if (!icreq) 136162306a36Sopenharmony_ci return -ENOMEM; 136262306a36Sopenharmony_ci 136362306a36Sopenharmony_ci icresp = kzalloc(sizeof(*icresp), GFP_KERNEL); 136462306a36Sopenharmony_ci if (!icresp) { 136562306a36Sopenharmony_ci ret = -ENOMEM; 136662306a36Sopenharmony_ci goto free_icreq; 136762306a36Sopenharmony_ci } 136862306a36Sopenharmony_ci 136962306a36Sopenharmony_ci icreq->hdr.type = nvme_tcp_icreq; 137062306a36Sopenharmony_ci icreq->hdr.hlen = sizeof(*icreq); 137162306a36Sopenharmony_ci icreq->hdr.pdo = 0; 137262306a36Sopenharmony_ci icreq->hdr.plen = cpu_to_le32(icreq->hdr.hlen); 137362306a36Sopenharmony_ci icreq->pfv = cpu_to_le16(NVME_TCP_PFV_1_0); 137462306a36Sopenharmony_ci icreq->maxr2t = 0; /* single inflight r2t supported */ 137562306a36Sopenharmony_ci icreq->hpda = 0; /* no alignment constraint */ 137662306a36Sopenharmony_ci if (queue->hdr_digest) 137762306a36Sopenharmony_ci icreq->digest |= NVME_TCP_HDR_DIGEST_ENABLE; 137862306a36Sopenharmony_ci if (queue->data_digest) 137962306a36Sopenharmony_ci icreq->digest |= NVME_TCP_DATA_DIGEST_ENABLE; 138062306a36Sopenharmony_ci 138162306a36Sopenharmony_ci iov.iov_base = icreq; 138262306a36Sopenharmony_ci iov.iov_len = sizeof(*icreq); 138362306a36Sopenharmony_ci ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len); 138462306a36Sopenharmony_ci if (ret < 0) 138562306a36Sopenharmony_ci goto free_icresp; 138662306a36Sopenharmony_ci 138762306a36Sopenharmony_ci memset(&msg, 0, sizeof(msg)); 138862306a36Sopenharmony_ci iov.iov_base = icresp; 138962306a36Sopenharmony_ci iov.iov_len = sizeof(*icresp); 139062306a36Sopenharmony_ci ret = kernel_recvmsg(queue->sock, &msg, &iov, 1, 139162306a36Sopenharmony_ci iov.iov_len, msg.msg_flags); 139262306a36Sopenharmony_ci if (ret < 0) 139362306a36Sopenharmony_ci goto free_icresp; 139462306a36Sopenharmony_ci 139562306a36Sopenharmony_ci ret = -EINVAL; 139662306a36Sopenharmony_ci if (icresp->hdr.type != nvme_tcp_icresp) { 139762306a36Sopenharmony_ci pr_err("queue %d: bad type returned %d\n", 139862306a36Sopenharmony_ci nvme_tcp_queue_id(queue), icresp->hdr.type); 139962306a36Sopenharmony_ci goto free_icresp; 140062306a36Sopenharmony_ci } 140162306a36Sopenharmony_ci 140262306a36Sopenharmony_ci if (le32_to_cpu(icresp->hdr.plen) != sizeof(*icresp)) { 140362306a36Sopenharmony_ci pr_err("queue %d: bad pdu length returned %d\n", 140462306a36Sopenharmony_ci nvme_tcp_queue_id(queue), icresp->hdr.plen); 140562306a36Sopenharmony_ci goto free_icresp; 140662306a36Sopenharmony_ci } 140762306a36Sopenharmony_ci 140862306a36Sopenharmony_ci if (icresp->pfv != NVME_TCP_PFV_1_0) { 140962306a36Sopenharmony_ci pr_err("queue %d: bad pfv returned %d\n", 141062306a36Sopenharmony_ci nvme_tcp_queue_id(queue), icresp->pfv); 141162306a36Sopenharmony_ci goto free_icresp; 141262306a36Sopenharmony_ci } 141362306a36Sopenharmony_ci 141462306a36Sopenharmony_ci ctrl_ddgst = !!(icresp->digest & NVME_TCP_DATA_DIGEST_ENABLE); 141562306a36Sopenharmony_ci if ((queue->data_digest && !ctrl_ddgst) || 141662306a36Sopenharmony_ci (!queue->data_digest && ctrl_ddgst)) { 141762306a36Sopenharmony_ci pr_err("queue %d: data digest mismatch host: %s ctrl: %s\n", 141862306a36Sopenharmony_ci nvme_tcp_queue_id(queue), 141962306a36Sopenharmony_ci queue->data_digest ? "enabled" : "disabled", 142062306a36Sopenharmony_ci ctrl_ddgst ? "enabled" : "disabled"); 142162306a36Sopenharmony_ci goto free_icresp; 142262306a36Sopenharmony_ci } 142362306a36Sopenharmony_ci 142462306a36Sopenharmony_ci ctrl_hdgst = !!(icresp->digest & NVME_TCP_HDR_DIGEST_ENABLE); 142562306a36Sopenharmony_ci if ((queue->hdr_digest && !ctrl_hdgst) || 142662306a36Sopenharmony_ci (!queue->hdr_digest && ctrl_hdgst)) { 142762306a36Sopenharmony_ci pr_err("queue %d: header digest mismatch host: %s ctrl: %s\n", 142862306a36Sopenharmony_ci nvme_tcp_queue_id(queue), 142962306a36Sopenharmony_ci queue->hdr_digest ? "enabled" : "disabled", 143062306a36Sopenharmony_ci ctrl_hdgst ? "enabled" : "disabled"); 143162306a36Sopenharmony_ci goto free_icresp; 143262306a36Sopenharmony_ci } 143362306a36Sopenharmony_ci 143462306a36Sopenharmony_ci if (icresp->cpda != 0) { 143562306a36Sopenharmony_ci pr_err("queue %d: unsupported cpda returned %d\n", 143662306a36Sopenharmony_ci nvme_tcp_queue_id(queue), icresp->cpda); 143762306a36Sopenharmony_ci goto free_icresp; 143862306a36Sopenharmony_ci } 143962306a36Sopenharmony_ci 144062306a36Sopenharmony_ci maxh2cdata = le32_to_cpu(icresp->maxdata); 144162306a36Sopenharmony_ci if ((maxh2cdata % 4) || (maxh2cdata < NVME_TCP_MIN_MAXH2CDATA)) { 144262306a36Sopenharmony_ci pr_err("queue %d: invalid maxh2cdata returned %u\n", 144362306a36Sopenharmony_ci nvme_tcp_queue_id(queue), maxh2cdata); 144462306a36Sopenharmony_ci goto free_icresp; 144562306a36Sopenharmony_ci } 144662306a36Sopenharmony_ci queue->maxh2cdata = maxh2cdata; 144762306a36Sopenharmony_ci 144862306a36Sopenharmony_ci ret = 0; 144962306a36Sopenharmony_cifree_icresp: 145062306a36Sopenharmony_ci kfree(icresp); 145162306a36Sopenharmony_cifree_icreq: 145262306a36Sopenharmony_ci kfree(icreq); 145362306a36Sopenharmony_ci return ret; 145462306a36Sopenharmony_ci} 145562306a36Sopenharmony_ci 145662306a36Sopenharmony_cistatic bool nvme_tcp_admin_queue(struct nvme_tcp_queue *queue) 145762306a36Sopenharmony_ci{ 145862306a36Sopenharmony_ci return nvme_tcp_queue_id(queue) == 0; 145962306a36Sopenharmony_ci} 146062306a36Sopenharmony_ci 146162306a36Sopenharmony_cistatic bool nvme_tcp_default_queue(struct nvme_tcp_queue *queue) 146262306a36Sopenharmony_ci{ 146362306a36Sopenharmony_ci struct nvme_tcp_ctrl *ctrl = queue->ctrl; 146462306a36Sopenharmony_ci int qid = nvme_tcp_queue_id(queue); 146562306a36Sopenharmony_ci 146662306a36Sopenharmony_ci return !nvme_tcp_admin_queue(queue) && 146762306a36Sopenharmony_ci qid < 1 + ctrl->io_queues[HCTX_TYPE_DEFAULT]; 146862306a36Sopenharmony_ci} 146962306a36Sopenharmony_ci 147062306a36Sopenharmony_cistatic bool nvme_tcp_read_queue(struct nvme_tcp_queue *queue) 147162306a36Sopenharmony_ci{ 147262306a36Sopenharmony_ci struct nvme_tcp_ctrl *ctrl = queue->ctrl; 147362306a36Sopenharmony_ci int qid = nvme_tcp_queue_id(queue); 147462306a36Sopenharmony_ci 147562306a36Sopenharmony_ci return !nvme_tcp_admin_queue(queue) && 147662306a36Sopenharmony_ci !nvme_tcp_default_queue(queue) && 147762306a36Sopenharmony_ci qid < 1 + ctrl->io_queues[HCTX_TYPE_DEFAULT] + 147862306a36Sopenharmony_ci ctrl->io_queues[HCTX_TYPE_READ]; 147962306a36Sopenharmony_ci} 148062306a36Sopenharmony_ci 148162306a36Sopenharmony_cistatic bool nvme_tcp_poll_queue(struct nvme_tcp_queue *queue) 148262306a36Sopenharmony_ci{ 148362306a36Sopenharmony_ci struct nvme_tcp_ctrl *ctrl = queue->ctrl; 148462306a36Sopenharmony_ci int qid = nvme_tcp_queue_id(queue); 148562306a36Sopenharmony_ci 148662306a36Sopenharmony_ci return !nvme_tcp_admin_queue(queue) && 148762306a36Sopenharmony_ci !nvme_tcp_default_queue(queue) && 148862306a36Sopenharmony_ci !nvme_tcp_read_queue(queue) && 148962306a36Sopenharmony_ci qid < 1 + ctrl->io_queues[HCTX_TYPE_DEFAULT] + 149062306a36Sopenharmony_ci ctrl->io_queues[HCTX_TYPE_READ] + 149162306a36Sopenharmony_ci ctrl->io_queues[HCTX_TYPE_POLL]; 149262306a36Sopenharmony_ci} 149362306a36Sopenharmony_ci 149462306a36Sopenharmony_cistatic void nvme_tcp_set_queue_io_cpu(struct nvme_tcp_queue *queue) 149562306a36Sopenharmony_ci{ 149662306a36Sopenharmony_ci struct nvme_tcp_ctrl *ctrl = queue->ctrl; 149762306a36Sopenharmony_ci int qid = nvme_tcp_queue_id(queue); 149862306a36Sopenharmony_ci int n = 0; 149962306a36Sopenharmony_ci 150062306a36Sopenharmony_ci if (nvme_tcp_default_queue(queue)) 150162306a36Sopenharmony_ci n = qid - 1; 150262306a36Sopenharmony_ci else if (nvme_tcp_read_queue(queue)) 150362306a36Sopenharmony_ci n = qid - ctrl->io_queues[HCTX_TYPE_DEFAULT] - 1; 150462306a36Sopenharmony_ci else if (nvme_tcp_poll_queue(queue)) 150562306a36Sopenharmony_ci n = qid - ctrl->io_queues[HCTX_TYPE_DEFAULT] - 150662306a36Sopenharmony_ci ctrl->io_queues[HCTX_TYPE_READ] - 1; 150762306a36Sopenharmony_ci queue->io_cpu = cpumask_next_wrap(n - 1, cpu_online_mask, -1, false); 150862306a36Sopenharmony_ci} 150962306a36Sopenharmony_ci 151062306a36Sopenharmony_cistatic int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid) 151162306a36Sopenharmony_ci{ 151262306a36Sopenharmony_ci struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); 151362306a36Sopenharmony_ci struct nvme_tcp_queue *queue = &ctrl->queues[qid]; 151462306a36Sopenharmony_ci int ret, rcv_pdu_size; 151562306a36Sopenharmony_ci 151662306a36Sopenharmony_ci mutex_init(&queue->queue_lock); 151762306a36Sopenharmony_ci queue->ctrl = ctrl; 151862306a36Sopenharmony_ci init_llist_head(&queue->req_list); 151962306a36Sopenharmony_ci INIT_LIST_HEAD(&queue->send_list); 152062306a36Sopenharmony_ci mutex_init(&queue->send_mutex); 152162306a36Sopenharmony_ci INIT_WORK(&queue->io_work, nvme_tcp_io_work); 152262306a36Sopenharmony_ci 152362306a36Sopenharmony_ci if (qid > 0) 152462306a36Sopenharmony_ci queue->cmnd_capsule_len = nctrl->ioccsz * 16; 152562306a36Sopenharmony_ci else 152662306a36Sopenharmony_ci queue->cmnd_capsule_len = sizeof(struct nvme_command) + 152762306a36Sopenharmony_ci NVME_TCP_ADMIN_CCSZ; 152862306a36Sopenharmony_ci 152962306a36Sopenharmony_ci ret = sock_create(ctrl->addr.ss_family, SOCK_STREAM, 153062306a36Sopenharmony_ci IPPROTO_TCP, &queue->sock); 153162306a36Sopenharmony_ci if (ret) { 153262306a36Sopenharmony_ci dev_err(nctrl->device, 153362306a36Sopenharmony_ci "failed to create socket: %d\n", ret); 153462306a36Sopenharmony_ci goto err_destroy_mutex; 153562306a36Sopenharmony_ci } 153662306a36Sopenharmony_ci 153762306a36Sopenharmony_ci nvme_tcp_reclassify_socket(queue->sock); 153862306a36Sopenharmony_ci 153962306a36Sopenharmony_ci /* Single syn retry */ 154062306a36Sopenharmony_ci tcp_sock_set_syncnt(queue->sock->sk, 1); 154162306a36Sopenharmony_ci 154262306a36Sopenharmony_ci /* Set TCP no delay */ 154362306a36Sopenharmony_ci tcp_sock_set_nodelay(queue->sock->sk); 154462306a36Sopenharmony_ci 154562306a36Sopenharmony_ci /* 154662306a36Sopenharmony_ci * Cleanup whatever is sitting in the TCP transmit queue on socket 154762306a36Sopenharmony_ci * close. This is done to prevent stale data from being sent should 154862306a36Sopenharmony_ci * the network connection be restored before TCP times out. 154962306a36Sopenharmony_ci */ 155062306a36Sopenharmony_ci sock_no_linger(queue->sock->sk); 155162306a36Sopenharmony_ci 155262306a36Sopenharmony_ci if (so_priority > 0) 155362306a36Sopenharmony_ci sock_set_priority(queue->sock->sk, so_priority); 155462306a36Sopenharmony_ci 155562306a36Sopenharmony_ci /* Set socket type of service */ 155662306a36Sopenharmony_ci if (nctrl->opts->tos >= 0) 155762306a36Sopenharmony_ci ip_sock_set_tos(queue->sock->sk, nctrl->opts->tos); 155862306a36Sopenharmony_ci 155962306a36Sopenharmony_ci /* Set 10 seconds timeout for icresp recvmsg */ 156062306a36Sopenharmony_ci queue->sock->sk->sk_rcvtimeo = 10 * HZ; 156162306a36Sopenharmony_ci 156262306a36Sopenharmony_ci queue->sock->sk->sk_allocation = GFP_ATOMIC; 156362306a36Sopenharmony_ci queue->sock->sk->sk_use_task_frag = false; 156462306a36Sopenharmony_ci nvme_tcp_set_queue_io_cpu(queue); 156562306a36Sopenharmony_ci queue->request = NULL; 156662306a36Sopenharmony_ci queue->data_remaining = 0; 156762306a36Sopenharmony_ci queue->ddgst_remaining = 0; 156862306a36Sopenharmony_ci queue->pdu_remaining = 0; 156962306a36Sopenharmony_ci queue->pdu_offset = 0; 157062306a36Sopenharmony_ci sk_set_memalloc(queue->sock->sk); 157162306a36Sopenharmony_ci 157262306a36Sopenharmony_ci if (nctrl->opts->mask & NVMF_OPT_HOST_TRADDR) { 157362306a36Sopenharmony_ci ret = kernel_bind(queue->sock, (struct sockaddr *)&ctrl->src_addr, 157462306a36Sopenharmony_ci sizeof(ctrl->src_addr)); 157562306a36Sopenharmony_ci if (ret) { 157662306a36Sopenharmony_ci dev_err(nctrl->device, 157762306a36Sopenharmony_ci "failed to bind queue %d socket %d\n", 157862306a36Sopenharmony_ci qid, ret); 157962306a36Sopenharmony_ci goto err_sock; 158062306a36Sopenharmony_ci } 158162306a36Sopenharmony_ci } 158262306a36Sopenharmony_ci 158362306a36Sopenharmony_ci if (nctrl->opts->mask & NVMF_OPT_HOST_IFACE) { 158462306a36Sopenharmony_ci char *iface = nctrl->opts->host_iface; 158562306a36Sopenharmony_ci sockptr_t optval = KERNEL_SOCKPTR(iface); 158662306a36Sopenharmony_ci 158762306a36Sopenharmony_ci ret = sock_setsockopt(queue->sock, SOL_SOCKET, SO_BINDTODEVICE, 158862306a36Sopenharmony_ci optval, strlen(iface)); 158962306a36Sopenharmony_ci if (ret) { 159062306a36Sopenharmony_ci dev_err(nctrl->device, 159162306a36Sopenharmony_ci "failed to bind to interface %s queue %d err %d\n", 159262306a36Sopenharmony_ci iface, qid, ret); 159362306a36Sopenharmony_ci goto err_sock; 159462306a36Sopenharmony_ci } 159562306a36Sopenharmony_ci } 159662306a36Sopenharmony_ci 159762306a36Sopenharmony_ci queue->hdr_digest = nctrl->opts->hdr_digest; 159862306a36Sopenharmony_ci queue->data_digest = nctrl->opts->data_digest; 159962306a36Sopenharmony_ci if (queue->hdr_digest || queue->data_digest) { 160062306a36Sopenharmony_ci ret = nvme_tcp_alloc_crypto(queue); 160162306a36Sopenharmony_ci if (ret) { 160262306a36Sopenharmony_ci dev_err(nctrl->device, 160362306a36Sopenharmony_ci "failed to allocate queue %d crypto\n", qid); 160462306a36Sopenharmony_ci goto err_sock; 160562306a36Sopenharmony_ci } 160662306a36Sopenharmony_ci } 160762306a36Sopenharmony_ci 160862306a36Sopenharmony_ci rcv_pdu_size = sizeof(struct nvme_tcp_rsp_pdu) + 160962306a36Sopenharmony_ci nvme_tcp_hdgst_len(queue); 161062306a36Sopenharmony_ci queue->pdu = kmalloc(rcv_pdu_size, GFP_KERNEL); 161162306a36Sopenharmony_ci if (!queue->pdu) { 161262306a36Sopenharmony_ci ret = -ENOMEM; 161362306a36Sopenharmony_ci goto err_crypto; 161462306a36Sopenharmony_ci } 161562306a36Sopenharmony_ci 161662306a36Sopenharmony_ci dev_dbg(nctrl->device, "connecting queue %d\n", 161762306a36Sopenharmony_ci nvme_tcp_queue_id(queue)); 161862306a36Sopenharmony_ci 161962306a36Sopenharmony_ci ret = kernel_connect(queue->sock, (struct sockaddr *)&ctrl->addr, 162062306a36Sopenharmony_ci sizeof(ctrl->addr), 0); 162162306a36Sopenharmony_ci if (ret) { 162262306a36Sopenharmony_ci dev_err(nctrl->device, 162362306a36Sopenharmony_ci "failed to connect socket: %d\n", ret); 162462306a36Sopenharmony_ci goto err_rcv_pdu; 162562306a36Sopenharmony_ci } 162662306a36Sopenharmony_ci 162762306a36Sopenharmony_ci ret = nvme_tcp_init_connection(queue); 162862306a36Sopenharmony_ci if (ret) 162962306a36Sopenharmony_ci goto err_init_connect; 163062306a36Sopenharmony_ci 163162306a36Sopenharmony_ci set_bit(NVME_TCP_Q_ALLOCATED, &queue->flags); 163262306a36Sopenharmony_ci 163362306a36Sopenharmony_ci return 0; 163462306a36Sopenharmony_ci 163562306a36Sopenharmony_cierr_init_connect: 163662306a36Sopenharmony_ci kernel_sock_shutdown(queue->sock, SHUT_RDWR); 163762306a36Sopenharmony_cierr_rcv_pdu: 163862306a36Sopenharmony_ci kfree(queue->pdu); 163962306a36Sopenharmony_cierr_crypto: 164062306a36Sopenharmony_ci if (queue->hdr_digest || queue->data_digest) 164162306a36Sopenharmony_ci nvme_tcp_free_crypto(queue); 164262306a36Sopenharmony_cierr_sock: 164362306a36Sopenharmony_ci sock_release(queue->sock); 164462306a36Sopenharmony_ci queue->sock = NULL; 164562306a36Sopenharmony_cierr_destroy_mutex: 164662306a36Sopenharmony_ci mutex_destroy(&queue->send_mutex); 164762306a36Sopenharmony_ci mutex_destroy(&queue->queue_lock); 164862306a36Sopenharmony_ci return ret; 164962306a36Sopenharmony_ci} 165062306a36Sopenharmony_ci 165162306a36Sopenharmony_cistatic void nvme_tcp_restore_sock_ops(struct nvme_tcp_queue *queue) 165262306a36Sopenharmony_ci{ 165362306a36Sopenharmony_ci struct socket *sock = queue->sock; 165462306a36Sopenharmony_ci 165562306a36Sopenharmony_ci write_lock_bh(&sock->sk->sk_callback_lock); 165662306a36Sopenharmony_ci sock->sk->sk_user_data = NULL; 165762306a36Sopenharmony_ci sock->sk->sk_data_ready = queue->data_ready; 165862306a36Sopenharmony_ci sock->sk->sk_state_change = queue->state_change; 165962306a36Sopenharmony_ci sock->sk->sk_write_space = queue->write_space; 166062306a36Sopenharmony_ci write_unlock_bh(&sock->sk->sk_callback_lock); 166162306a36Sopenharmony_ci} 166262306a36Sopenharmony_ci 166362306a36Sopenharmony_cistatic void __nvme_tcp_stop_queue(struct nvme_tcp_queue *queue) 166462306a36Sopenharmony_ci{ 166562306a36Sopenharmony_ci kernel_sock_shutdown(queue->sock, SHUT_RDWR); 166662306a36Sopenharmony_ci nvme_tcp_restore_sock_ops(queue); 166762306a36Sopenharmony_ci cancel_work_sync(&queue->io_work); 166862306a36Sopenharmony_ci} 166962306a36Sopenharmony_ci 167062306a36Sopenharmony_cistatic void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid) 167162306a36Sopenharmony_ci{ 167262306a36Sopenharmony_ci struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); 167362306a36Sopenharmony_ci struct nvme_tcp_queue *queue = &ctrl->queues[qid]; 167462306a36Sopenharmony_ci 167562306a36Sopenharmony_ci if (!test_bit(NVME_TCP_Q_ALLOCATED, &queue->flags)) 167662306a36Sopenharmony_ci return; 167762306a36Sopenharmony_ci 167862306a36Sopenharmony_ci mutex_lock(&queue->queue_lock); 167962306a36Sopenharmony_ci if (test_and_clear_bit(NVME_TCP_Q_LIVE, &queue->flags)) 168062306a36Sopenharmony_ci __nvme_tcp_stop_queue(queue); 168162306a36Sopenharmony_ci mutex_unlock(&queue->queue_lock); 168262306a36Sopenharmony_ci} 168362306a36Sopenharmony_ci 168462306a36Sopenharmony_cistatic void nvme_tcp_setup_sock_ops(struct nvme_tcp_queue *queue) 168562306a36Sopenharmony_ci{ 168662306a36Sopenharmony_ci write_lock_bh(&queue->sock->sk->sk_callback_lock); 168762306a36Sopenharmony_ci queue->sock->sk->sk_user_data = queue; 168862306a36Sopenharmony_ci queue->state_change = queue->sock->sk->sk_state_change; 168962306a36Sopenharmony_ci queue->data_ready = queue->sock->sk->sk_data_ready; 169062306a36Sopenharmony_ci queue->write_space = queue->sock->sk->sk_write_space; 169162306a36Sopenharmony_ci queue->sock->sk->sk_data_ready = nvme_tcp_data_ready; 169262306a36Sopenharmony_ci queue->sock->sk->sk_state_change = nvme_tcp_state_change; 169362306a36Sopenharmony_ci queue->sock->sk->sk_write_space = nvme_tcp_write_space; 169462306a36Sopenharmony_ci#ifdef CONFIG_NET_RX_BUSY_POLL 169562306a36Sopenharmony_ci queue->sock->sk->sk_ll_usec = 1; 169662306a36Sopenharmony_ci#endif 169762306a36Sopenharmony_ci write_unlock_bh(&queue->sock->sk->sk_callback_lock); 169862306a36Sopenharmony_ci} 169962306a36Sopenharmony_ci 170062306a36Sopenharmony_cistatic int nvme_tcp_start_queue(struct nvme_ctrl *nctrl, int idx) 170162306a36Sopenharmony_ci{ 170262306a36Sopenharmony_ci struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); 170362306a36Sopenharmony_ci struct nvme_tcp_queue *queue = &ctrl->queues[idx]; 170462306a36Sopenharmony_ci int ret; 170562306a36Sopenharmony_ci 170662306a36Sopenharmony_ci queue->rd_enabled = true; 170762306a36Sopenharmony_ci nvme_tcp_init_recv_ctx(queue); 170862306a36Sopenharmony_ci nvme_tcp_setup_sock_ops(queue); 170962306a36Sopenharmony_ci 171062306a36Sopenharmony_ci if (idx) 171162306a36Sopenharmony_ci ret = nvmf_connect_io_queue(nctrl, idx); 171262306a36Sopenharmony_ci else 171362306a36Sopenharmony_ci ret = nvmf_connect_admin_queue(nctrl); 171462306a36Sopenharmony_ci 171562306a36Sopenharmony_ci if (!ret) { 171662306a36Sopenharmony_ci set_bit(NVME_TCP_Q_LIVE, &queue->flags); 171762306a36Sopenharmony_ci } else { 171862306a36Sopenharmony_ci if (test_bit(NVME_TCP_Q_ALLOCATED, &queue->flags)) 171962306a36Sopenharmony_ci __nvme_tcp_stop_queue(queue); 172062306a36Sopenharmony_ci dev_err(nctrl->device, 172162306a36Sopenharmony_ci "failed to connect queue: %d ret=%d\n", idx, ret); 172262306a36Sopenharmony_ci } 172362306a36Sopenharmony_ci return ret; 172462306a36Sopenharmony_ci} 172562306a36Sopenharmony_ci 172662306a36Sopenharmony_cistatic void nvme_tcp_free_admin_queue(struct nvme_ctrl *ctrl) 172762306a36Sopenharmony_ci{ 172862306a36Sopenharmony_ci if (to_tcp_ctrl(ctrl)->async_req.pdu) { 172962306a36Sopenharmony_ci cancel_work_sync(&ctrl->async_event_work); 173062306a36Sopenharmony_ci nvme_tcp_free_async_req(to_tcp_ctrl(ctrl)); 173162306a36Sopenharmony_ci to_tcp_ctrl(ctrl)->async_req.pdu = NULL; 173262306a36Sopenharmony_ci } 173362306a36Sopenharmony_ci 173462306a36Sopenharmony_ci nvme_tcp_free_queue(ctrl, 0); 173562306a36Sopenharmony_ci} 173662306a36Sopenharmony_ci 173762306a36Sopenharmony_cistatic void nvme_tcp_free_io_queues(struct nvme_ctrl *ctrl) 173862306a36Sopenharmony_ci{ 173962306a36Sopenharmony_ci int i; 174062306a36Sopenharmony_ci 174162306a36Sopenharmony_ci for (i = 1; i < ctrl->queue_count; i++) 174262306a36Sopenharmony_ci nvme_tcp_free_queue(ctrl, i); 174362306a36Sopenharmony_ci} 174462306a36Sopenharmony_ci 174562306a36Sopenharmony_cistatic void nvme_tcp_stop_io_queues(struct nvme_ctrl *ctrl) 174662306a36Sopenharmony_ci{ 174762306a36Sopenharmony_ci int i; 174862306a36Sopenharmony_ci 174962306a36Sopenharmony_ci for (i = 1; i < ctrl->queue_count; i++) 175062306a36Sopenharmony_ci nvme_tcp_stop_queue(ctrl, i); 175162306a36Sopenharmony_ci} 175262306a36Sopenharmony_ci 175362306a36Sopenharmony_cistatic int nvme_tcp_start_io_queues(struct nvme_ctrl *ctrl, 175462306a36Sopenharmony_ci int first, int last) 175562306a36Sopenharmony_ci{ 175662306a36Sopenharmony_ci int i, ret; 175762306a36Sopenharmony_ci 175862306a36Sopenharmony_ci for (i = first; i < last; i++) { 175962306a36Sopenharmony_ci ret = nvme_tcp_start_queue(ctrl, i); 176062306a36Sopenharmony_ci if (ret) 176162306a36Sopenharmony_ci goto out_stop_queues; 176262306a36Sopenharmony_ci } 176362306a36Sopenharmony_ci 176462306a36Sopenharmony_ci return 0; 176562306a36Sopenharmony_ci 176662306a36Sopenharmony_ciout_stop_queues: 176762306a36Sopenharmony_ci for (i--; i >= first; i--) 176862306a36Sopenharmony_ci nvme_tcp_stop_queue(ctrl, i); 176962306a36Sopenharmony_ci return ret; 177062306a36Sopenharmony_ci} 177162306a36Sopenharmony_ci 177262306a36Sopenharmony_cistatic int nvme_tcp_alloc_admin_queue(struct nvme_ctrl *ctrl) 177362306a36Sopenharmony_ci{ 177462306a36Sopenharmony_ci int ret; 177562306a36Sopenharmony_ci 177662306a36Sopenharmony_ci ret = nvme_tcp_alloc_queue(ctrl, 0); 177762306a36Sopenharmony_ci if (ret) 177862306a36Sopenharmony_ci return ret; 177962306a36Sopenharmony_ci 178062306a36Sopenharmony_ci ret = nvme_tcp_alloc_async_req(to_tcp_ctrl(ctrl)); 178162306a36Sopenharmony_ci if (ret) 178262306a36Sopenharmony_ci goto out_free_queue; 178362306a36Sopenharmony_ci 178462306a36Sopenharmony_ci return 0; 178562306a36Sopenharmony_ci 178662306a36Sopenharmony_ciout_free_queue: 178762306a36Sopenharmony_ci nvme_tcp_free_queue(ctrl, 0); 178862306a36Sopenharmony_ci return ret; 178962306a36Sopenharmony_ci} 179062306a36Sopenharmony_ci 179162306a36Sopenharmony_cistatic int __nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl) 179262306a36Sopenharmony_ci{ 179362306a36Sopenharmony_ci int i, ret; 179462306a36Sopenharmony_ci 179562306a36Sopenharmony_ci for (i = 1; i < ctrl->queue_count; i++) { 179662306a36Sopenharmony_ci ret = nvme_tcp_alloc_queue(ctrl, i); 179762306a36Sopenharmony_ci if (ret) 179862306a36Sopenharmony_ci goto out_free_queues; 179962306a36Sopenharmony_ci } 180062306a36Sopenharmony_ci 180162306a36Sopenharmony_ci return 0; 180262306a36Sopenharmony_ci 180362306a36Sopenharmony_ciout_free_queues: 180462306a36Sopenharmony_ci for (i--; i >= 1; i--) 180562306a36Sopenharmony_ci nvme_tcp_free_queue(ctrl, i); 180662306a36Sopenharmony_ci 180762306a36Sopenharmony_ci return ret; 180862306a36Sopenharmony_ci} 180962306a36Sopenharmony_ci 181062306a36Sopenharmony_cistatic int nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl) 181162306a36Sopenharmony_ci{ 181262306a36Sopenharmony_ci unsigned int nr_io_queues; 181362306a36Sopenharmony_ci int ret; 181462306a36Sopenharmony_ci 181562306a36Sopenharmony_ci nr_io_queues = nvmf_nr_io_queues(ctrl->opts); 181662306a36Sopenharmony_ci ret = nvme_set_queue_count(ctrl, &nr_io_queues); 181762306a36Sopenharmony_ci if (ret) 181862306a36Sopenharmony_ci return ret; 181962306a36Sopenharmony_ci 182062306a36Sopenharmony_ci if (nr_io_queues == 0) { 182162306a36Sopenharmony_ci dev_err(ctrl->device, 182262306a36Sopenharmony_ci "unable to set any I/O queues\n"); 182362306a36Sopenharmony_ci return -ENOMEM; 182462306a36Sopenharmony_ci } 182562306a36Sopenharmony_ci 182662306a36Sopenharmony_ci ctrl->queue_count = nr_io_queues + 1; 182762306a36Sopenharmony_ci dev_info(ctrl->device, 182862306a36Sopenharmony_ci "creating %d I/O queues.\n", nr_io_queues); 182962306a36Sopenharmony_ci 183062306a36Sopenharmony_ci nvmf_set_io_queues(ctrl->opts, nr_io_queues, 183162306a36Sopenharmony_ci to_tcp_ctrl(ctrl)->io_queues); 183262306a36Sopenharmony_ci return __nvme_tcp_alloc_io_queues(ctrl); 183362306a36Sopenharmony_ci} 183462306a36Sopenharmony_ci 183562306a36Sopenharmony_cistatic void nvme_tcp_destroy_io_queues(struct nvme_ctrl *ctrl, bool remove) 183662306a36Sopenharmony_ci{ 183762306a36Sopenharmony_ci nvme_tcp_stop_io_queues(ctrl); 183862306a36Sopenharmony_ci if (remove) 183962306a36Sopenharmony_ci nvme_remove_io_tag_set(ctrl); 184062306a36Sopenharmony_ci nvme_tcp_free_io_queues(ctrl); 184162306a36Sopenharmony_ci} 184262306a36Sopenharmony_ci 184362306a36Sopenharmony_cistatic int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new) 184462306a36Sopenharmony_ci{ 184562306a36Sopenharmony_ci int ret, nr_queues; 184662306a36Sopenharmony_ci 184762306a36Sopenharmony_ci ret = nvme_tcp_alloc_io_queues(ctrl); 184862306a36Sopenharmony_ci if (ret) 184962306a36Sopenharmony_ci return ret; 185062306a36Sopenharmony_ci 185162306a36Sopenharmony_ci if (new) { 185262306a36Sopenharmony_ci ret = nvme_alloc_io_tag_set(ctrl, &to_tcp_ctrl(ctrl)->tag_set, 185362306a36Sopenharmony_ci &nvme_tcp_mq_ops, 185462306a36Sopenharmony_ci ctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2, 185562306a36Sopenharmony_ci sizeof(struct nvme_tcp_request)); 185662306a36Sopenharmony_ci if (ret) 185762306a36Sopenharmony_ci goto out_free_io_queues; 185862306a36Sopenharmony_ci } 185962306a36Sopenharmony_ci 186062306a36Sopenharmony_ci /* 186162306a36Sopenharmony_ci * Only start IO queues for which we have allocated the tagset 186262306a36Sopenharmony_ci * and limitted it to the available queues. On reconnects, the 186362306a36Sopenharmony_ci * queue number might have changed. 186462306a36Sopenharmony_ci */ 186562306a36Sopenharmony_ci nr_queues = min(ctrl->tagset->nr_hw_queues + 1, ctrl->queue_count); 186662306a36Sopenharmony_ci ret = nvme_tcp_start_io_queues(ctrl, 1, nr_queues); 186762306a36Sopenharmony_ci if (ret) 186862306a36Sopenharmony_ci goto out_cleanup_connect_q; 186962306a36Sopenharmony_ci 187062306a36Sopenharmony_ci if (!new) { 187162306a36Sopenharmony_ci nvme_start_freeze(ctrl); 187262306a36Sopenharmony_ci nvme_unquiesce_io_queues(ctrl); 187362306a36Sopenharmony_ci if (!nvme_wait_freeze_timeout(ctrl, NVME_IO_TIMEOUT)) { 187462306a36Sopenharmony_ci /* 187562306a36Sopenharmony_ci * If we timed out waiting for freeze we are likely to 187662306a36Sopenharmony_ci * be stuck. Fail the controller initialization just 187762306a36Sopenharmony_ci * to be safe. 187862306a36Sopenharmony_ci */ 187962306a36Sopenharmony_ci ret = -ENODEV; 188062306a36Sopenharmony_ci nvme_unfreeze(ctrl); 188162306a36Sopenharmony_ci goto out_wait_freeze_timed_out; 188262306a36Sopenharmony_ci } 188362306a36Sopenharmony_ci blk_mq_update_nr_hw_queues(ctrl->tagset, 188462306a36Sopenharmony_ci ctrl->queue_count - 1); 188562306a36Sopenharmony_ci nvme_unfreeze(ctrl); 188662306a36Sopenharmony_ci } 188762306a36Sopenharmony_ci 188862306a36Sopenharmony_ci /* 188962306a36Sopenharmony_ci * If the number of queues has increased (reconnect case) 189062306a36Sopenharmony_ci * start all new queues now. 189162306a36Sopenharmony_ci */ 189262306a36Sopenharmony_ci ret = nvme_tcp_start_io_queues(ctrl, nr_queues, 189362306a36Sopenharmony_ci ctrl->tagset->nr_hw_queues + 1); 189462306a36Sopenharmony_ci if (ret) 189562306a36Sopenharmony_ci goto out_wait_freeze_timed_out; 189662306a36Sopenharmony_ci 189762306a36Sopenharmony_ci return 0; 189862306a36Sopenharmony_ci 189962306a36Sopenharmony_ciout_wait_freeze_timed_out: 190062306a36Sopenharmony_ci nvme_quiesce_io_queues(ctrl); 190162306a36Sopenharmony_ci nvme_sync_io_queues(ctrl); 190262306a36Sopenharmony_ci nvme_tcp_stop_io_queues(ctrl); 190362306a36Sopenharmony_ciout_cleanup_connect_q: 190462306a36Sopenharmony_ci nvme_cancel_tagset(ctrl); 190562306a36Sopenharmony_ci if (new) 190662306a36Sopenharmony_ci nvme_remove_io_tag_set(ctrl); 190762306a36Sopenharmony_ciout_free_io_queues: 190862306a36Sopenharmony_ci nvme_tcp_free_io_queues(ctrl); 190962306a36Sopenharmony_ci return ret; 191062306a36Sopenharmony_ci} 191162306a36Sopenharmony_ci 191262306a36Sopenharmony_cistatic void nvme_tcp_destroy_admin_queue(struct nvme_ctrl *ctrl, bool remove) 191362306a36Sopenharmony_ci{ 191462306a36Sopenharmony_ci nvme_tcp_stop_queue(ctrl, 0); 191562306a36Sopenharmony_ci if (remove) 191662306a36Sopenharmony_ci nvme_remove_admin_tag_set(ctrl); 191762306a36Sopenharmony_ci nvme_tcp_free_admin_queue(ctrl); 191862306a36Sopenharmony_ci} 191962306a36Sopenharmony_ci 192062306a36Sopenharmony_cistatic int nvme_tcp_configure_admin_queue(struct nvme_ctrl *ctrl, bool new) 192162306a36Sopenharmony_ci{ 192262306a36Sopenharmony_ci int error; 192362306a36Sopenharmony_ci 192462306a36Sopenharmony_ci error = nvme_tcp_alloc_admin_queue(ctrl); 192562306a36Sopenharmony_ci if (error) 192662306a36Sopenharmony_ci return error; 192762306a36Sopenharmony_ci 192862306a36Sopenharmony_ci if (new) { 192962306a36Sopenharmony_ci error = nvme_alloc_admin_tag_set(ctrl, 193062306a36Sopenharmony_ci &to_tcp_ctrl(ctrl)->admin_tag_set, 193162306a36Sopenharmony_ci &nvme_tcp_admin_mq_ops, 193262306a36Sopenharmony_ci sizeof(struct nvme_tcp_request)); 193362306a36Sopenharmony_ci if (error) 193462306a36Sopenharmony_ci goto out_free_queue; 193562306a36Sopenharmony_ci } 193662306a36Sopenharmony_ci 193762306a36Sopenharmony_ci error = nvme_tcp_start_queue(ctrl, 0); 193862306a36Sopenharmony_ci if (error) 193962306a36Sopenharmony_ci goto out_cleanup_tagset; 194062306a36Sopenharmony_ci 194162306a36Sopenharmony_ci error = nvme_enable_ctrl(ctrl); 194262306a36Sopenharmony_ci if (error) 194362306a36Sopenharmony_ci goto out_stop_queue; 194462306a36Sopenharmony_ci 194562306a36Sopenharmony_ci nvme_unquiesce_admin_queue(ctrl); 194662306a36Sopenharmony_ci 194762306a36Sopenharmony_ci error = nvme_init_ctrl_finish(ctrl, false); 194862306a36Sopenharmony_ci if (error) 194962306a36Sopenharmony_ci goto out_quiesce_queue; 195062306a36Sopenharmony_ci 195162306a36Sopenharmony_ci return 0; 195262306a36Sopenharmony_ci 195362306a36Sopenharmony_ciout_quiesce_queue: 195462306a36Sopenharmony_ci nvme_quiesce_admin_queue(ctrl); 195562306a36Sopenharmony_ci blk_sync_queue(ctrl->admin_q); 195662306a36Sopenharmony_ciout_stop_queue: 195762306a36Sopenharmony_ci nvme_tcp_stop_queue(ctrl, 0); 195862306a36Sopenharmony_ci nvme_cancel_admin_tagset(ctrl); 195962306a36Sopenharmony_ciout_cleanup_tagset: 196062306a36Sopenharmony_ci if (new) 196162306a36Sopenharmony_ci nvme_remove_admin_tag_set(ctrl); 196262306a36Sopenharmony_ciout_free_queue: 196362306a36Sopenharmony_ci nvme_tcp_free_admin_queue(ctrl); 196462306a36Sopenharmony_ci return error; 196562306a36Sopenharmony_ci} 196662306a36Sopenharmony_ci 196762306a36Sopenharmony_cistatic void nvme_tcp_teardown_admin_queue(struct nvme_ctrl *ctrl, 196862306a36Sopenharmony_ci bool remove) 196962306a36Sopenharmony_ci{ 197062306a36Sopenharmony_ci nvme_quiesce_admin_queue(ctrl); 197162306a36Sopenharmony_ci blk_sync_queue(ctrl->admin_q); 197262306a36Sopenharmony_ci nvme_tcp_stop_queue(ctrl, 0); 197362306a36Sopenharmony_ci nvme_cancel_admin_tagset(ctrl); 197462306a36Sopenharmony_ci if (remove) 197562306a36Sopenharmony_ci nvme_unquiesce_admin_queue(ctrl); 197662306a36Sopenharmony_ci nvme_tcp_destroy_admin_queue(ctrl, remove); 197762306a36Sopenharmony_ci} 197862306a36Sopenharmony_ci 197962306a36Sopenharmony_cistatic void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl, 198062306a36Sopenharmony_ci bool remove) 198162306a36Sopenharmony_ci{ 198262306a36Sopenharmony_ci if (ctrl->queue_count <= 1) 198362306a36Sopenharmony_ci return; 198462306a36Sopenharmony_ci nvme_quiesce_admin_queue(ctrl); 198562306a36Sopenharmony_ci nvme_quiesce_io_queues(ctrl); 198662306a36Sopenharmony_ci nvme_sync_io_queues(ctrl); 198762306a36Sopenharmony_ci nvme_tcp_stop_io_queues(ctrl); 198862306a36Sopenharmony_ci nvme_cancel_tagset(ctrl); 198962306a36Sopenharmony_ci if (remove) 199062306a36Sopenharmony_ci nvme_unquiesce_io_queues(ctrl); 199162306a36Sopenharmony_ci nvme_tcp_destroy_io_queues(ctrl, remove); 199262306a36Sopenharmony_ci} 199362306a36Sopenharmony_ci 199462306a36Sopenharmony_cistatic void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl) 199562306a36Sopenharmony_ci{ 199662306a36Sopenharmony_ci enum nvme_ctrl_state state = nvme_ctrl_state(ctrl); 199762306a36Sopenharmony_ci 199862306a36Sopenharmony_ci /* If we are resetting/deleting then do nothing */ 199962306a36Sopenharmony_ci if (state != NVME_CTRL_CONNECTING) { 200062306a36Sopenharmony_ci WARN_ON_ONCE(state == NVME_CTRL_NEW || state == NVME_CTRL_LIVE); 200162306a36Sopenharmony_ci return; 200262306a36Sopenharmony_ci } 200362306a36Sopenharmony_ci 200462306a36Sopenharmony_ci if (nvmf_should_reconnect(ctrl)) { 200562306a36Sopenharmony_ci dev_info(ctrl->device, "Reconnecting in %d seconds...\n", 200662306a36Sopenharmony_ci ctrl->opts->reconnect_delay); 200762306a36Sopenharmony_ci queue_delayed_work(nvme_wq, &to_tcp_ctrl(ctrl)->connect_work, 200862306a36Sopenharmony_ci ctrl->opts->reconnect_delay * HZ); 200962306a36Sopenharmony_ci } else { 201062306a36Sopenharmony_ci dev_info(ctrl->device, "Removing controller...\n"); 201162306a36Sopenharmony_ci nvme_delete_ctrl(ctrl); 201262306a36Sopenharmony_ci } 201362306a36Sopenharmony_ci} 201462306a36Sopenharmony_ci 201562306a36Sopenharmony_cistatic int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new) 201662306a36Sopenharmony_ci{ 201762306a36Sopenharmony_ci struct nvmf_ctrl_options *opts = ctrl->opts; 201862306a36Sopenharmony_ci int ret; 201962306a36Sopenharmony_ci 202062306a36Sopenharmony_ci ret = nvme_tcp_configure_admin_queue(ctrl, new); 202162306a36Sopenharmony_ci if (ret) 202262306a36Sopenharmony_ci return ret; 202362306a36Sopenharmony_ci 202462306a36Sopenharmony_ci if (ctrl->icdoff) { 202562306a36Sopenharmony_ci ret = -EOPNOTSUPP; 202662306a36Sopenharmony_ci dev_err(ctrl->device, "icdoff is not supported!\n"); 202762306a36Sopenharmony_ci goto destroy_admin; 202862306a36Sopenharmony_ci } 202962306a36Sopenharmony_ci 203062306a36Sopenharmony_ci if (!nvme_ctrl_sgl_supported(ctrl)) { 203162306a36Sopenharmony_ci ret = -EOPNOTSUPP; 203262306a36Sopenharmony_ci dev_err(ctrl->device, "Mandatory sgls are not supported!\n"); 203362306a36Sopenharmony_ci goto destroy_admin; 203462306a36Sopenharmony_ci } 203562306a36Sopenharmony_ci 203662306a36Sopenharmony_ci if (opts->queue_size > ctrl->sqsize + 1) 203762306a36Sopenharmony_ci dev_warn(ctrl->device, 203862306a36Sopenharmony_ci "queue_size %zu > ctrl sqsize %u, clamping down\n", 203962306a36Sopenharmony_ci opts->queue_size, ctrl->sqsize + 1); 204062306a36Sopenharmony_ci 204162306a36Sopenharmony_ci if (ctrl->sqsize + 1 > ctrl->maxcmd) { 204262306a36Sopenharmony_ci dev_warn(ctrl->device, 204362306a36Sopenharmony_ci "sqsize %u > ctrl maxcmd %u, clamping down\n", 204462306a36Sopenharmony_ci ctrl->sqsize + 1, ctrl->maxcmd); 204562306a36Sopenharmony_ci ctrl->sqsize = ctrl->maxcmd - 1; 204662306a36Sopenharmony_ci } 204762306a36Sopenharmony_ci 204862306a36Sopenharmony_ci if (ctrl->queue_count > 1) { 204962306a36Sopenharmony_ci ret = nvme_tcp_configure_io_queues(ctrl, new); 205062306a36Sopenharmony_ci if (ret) 205162306a36Sopenharmony_ci goto destroy_admin; 205262306a36Sopenharmony_ci } 205362306a36Sopenharmony_ci 205462306a36Sopenharmony_ci if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE)) { 205562306a36Sopenharmony_ci /* 205662306a36Sopenharmony_ci * state change failure is ok if we started ctrl delete, 205762306a36Sopenharmony_ci * unless we're during creation of a new controller to 205862306a36Sopenharmony_ci * avoid races with teardown flow. 205962306a36Sopenharmony_ci */ 206062306a36Sopenharmony_ci enum nvme_ctrl_state state = nvme_ctrl_state(ctrl); 206162306a36Sopenharmony_ci 206262306a36Sopenharmony_ci WARN_ON_ONCE(state != NVME_CTRL_DELETING && 206362306a36Sopenharmony_ci state != NVME_CTRL_DELETING_NOIO); 206462306a36Sopenharmony_ci WARN_ON_ONCE(new); 206562306a36Sopenharmony_ci ret = -EINVAL; 206662306a36Sopenharmony_ci goto destroy_io; 206762306a36Sopenharmony_ci } 206862306a36Sopenharmony_ci 206962306a36Sopenharmony_ci nvme_start_ctrl(ctrl); 207062306a36Sopenharmony_ci return 0; 207162306a36Sopenharmony_ci 207262306a36Sopenharmony_cidestroy_io: 207362306a36Sopenharmony_ci if (ctrl->queue_count > 1) { 207462306a36Sopenharmony_ci nvme_quiesce_io_queues(ctrl); 207562306a36Sopenharmony_ci nvme_sync_io_queues(ctrl); 207662306a36Sopenharmony_ci nvme_tcp_stop_io_queues(ctrl); 207762306a36Sopenharmony_ci nvme_cancel_tagset(ctrl); 207862306a36Sopenharmony_ci nvme_tcp_destroy_io_queues(ctrl, new); 207962306a36Sopenharmony_ci } 208062306a36Sopenharmony_cidestroy_admin: 208162306a36Sopenharmony_ci nvme_quiesce_admin_queue(ctrl); 208262306a36Sopenharmony_ci blk_sync_queue(ctrl->admin_q); 208362306a36Sopenharmony_ci nvme_tcp_stop_queue(ctrl, 0); 208462306a36Sopenharmony_ci nvme_cancel_admin_tagset(ctrl); 208562306a36Sopenharmony_ci nvme_tcp_destroy_admin_queue(ctrl, new); 208662306a36Sopenharmony_ci return ret; 208762306a36Sopenharmony_ci} 208862306a36Sopenharmony_ci 208962306a36Sopenharmony_cistatic void nvme_tcp_reconnect_ctrl_work(struct work_struct *work) 209062306a36Sopenharmony_ci{ 209162306a36Sopenharmony_ci struct nvme_tcp_ctrl *tcp_ctrl = container_of(to_delayed_work(work), 209262306a36Sopenharmony_ci struct nvme_tcp_ctrl, connect_work); 209362306a36Sopenharmony_ci struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl; 209462306a36Sopenharmony_ci 209562306a36Sopenharmony_ci ++ctrl->nr_reconnects; 209662306a36Sopenharmony_ci 209762306a36Sopenharmony_ci if (nvme_tcp_setup_ctrl(ctrl, false)) 209862306a36Sopenharmony_ci goto requeue; 209962306a36Sopenharmony_ci 210062306a36Sopenharmony_ci dev_info(ctrl->device, "Successfully reconnected (%d attempt)\n", 210162306a36Sopenharmony_ci ctrl->nr_reconnects); 210262306a36Sopenharmony_ci 210362306a36Sopenharmony_ci ctrl->nr_reconnects = 0; 210462306a36Sopenharmony_ci 210562306a36Sopenharmony_ci return; 210662306a36Sopenharmony_ci 210762306a36Sopenharmony_cirequeue: 210862306a36Sopenharmony_ci dev_info(ctrl->device, "Failed reconnect attempt %d\n", 210962306a36Sopenharmony_ci ctrl->nr_reconnects); 211062306a36Sopenharmony_ci nvme_tcp_reconnect_or_remove(ctrl); 211162306a36Sopenharmony_ci} 211262306a36Sopenharmony_ci 211362306a36Sopenharmony_cistatic void nvme_tcp_error_recovery_work(struct work_struct *work) 211462306a36Sopenharmony_ci{ 211562306a36Sopenharmony_ci struct nvme_tcp_ctrl *tcp_ctrl = container_of(work, 211662306a36Sopenharmony_ci struct nvme_tcp_ctrl, err_work); 211762306a36Sopenharmony_ci struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl; 211862306a36Sopenharmony_ci 211962306a36Sopenharmony_ci nvme_stop_keep_alive(ctrl); 212062306a36Sopenharmony_ci flush_work(&ctrl->async_event_work); 212162306a36Sopenharmony_ci nvme_tcp_teardown_io_queues(ctrl, false); 212262306a36Sopenharmony_ci /* unquiesce to fail fast pending requests */ 212362306a36Sopenharmony_ci nvme_unquiesce_io_queues(ctrl); 212462306a36Sopenharmony_ci nvme_tcp_teardown_admin_queue(ctrl, false); 212562306a36Sopenharmony_ci nvme_unquiesce_admin_queue(ctrl); 212662306a36Sopenharmony_ci nvme_auth_stop(ctrl); 212762306a36Sopenharmony_ci 212862306a36Sopenharmony_ci if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_CONNECTING)) { 212962306a36Sopenharmony_ci /* state change failure is ok if we started ctrl delete */ 213062306a36Sopenharmony_ci enum nvme_ctrl_state state = nvme_ctrl_state(ctrl); 213162306a36Sopenharmony_ci 213262306a36Sopenharmony_ci WARN_ON_ONCE(state != NVME_CTRL_DELETING && 213362306a36Sopenharmony_ci state != NVME_CTRL_DELETING_NOIO); 213462306a36Sopenharmony_ci return; 213562306a36Sopenharmony_ci } 213662306a36Sopenharmony_ci 213762306a36Sopenharmony_ci nvme_tcp_reconnect_or_remove(ctrl); 213862306a36Sopenharmony_ci} 213962306a36Sopenharmony_ci 214062306a36Sopenharmony_cistatic void nvme_tcp_teardown_ctrl(struct nvme_ctrl *ctrl, bool shutdown) 214162306a36Sopenharmony_ci{ 214262306a36Sopenharmony_ci nvme_tcp_teardown_io_queues(ctrl, shutdown); 214362306a36Sopenharmony_ci nvme_quiesce_admin_queue(ctrl); 214462306a36Sopenharmony_ci nvme_disable_ctrl(ctrl, shutdown); 214562306a36Sopenharmony_ci nvme_tcp_teardown_admin_queue(ctrl, shutdown); 214662306a36Sopenharmony_ci} 214762306a36Sopenharmony_ci 214862306a36Sopenharmony_cistatic void nvme_tcp_delete_ctrl(struct nvme_ctrl *ctrl) 214962306a36Sopenharmony_ci{ 215062306a36Sopenharmony_ci nvme_tcp_teardown_ctrl(ctrl, true); 215162306a36Sopenharmony_ci} 215262306a36Sopenharmony_ci 215362306a36Sopenharmony_cistatic void nvme_reset_ctrl_work(struct work_struct *work) 215462306a36Sopenharmony_ci{ 215562306a36Sopenharmony_ci struct nvme_ctrl *ctrl = 215662306a36Sopenharmony_ci container_of(work, struct nvme_ctrl, reset_work); 215762306a36Sopenharmony_ci 215862306a36Sopenharmony_ci nvme_stop_ctrl(ctrl); 215962306a36Sopenharmony_ci nvme_tcp_teardown_ctrl(ctrl, false); 216062306a36Sopenharmony_ci 216162306a36Sopenharmony_ci if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_CONNECTING)) { 216262306a36Sopenharmony_ci /* state change failure is ok if we started ctrl delete */ 216362306a36Sopenharmony_ci enum nvme_ctrl_state state = nvme_ctrl_state(ctrl); 216462306a36Sopenharmony_ci 216562306a36Sopenharmony_ci WARN_ON_ONCE(state != NVME_CTRL_DELETING && 216662306a36Sopenharmony_ci state != NVME_CTRL_DELETING_NOIO); 216762306a36Sopenharmony_ci return; 216862306a36Sopenharmony_ci } 216962306a36Sopenharmony_ci 217062306a36Sopenharmony_ci if (nvme_tcp_setup_ctrl(ctrl, false)) 217162306a36Sopenharmony_ci goto out_fail; 217262306a36Sopenharmony_ci 217362306a36Sopenharmony_ci return; 217462306a36Sopenharmony_ci 217562306a36Sopenharmony_ciout_fail: 217662306a36Sopenharmony_ci ++ctrl->nr_reconnects; 217762306a36Sopenharmony_ci nvme_tcp_reconnect_or_remove(ctrl); 217862306a36Sopenharmony_ci} 217962306a36Sopenharmony_ci 218062306a36Sopenharmony_cistatic void nvme_tcp_stop_ctrl(struct nvme_ctrl *ctrl) 218162306a36Sopenharmony_ci{ 218262306a36Sopenharmony_ci flush_work(&to_tcp_ctrl(ctrl)->err_work); 218362306a36Sopenharmony_ci cancel_delayed_work_sync(&to_tcp_ctrl(ctrl)->connect_work); 218462306a36Sopenharmony_ci} 218562306a36Sopenharmony_ci 218662306a36Sopenharmony_cistatic void nvme_tcp_free_ctrl(struct nvme_ctrl *nctrl) 218762306a36Sopenharmony_ci{ 218862306a36Sopenharmony_ci struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl); 218962306a36Sopenharmony_ci 219062306a36Sopenharmony_ci if (list_empty(&ctrl->list)) 219162306a36Sopenharmony_ci goto free_ctrl; 219262306a36Sopenharmony_ci 219362306a36Sopenharmony_ci mutex_lock(&nvme_tcp_ctrl_mutex); 219462306a36Sopenharmony_ci list_del(&ctrl->list); 219562306a36Sopenharmony_ci mutex_unlock(&nvme_tcp_ctrl_mutex); 219662306a36Sopenharmony_ci 219762306a36Sopenharmony_ci nvmf_free_options(nctrl->opts); 219862306a36Sopenharmony_cifree_ctrl: 219962306a36Sopenharmony_ci kfree(ctrl->queues); 220062306a36Sopenharmony_ci kfree(ctrl); 220162306a36Sopenharmony_ci} 220262306a36Sopenharmony_ci 220362306a36Sopenharmony_cistatic void nvme_tcp_set_sg_null(struct nvme_command *c) 220462306a36Sopenharmony_ci{ 220562306a36Sopenharmony_ci struct nvme_sgl_desc *sg = &c->common.dptr.sgl; 220662306a36Sopenharmony_ci 220762306a36Sopenharmony_ci sg->addr = 0; 220862306a36Sopenharmony_ci sg->length = 0; 220962306a36Sopenharmony_ci sg->type = (NVME_TRANSPORT_SGL_DATA_DESC << 4) | 221062306a36Sopenharmony_ci NVME_SGL_FMT_TRANSPORT_A; 221162306a36Sopenharmony_ci} 221262306a36Sopenharmony_ci 221362306a36Sopenharmony_cistatic void nvme_tcp_set_sg_inline(struct nvme_tcp_queue *queue, 221462306a36Sopenharmony_ci struct nvme_command *c, u32 data_len) 221562306a36Sopenharmony_ci{ 221662306a36Sopenharmony_ci struct nvme_sgl_desc *sg = &c->common.dptr.sgl; 221762306a36Sopenharmony_ci 221862306a36Sopenharmony_ci sg->addr = cpu_to_le64(queue->ctrl->ctrl.icdoff); 221962306a36Sopenharmony_ci sg->length = cpu_to_le32(data_len); 222062306a36Sopenharmony_ci sg->type = (NVME_SGL_FMT_DATA_DESC << 4) | NVME_SGL_FMT_OFFSET; 222162306a36Sopenharmony_ci} 222262306a36Sopenharmony_ci 222362306a36Sopenharmony_cistatic void nvme_tcp_set_sg_host_data(struct nvme_command *c, 222462306a36Sopenharmony_ci u32 data_len) 222562306a36Sopenharmony_ci{ 222662306a36Sopenharmony_ci struct nvme_sgl_desc *sg = &c->common.dptr.sgl; 222762306a36Sopenharmony_ci 222862306a36Sopenharmony_ci sg->addr = 0; 222962306a36Sopenharmony_ci sg->length = cpu_to_le32(data_len); 223062306a36Sopenharmony_ci sg->type = (NVME_TRANSPORT_SGL_DATA_DESC << 4) | 223162306a36Sopenharmony_ci NVME_SGL_FMT_TRANSPORT_A; 223262306a36Sopenharmony_ci} 223362306a36Sopenharmony_ci 223462306a36Sopenharmony_cistatic void nvme_tcp_submit_async_event(struct nvme_ctrl *arg) 223562306a36Sopenharmony_ci{ 223662306a36Sopenharmony_ci struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(arg); 223762306a36Sopenharmony_ci struct nvme_tcp_queue *queue = &ctrl->queues[0]; 223862306a36Sopenharmony_ci struct nvme_tcp_cmd_pdu *pdu = ctrl->async_req.pdu; 223962306a36Sopenharmony_ci struct nvme_command *cmd = &pdu->cmd; 224062306a36Sopenharmony_ci u8 hdgst = nvme_tcp_hdgst_len(queue); 224162306a36Sopenharmony_ci 224262306a36Sopenharmony_ci memset(pdu, 0, sizeof(*pdu)); 224362306a36Sopenharmony_ci pdu->hdr.type = nvme_tcp_cmd; 224462306a36Sopenharmony_ci if (queue->hdr_digest) 224562306a36Sopenharmony_ci pdu->hdr.flags |= NVME_TCP_F_HDGST; 224662306a36Sopenharmony_ci pdu->hdr.hlen = sizeof(*pdu); 224762306a36Sopenharmony_ci pdu->hdr.plen = cpu_to_le32(pdu->hdr.hlen + hdgst); 224862306a36Sopenharmony_ci 224962306a36Sopenharmony_ci cmd->common.opcode = nvme_admin_async_event; 225062306a36Sopenharmony_ci cmd->common.command_id = NVME_AQ_BLK_MQ_DEPTH; 225162306a36Sopenharmony_ci cmd->common.flags |= NVME_CMD_SGL_METABUF; 225262306a36Sopenharmony_ci nvme_tcp_set_sg_null(cmd); 225362306a36Sopenharmony_ci 225462306a36Sopenharmony_ci ctrl->async_req.state = NVME_TCP_SEND_CMD_PDU; 225562306a36Sopenharmony_ci ctrl->async_req.offset = 0; 225662306a36Sopenharmony_ci ctrl->async_req.curr_bio = NULL; 225762306a36Sopenharmony_ci ctrl->async_req.data_len = 0; 225862306a36Sopenharmony_ci 225962306a36Sopenharmony_ci nvme_tcp_queue_request(&ctrl->async_req, true, true); 226062306a36Sopenharmony_ci} 226162306a36Sopenharmony_ci 226262306a36Sopenharmony_cistatic void nvme_tcp_complete_timed_out(struct request *rq) 226362306a36Sopenharmony_ci{ 226462306a36Sopenharmony_ci struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); 226562306a36Sopenharmony_ci struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl; 226662306a36Sopenharmony_ci 226762306a36Sopenharmony_ci nvme_tcp_stop_queue(ctrl, nvme_tcp_queue_id(req->queue)); 226862306a36Sopenharmony_ci nvmf_complete_timed_out_request(rq); 226962306a36Sopenharmony_ci} 227062306a36Sopenharmony_ci 227162306a36Sopenharmony_cistatic enum blk_eh_timer_return nvme_tcp_timeout(struct request *rq) 227262306a36Sopenharmony_ci{ 227362306a36Sopenharmony_ci struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); 227462306a36Sopenharmony_ci struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl; 227562306a36Sopenharmony_ci struct nvme_tcp_cmd_pdu *pdu = nvme_tcp_req_cmd_pdu(req); 227662306a36Sopenharmony_ci u8 opc = pdu->cmd.common.opcode, fctype = pdu->cmd.fabrics.fctype; 227762306a36Sopenharmony_ci int qid = nvme_tcp_queue_id(req->queue); 227862306a36Sopenharmony_ci 227962306a36Sopenharmony_ci dev_warn(ctrl->device, 228062306a36Sopenharmony_ci "queue %d: timeout cid %#x type %d opcode %#x (%s)\n", 228162306a36Sopenharmony_ci nvme_tcp_queue_id(req->queue), nvme_cid(rq), pdu->hdr.type, 228262306a36Sopenharmony_ci opc, nvme_opcode_str(qid, opc, fctype)); 228362306a36Sopenharmony_ci 228462306a36Sopenharmony_ci if (nvme_ctrl_state(ctrl) != NVME_CTRL_LIVE) { 228562306a36Sopenharmony_ci /* 228662306a36Sopenharmony_ci * If we are resetting, connecting or deleting we should 228762306a36Sopenharmony_ci * complete immediately because we may block controller 228862306a36Sopenharmony_ci * teardown or setup sequence 228962306a36Sopenharmony_ci * - ctrl disable/shutdown fabrics requests 229062306a36Sopenharmony_ci * - connect requests 229162306a36Sopenharmony_ci * - initialization admin requests 229262306a36Sopenharmony_ci * - I/O requests that entered after unquiescing and 229362306a36Sopenharmony_ci * the controller stopped responding 229462306a36Sopenharmony_ci * 229562306a36Sopenharmony_ci * All other requests should be cancelled by the error 229662306a36Sopenharmony_ci * recovery work, so it's fine that we fail it here. 229762306a36Sopenharmony_ci */ 229862306a36Sopenharmony_ci nvme_tcp_complete_timed_out(rq); 229962306a36Sopenharmony_ci return BLK_EH_DONE; 230062306a36Sopenharmony_ci } 230162306a36Sopenharmony_ci 230262306a36Sopenharmony_ci /* 230362306a36Sopenharmony_ci * LIVE state should trigger the normal error recovery which will 230462306a36Sopenharmony_ci * handle completing this request. 230562306a36Sopenharmony_ci */ 230662306a36Sopenharmony_ci nvme_tcp_error_recovery(ctrl); 230762306a36Sopenharmony_ci return BLK_EH_RESET_TIMER; 230862306a36Sopenharmony_ci} 230962306a36Sopenharmony_ci 231062306a36Sopenharmony_cistatic blk_status_t nvme_tcp_map_data(struct nvme_tcp_queue *queue, 231162306a36Sopenharmony_ci struct request *rq) 231262306a36Sopenharmony_ci{ 231362306a36Sopenharmony_ci struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); 231462306a36Sopenharmony_ci struct nvme_tcp_cmd_pdu *pdu = nvme_tcp_req_cmd_pdu(req); 231562306a36Sopenharmony_ci struct nvme_command *c = &pdu->cmd; 231662306a36Sopenharmony_ci 231762306a36Sopenharmony_ci c->common.flags |= NVME_CMD_SGL_METABUF; 231862306a36Sopenharmony_ci 231962306a36Sopenharmony_ci if (!blk_rq_nr_phys_segments(rq)) 232062306a36Sopenharmony_ci nvme_tcp_set_sg_null(c); 232162306a36Sopenharmony_ci else if (rq_data_dir(rq) == WRITE && 232262306a36Sopenharmony_ci req->data_len <= nvme_tcp_inline_data_size(req)) 232362306a36Sopenharmony_ci nvme_tcp_set_sg_inline(queue, c, req->data_len); 232462306a36Sopenharmony_ci else 232562306a36Sopenharmony_ci nvme_tcp_set_sg_host_data(c, req->data_len); 232662306a36Sopenharmony_ci 232762306a36Sopenharmony_ci return 0; 232862306a36Sopenharmony_ci} 232962306a36Sopenharmony_ci 233062306a36Sopenharmony_cistatic blk_status_t nvme_tcp_setup_cmd_pdu(struct nvme_ns *ns, 233162306a36Sopenharmony_ci struct request *rq) 233262306a36Sopenharmony_ci{ 233362306a36Sopenharmony_ci struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); 233462306a36Sopenharmony_ci struct nvme_tcp_cmd_pdu *pdu = nvme_tcp_req_cmd_pdu(req); 233562306a36Sopenharmony_ci struct nvme_tcp_queue *queue = req->queue; 233662306a36Sopenharmony_ci u8 hdgst = nvme_tcp_hdgst_len(queue), ddgst = 0; 233762306a36Sopenharmony_ci blk_status_t ret; 233862306a36Sopenharmony_ci 233962306a36Sopenharmony_ci ret = nvme_setup_cmd(ns, rq); 234062306a36Sopenharmony_ci if (ret) 234162306a36Sopenharmony_ci return ret; 234262306a36Sopenharmony_ci 234362306a36Sopenharmony_ci req->state = NVME_TCP_SEND_CMD_PDU; 234462306a36Sopenharmony_ci req->status = cpu_to_le16(NVME_SC_SUCCESS); 234562306a36Sopenharmony_ci req->offset = 0; 234662306a36Sopenharmony_ci req->data_sent = 0; 234762306a36Sopenharmony_ci req->pdu_len = 0; 234862306a36Sopenharmony_ci req->pdu_sent = 0; 234962306a36Sopenharmony_ci req->h2cdata_left = 0; 235062306a36Sopenharmony_ci req->data_len = blk_rq_nr_phys_segments(rq) ? 235162306a36Sopenharmony_ci blk_rq_payload_bytes(rq) : 0; 235262306a36Sopenharmony_ci req->curr_bio = rq->bio; 235362306a36Sopenharmony_ci if (req->curr_bio && req->data_len) 235462306a36Sopenharmony_ci nvme_tcp_init_iter(req, rq_data_dir(rq)); 235562306a36Sopenharmony_ci 235662306a36Sopenharmony_ci if (rq_data_dir(rq) == WRITE && 235762306a36Sopenharmony_ci req->data_len <= nvme_tcp_inline_data_size(req)) 235862306a36Sopenharmony_ci req->pdu_len = req->data_len; 235962306a36Sopenharmony_ci 236062306a36Sopenharmony_ci pdu->hdr.type = nvme_tcp_cmd; 236162306a36Sopenharmony_ci pdu->hdr.flags = 0; 236262306a36Sopenharmony_ci if (queue->hdr_digest) 236362306a36Sopenharmony_ci pdu->hdr.flags |= NVME_TCP_F_HDGST; 236462306a36Sopenharmony_ci if (queue->data_digest && req->pdu_len) { 236562306a36Sopenharmony_ci pdu->hdr.flags |= NVME_TCP_F_DDGST; 236662306a36Sopenharmony_ci ddgst = nvme_tcp_ddgst_len(queue); 236762306a36Sopenharmony_ci } 236862306a36Sopenharmony_ci pdu->hdr.hlen = sizeof(*pdu); 236962306a36Sopenharmony_ci pdu->hdr.pdo = req->pdu_len ? pdu->hdr.hlen + hdgst : 0; 237062306a36Sopenharmony_ci pdu->hdr.plen = 237162306a36Sopenharmony_ci cpu_to_le32(pdu->hdr.hlen + hdgst + req->pdu_len + ddgst); 237262306a36Sopenharmony_ci 237362306a36Sopenharmony_ci ret = nvme_tcp_map_data(queue, rq); 237462306a36Sopenharmony_ci if (unlikely(ret)) { 237562306a36Sopenharmony_ci nvme_cleanup_cmd(rq); 237662306a36Sopenharmony_ci dev_err(queue->ctrl->ctrl.device, 237762306a36Sopenharmony_ci "Failed to map data (%d)\n", ret); 237862306a36Sopenharmony_ci return ret; 237962306a36Sopenharmony_ci } 238062306a36Sopenharmony_ci 238162306a36Sopenharmony_ci return 0; 238262306a36Sopenharmony_ci} 238362306a36Sopenharmony_ci 238462306a36Sopenharmony_cistatic void nvme_tcp_commit_rqs(struct blk_mq_hw_ctx *hctx) 238562306a36Sopenharmony_ci{ 238662306a36Sopenharmony_ci struct nvme_tcp_queue *queue = hctx->driver_data; 238762306a36Sopenharmony_ci 238862306a36Sopenharmony_ci if (!llist_empty(&queue->req_list)) 238962306a36Sopenharmony_ci queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work); 239062306a36Sopenharmony_ci} 239162306a36Sopenharmony_ci 239262306a36Sopenharmony_cistatic blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx, 239362306a36Sopenharmony_ci const struct blk_mq_queue_data *bd) 239462306a36Sopenharmony_ci{ 239562306a36Sopenharmony_ci struct nvme_ns *ns = hctx->queue->queuedata; 239662306a36Sopenharmony_ci struct nvme_tcp_queue *queue = hctx->driver_data; 239762306a36Sopenharmony_ci struct request *rq = bd->rq; 239862306a36Sopenharmony_ci struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); 239962306a36Sopenharmony_ci bool queue_ready = test_bit(NVME_TCP_Q_LIVE, &queue->flags); 240062306a36Sopenharmony_ci blk_status_t ret; 240162306a36Sopenharmony_ci 240262306a36Sopenharmony_ci if (!nvme_check_ready(&queue->ctrl->ctrl, rq, queue_ready)) 240362306a36Sopenharmony_ci return nvme_fail_nonready_command(&queue->ctrl->ctrl, rq); 240462306a36Sopenharmony_ci 240562306a36Sopenharmony_ci ret = nvme_tcp_setup_cmd_pdu(ns, rq); 240662306a36Sopenharmony_ci if (unlikely(ret)) 240762306a36Sopenharmony_ci return ret; 240862306a36Sopenharmony_ci 240962306a36Sopenharmony_ci nvme_start_request(rq); 241062306a36Sopenharmony_ci 241162306a36Sopenharmony_ci nvme_tcp_queue_request(req, true, bd->last); 241262306a36Sopenharmony_ci 241362306a36Sopenharmony_ci return BLK_STS_OK; 241462306a36Sopenharmony_ci} 241562306a36Sopenharmony_ci 241662306a36Sopenharmony_cistatic void nvme_tcp_map_queues(struct blk_mq_tag_set *set) 241762306a36Sopenharmony_ci{ 241862306a36Sopenharmony_ci struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(set->driver_data); 241962306a36Sopenharmony_ci 242062306a36Sopenharmony_ci nvmf_map_queues(set, &ctrl->ctrl, ctrl->io_queues); 242162306a36Sopenharmony_ci} 242262306a36Sopenharmony_ci 242362306a36Sopenharmony_cistatic int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) 242462306a36Sopenharmony_ci{ 242562306a36Sopenharmony_ci struct nvme_tcp_queue *queue = hctx->driver_data; 242662306a36Sopenharmony_ci struct sock *sk = queue->sock->sk; 242762306a36Sopenharmony_ci 242862306a36Sopenharmony_ci if (!test_bit(NVME_TCP_Q_LIVE, &queue->flags)) 242962306a36Sopenharmony_ci return 0; 243062306a36Sopenharmony_ci 243162306a36Sopenharmony_ci set_bit(NVME_TCP_Q_POLLING, &queue->flags); 243262306a36Sopenharmony_ci if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue)) 243362306a36Sopenharmony_ci sk_busy_loop(sk, true); 243462306a36Sopenharmony_ci nvme_tcp_try_recv(queue); 243562306a36Sopenharmony_ci clear_bit(NVME_TCP_Q_POLLING, &queue->flags); 243662306a36Sopenharmony_ci return queue->nr_cqe; 243762306a36Sopenharmony_ci} 243862306a36Sopenharmony_ci 243962306a36Sopenharmony_cistatic int nvme_tcp_get_address(struct nvme_ctrl *ctrl, char *buf, int size) 244062306a36Sopenharmony_ci{ 244162306a36Sopenharmony_ci struct nvme_tcp_queue *queue = &to_tcp_ctrl(ctrl)->queues[0]; 244262306a36Sopenharmony_ci struct sockaddr_storage src_addr; 244362306a36Sopenharmony_ci int ret, len; 244462306a36Sopenharmony_ci 244562306a36Sopenharmony_ci len = nvmf_get_address(ctrl, buf, size); 244662306a36Sopenharmony_ci 244762306a36Sopenharmony_ci mutex_lock(&queue->queue_lock); 244862306a36Sopenharmony_ci 244962306a36Sopenharmony_ci if (!test_bit(NVME_TCP_Q_LIVE, &queue->flags)) 245062306a36Sopenharmony_ci goto done; 245162306a36Sopenharmony_ci ret = kernel_getsockname(queue->sock, (struct sockaddr *)&src_addr); 245262306a36Sopenharmony_ci if (ret > 0) { 245362306a36Sopenharmony_ci if (len > 0) 245462306a36Sopenharmony_ci len--; /* strip trailing newline */ 245562306a36Sopenharmony_ci len += scnprintf(buf + len, size - len, "%ssrc_addr=%pISc\n", 245662306a36Sopenharmony_ci (len) ? "," : "", &src_addr); 245762306a36Sopenharmony_ci } 245862306a36Sopenharmony_cidone: 245962306a36Sopenharmony_ci mutex_unlock(&queue->queue_lock); 246062306a36Sopenharmony_ci 246162306a36Sopenharmony_ci return len; 246262306a36Sopenharmony_ci} 246362306a36Sopenharmony_ci 246462306a36Sopenharmony_cistatic const struct blk_mq_ops nvme_tcp_mq_ops = { 246562306a36Sopenharmony_ci .queue_rq = nvme_tcp_queue_rq, 246662306a36Sopenharmony_ci .commit_rqs = nvme_tcp_commit_rqs, 246762306a36Sopenharmony_ci .complete = nvme_complete_rq, 246862306a36Sopenharmony_ci .init_request = nvme_tcp_init_request, 246962306a36Sopenharmony_ci .exit_request = nvme_tcp_exit_request, 247062306a36Sopenharmony_ci .init_hctx = nvme_tcp_init_hctx, 247162306a36Sopenharmony_ci .timeout = nvme_tcp_timeout, 247262306a36Sopenharmony_ci .map_queues = nvme_tcp_map_queues, 247362306a36Sopenharmony_ci .poll = nvme_tcp_poll, 247462306a36Sopenharmony_ci}; 247562306a36Sopenharmony_ci 247662306a36Sopenharmony_cistatic const struct blk_mq_ops nvme_tcp_admin_mq_ops = { 247762306a36Sopenharmony_ci .queue_rq = nvme_tcp_queue_rq, 247862306a36Sopenharmony_ci .complete = nvme_complete_rq, 247962306a36Sopenharmony_ci .init_request = nvme_tcp_init_request, 248062306a36Sopenharmony_ci .exit_request = nvme_tcp_exit_request, 248162306a36Sopenharmony_ci .init_hctx = nvme_tcp_init_admin_hctx, 248262306a36Sopenharmony_ci .timeout = nvme_tcp_timeout, 248362306a36Sopenharmony_ci}; 248462306a36Sopenharmony_ci 248562306a36Sopenharmony_cistatic const struct nvme_ctrl_ops nvme_tcp_ctrl_ops = { 248662306a36Sopenharmony_ci .name = "tcp", 248762306a36Sopenharmony_ci .module = THIS_MODULE, 248862306a36Sopenharmony_ci .flags = NVME_F_FABRICS | NVME_F_BLOCKING, 248962306a36Sopenharmony_ci .reg_read32 = nvmf_reg_read32, 249062306a36Sopenharmony_ci .reg_read64 = nvmf_reg_read64, 249162306a36Sopenharmony_ci .reg_write32 = nvmf_reg_write32, 249262306a36Sopenharmony_ci .free_ctrl = nvme_tcp_free_ctrl, 249362306a36Sopenharmony_ci .submit_async_event = nvme_tcp_submit_async_event, 249462306a36Sopenharmony_ci .delete_ctrl = nvme_tcp_delete_ctrl, 249562306a36Sopenharmony_ci .get_address = nvme_tcp_get_address, 249662306a36Sopenharmony_ci .stop_ctrl = nvme_tcp_stop_ctrl, 249762306a36Sopenharmony_ci}; 249862306a36Sopenharmony_ci 249962306a36Sopenharmony_cistatic bool 250062306a36Sopenharmony_cinvme_tcp_existing_controller(struct nvmf_ctrl_options *opts) 250162306a36Sopenharmony_ci{ 250262306a36Sopenharmony_ci struct nvme_tcp_ctrl *ctrl; 250362306a36Sopenharmony_ci bool found = false; 250462306a36Sopenharmony_ci 250562306a36Sopenharmony_ci mutex_lock(&nvme_tcp_ctrl_mutex); 250662306a36Sopenharmony_ci list_for_each_entry(ctrl, &nvme_tcp_ctrl_list, list) { 250762306a36Sopenharmony_ci found = nvmf_ip_options_match(&ctrl->ctrl, opts); 250862306a36Sopenharmony_ci if (found) 250962306a36Sopenharmony_ci break; 251062306a36Sopenharmony_ci } 251162306a36Sopenharmony_ci mutex_unlock(&nvme_tcp_ctrl_mutex); 251262306a36Sopenharmony_ci 251362306a36Sopenharmony_ci return found; 251462306a36Sopenharmony_ci} 251562306a36Sopenharmony_ci 251662306a36Sopenharmony_cistatic struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev, 251762306a36Sopenharmony_ci struct nvmf_ctrl_options *opts) 251862306a36Sopenharmony_ci{ 251962306a36Sopenharmony_ci struct nvme_tcp_ctrl *ctrl; 252062306a36Sopenharmony_ci int ret; 252162306a36Sopenharmony_ci 252262306a36Sopenharmony_ci ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL); 252362306a36Sopenharmony_ci if (!ctrl) 252462306a36Sopenharmony_ci return ERR_PTR(-ENOMEM); 252562306a36Sopenharmony_ci 252662306a36Sopenharmony_ci INIT_LIST_HEAD(&ctrl->list); 252762306a36Sopenharmony_ci ctrl->ctrl.opts = opts; 252862306a36Sopenharmony_ci ctrl->ctrl.queue_count = opts->nr_io_queues + opts->nr_write_queues + 252962306a36Sopenharmony_ci opts->nr_poll_queues + 1; 253062306a36Sopenharmony_ci ctrl->ctrl.sqsize = opts->queue_size - 1; 253162306a36Sopenharmony_ci ctrl->ctrl.kato = opts->kato; 253262306a36Sopenharmony_ci 253362306a36Sopenharmony_ci INIT_DELAYED_WORK(&ctrl->connect_work, 253462306a36Sopenharmony_ci nvme_tcp_reconnect_ctrl_work); 253562306a36Sopenharmony_ci INIT_WORK(&ctrl->err_work, nvme_tcp_error_recovery_work); 253662306a36Sopenharmony_ci INIT_WORK(&ctrl->ctrl.reset_work, nvme_reset_ctrl_work); 253762306a36Sopenharmony_ci 253862306a36Sopenharmony_ci if (!(opts->mask & NVMF_OPT_TRSVCID)) { 253962306a36Sopenharmony_ci opts->trsvcid = 254062306a36Sopenharmony_ci kstrdup(__stringify(NVME_TCP_DISC_PORT), GFP_KERNEL); 254162306a36Sopenharmony_ci if (!opts->trsvcid) { 254262306a36Sopenharmony_ci ret = -ENOMEM; 254362306a36Sopenharmony_ci goto out_free_ctrl; 254462306a36Sopenharmony_ci } 254562306a36Sopenharmony_ci opts->mask |= NVMF_OPT_TRSVCID; 254662306a36Sopenharmony_ci } 254762306a36Sopenharmony_ci 254862306a36Sopenharmony_ci ret = inet_pton_with_scope(&init_net, AF_UNSPEC, 254962306a36Sopenharmony_ci opts->traddr, opts->trsvcid, &ctrl->addr); 255062306a36Sopenharmony_ci if (ret) { 255162306a36Sopenharmony_ci pr_err("malformed address passed: %s:%s\n", 255262306a36Sopenharmony_ci opts->traddr, opts->trsvcid); 255362306a36Sopenharmony_ci goto out_free_ctrl; 255462306a36Sopenharmony_ci } 255562306a36Sopenharmony_ci 255662306a36Sopenharmony_ci if (opts->mask & NVMF_OPT_HOST_TRADDR) { 255762306a36Sopenharmony_ci ret = inet_pton_with_scope(&init_net, AF_UNSPEC, 255862306a36Sopenharmony_ci opts->host_traddr, NULL, &ctrl->src_addr); 255962306a36Sopenharmony_ci if (ret) { 256062306a36Sopenharmony_ci pr_err("malformed src address passed: %s\n", 256162306a36Sopenharmony_ci opts->host_traddr); 256262306a36Sopenharmony_ci goto out_free_ctrl; 256362306a36Sopenharmony_ci } 256462306a36Sopenharmony_ci } 256562306a36Sopenharmony_ci 256662306a36Sopenharmony_ci if (opts->mask & NVMF_OPT_HOST_IFACE) { 256762306a36Sopenharmony_ci if (!__dev_get_by_name(&init_net, opts->host_iface)) { 256862306a36Sopenharmony_ci pr_err("invalid interface passed: %s\n", 256962306a36Sopenharmony_ci opts->host_iface); 257062306a36Sopenharmony_ci ret = -ENODEV; 257162306a36Sopenharmony_ci goto out_free_ctrl; 257262306a36Sopenharmony_ci } 257362306a36Sopenharmony_ci } 257462306a36Sopenharmony_ci 257562306a36Sopenharmony_ci if (!opts->duplicate_connect && nvme_tcp_existing_controller(opts)) { 257662306a36Sopenharmony_ci ret = -EALREADY; 257762306a36Sopenharmony_ci goto out_free_ctrl; 257862306a36Sopenharmony_ci } 257962306a36Sopenharmony_ci 258062306a36Sopenharmony_ci ctrl->queues = kcalloc(ctrl->ctrl.queue_count, sizeof(*ctrl->queues), 258162306a36Sopenharmony_ci GFP_KERNEL); 258262306a36Sopenharmony_ci if (!ctrl->queues) { 258362306a36Sopenharmony_ci ret = -ENOMEM; 258462306a36Sopenharmony_ci goto out_free_ctrl; 258562306a36Sopenharmony_ci } 258662306a36Sopenharmony_ci 258762306a36Sopenharmony_ci ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_tcp_ctrl_ops, 0); 258862306a36Sopenharmony_ci if (ret) 258962306a36Sopenharmony_ci goto out_kfree_queues; 259062306a36Sopenharmony_ci 259162306a36Sopenharmony_ci if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) { 259262306a36Sopenharmony_ci WARN_ON_ONCE(1); 259362306a36Sopenharmony_ci ret = -EINTR; 259462306a36Sopenharmony_ci goto out_uninit_ctrl; 259562306a36Sopenharmony_ci } 259662306a36Sopenharmony_ci 259762306a36Sopenharmony_ci ret = nvme_tcp_setup_ctrl(&ctrl->ctrl, true); 259862306a36Sopenharmony_ci if (ret) 259962306a36Sopenharmony_ci goto out_uninit_ctrl; 260062306a36Sopenharmony_ci 260162306a36Sopenharmony_ci dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISp\n", 260262306a36Sopenharmony_ci nvmf_ctrl_subsysnqn(&ctrl->ctrl), &ctrl->addr); 260362306a36Sopenharmony_ci 260462306a36Sopenharmony_ci mutex_lock(&nvme_tcp_ctrl_mutex); 260562306a36Sopenharmony_ci list_add_tail(&ctrl->list, &nvme_tcp_ctrl_list); 260662306a36Sopenharmony_ci mutex_unlock(&nvme_tcp_ctrl_mutex); 260762306a36Sopenharmony_ci 260862306a36Sopenharmony_ci return &ctrl->ctrl; 260962306a36Sopenharmony_ci 261062306a36Sopenharmony_ciout_uninit_ctrl: 261162306a36Sopenharmony_ci nvme_uninit_ctrl(&ctrl->ctrl); 261262306a36Sopenharmony_ci nvme_put_ctrl(&ctrl->ctrl); 261362306a36Sopenharmony_ci if (ret > 0) 261462306a36Sopenharmony_ci ret = -EIO; 261562306a36Sopenharmony_ci return ERR_PTR(ret); 261662306a36Sopenharmony_ciout_kfree_queues: 261762306a36Sopenharmony_ci kfree(ctrl->queues); 261862306a36Sopenharmony_ciout_free_ctrl: 261962306a36Sopenharmony_ci kfree(ctrl); 262062306a36Sopenharmony_ci return ERR_PTR(ret); 262162306a36Sopenharmony_ci} 262262306a36Sopenharmony_ci 262362306a36Sopenharmony_cistatic struct nvmf_transport_ops nvme_tcp_transport = { 262462306a36Sopenharmony_ci .name = "tcp", 262562306a36Sopenharmony_ci .module = THIS_MODULE, 262662306a36Sopenharmony_ci .required_opts = NVMF_OPT_TRADDR, 262762306a36Sopenharmony_ci .allowed_opts = NVMF_OPT_TRSVCID | NVMF_OPT_RECONNECT_DELAY | 262862306a36Sopenharmony_ci NVMF_OPT_HOST_TRADDR | NVMF_OPT_CTRL_LOSS_TMO | 262962306a36Sopenharmony_ci NVMF_OPT_HDR_DIGEST | NVMF_OPT_DATA_DIGEST | 263062306a36Sopenharmony_ci NVMF_OPT_NR_WRITE_QUEUES | NVMF_OPT_NR_POLL_QUEUES | 263162306a36Sopenharmony_ci NVMF_OPT_TOS | NVMF_OPT_HOST_IFACE, 263262306a36Sopenharmony_ci .create_ctrl = nvme_tcp_create_ctrl, 263362306a36Sopenharmony_ci}; 263462306a36Sopenharmony_ci 263562306a36Sopenharmony_cistatic int __init nvme_tcp_init_module(void) 263662306a36Sopenharmony_ci{ 263762306a36Sopenharmony_ci BUILD_BUG_ON(sizeof(struct nvme_tcp_hdr) != 8); 263862306a36Sopenharmony_ci BUILD_BUG_ON(sizeof(struct nvme_tcp_cmd_pdu) != 72); 263962306a36Sopenharmony_ci BUILD_BUG_ON(sizeof(struct nvme_tcp_data_pdu) != 24); 264062306a36Sopenharmony_ci BUILD_BUG_ON(sizeof(struct nvme_tcp_rsp_pdu) != 24); 264162306a36Sopenharmony_ci BUILD_BUG_ON(sizeof(struct nvme_tcp_r2t_pdu) != 24); 264262306a36Sopenharmony_ci BUILD_BUG_ON(sizeof(struct nvme_tcp_icreq_pdu) != 128); 264362306a36Sopenharmony_ci BUILD_BUG_ON(sizeof(struct nvme_tcp_icresp_pdu) != 128); 264462306a36Sopenharmony_ci BUILD_BUG_ON(sizeof(struct nvme_tcp_term_pdu) != 24); 264562306a36Sopenharmony_ci 264662306a36Sopenharmony_ci nvme_tcp_wq = alloc_workqueue("nvme_tcp_wq", 264762306a36Sopenharmony_ci WQ_MEM_RECLAIM | WQ_HIGHPRI, 0); 264862306a36Sopenharmony_ci if (!nvme_tcp_wq) 264962306a36Sopenharmony_ci return -ENOMEM; 265062306a36Sopenharmony_ci 265162306a36Sopenharmony_ci nvmf_register_transport(&nvme_tcp_transport); 265262306a36Sopenharmony_ci return 0; 265362306a36Sopenharmony_ci} 265462306a36Sopenharmony_ci 265562306a36Sopenharmony_cistatic void __exit nvme_tcp_cleanup_module(void) 265662306a36Sopenharmony_ci{ 265762306a36Sopenharmony_ci struct nvme_tcp_ctrl *ctrl; 265862306a36Sopenharmony_ci 265962306a36Sopenharmony_ci nvmf_unregister_transport(&nvme_tcp_transport); 266062306a36Sopenharmony_ci 266162306a36Sopenharmony_ci mutex_lock(&nvme_tcp_ctrl_mutex); 266262306a36Sopenharmony_ci list_for_each_entry(ctrl, &nvme_tcp_ctrl_list, list) 266362306a36Sopenharmony_ci nvme_delete_ctrl(&ctrl->ctrl); 266462306a36Sopenharmony_ci mutex_unlock(&nvme_tcp_ctrl_mutex); 266562306a36Sopenharmony_ci flush_workqueue(nvme_delete_wq); 266662306a36Sopenharmony_ci 266762306a36Sopenharmony_ci destroy_workqueue(nvme_tcp_wq); 266862306a36Sopenharmony_ci} 266962306a36Sopenharmony_ci 267062306a36Sopenharmony_cimodule_init(nvme_tcp_init_module); 267162306a36Sopenharmony_cimodule_exit(nvme_tcp_cleanup_module); 267262306a36Sopenharmony_ci 267362306a36Sopenharmony_ciMODULE_LICENSE("GPL v2"); 2674