162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * NVMe over Fabrics TCP host.
462306a36Sopenharmony_ci * Copyright (c) 2018 Lightbits Labs. All rights reserved.
562306a36Sopenharmony_ci */
662306a36Sopenharmony_ci#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
762306a36Sopenharmony_ci#include <linux/module.h>
862306a36Sopenharmony_ci#include <linux/init.h>
962306a36Sopenharmony_ci#include <linux/slab.h>
1062306a36Sopenharmony_ci#include <linux/err.h>
1162306a36Sopenharmony_ci#include <linux/nvme-tcp.h>
1262306a36Sopenharmony_ci#include <net/sock.h>
1362306a36Sopenharmony_ci#include <net/tcp.h>
1462306a36Sopenharmony_ci#include <linux/blk-mq.h>
1562306a36Sopenharmony_ci#include <crypto/hash.h>
1662306a36Sopenharmony_ci#include <net/busy_poll.h>
1762306a36Sopenharmony_ci#include <trace/events/sock.h>
1862306a36Sopenharmony_ci
1962306a36Sopenharmony_ci#include "nvme.h"
2062306a36Sopenharmony_ci#include "fabrics.h"
2162306a36Sopenharmony_ci
2262306a36Sopenharmony_cistruct nvme_tcp_queue;
2362306a36Sopenharmony_ci
2462306a36Sopenharmony_ci/* Define the socket priority to use for connections were it is desirable
2562306a36Sopenharmony_ci * that the NIC consider performing optimized packet processing or filtering.
2662306a36Sopenharmony_ci * A non-zero value being sufficient to indicate general consideration of any
2762306a36Sopenharmony_ci * possible optimization.  Making it a module param allows for alternative
2862306a36Sopenharmony_ci * values that may be unique for some NIC implementations.
2962306a36Sopenharmony_ci */
3062306a36Sopenharmony_cistatic int so_priority;
3162306a36Sopenharmony_cimodule_param(so_priority, int, 0644);
3262306a36Sopenharmony_ciMODULE_PARM_DESC(so_priority, "nvme tcp socket optimize priority");
3362306a36Sopenharmony_ci
#ifdef CONFIG_DEBUG_LOCK_ALLOC
/*
 * Lockdep tracks dependencies for nvme-tcp and for user contexts alike, so it
 * can report a circular dependency of the form
 *   sk_lock -> mmap_lock (page fault) -> fs locks -> sk_lock
 * Giving nvme-tcp sockets a lock class of their own keeps lockdep from
 * conflating nvme-tcp socket use with user-space socket API use.
 *
 * Index 0 of each key array serves AF_INET, index 1 serves AF_INET6.
 */
static struct lock_class_key nvme_tcp_sk_key[2];
static struct lock_class_key nvme_tcp_slock_key[2];

/*
 * Re-initialise the locks of the struct sock behind @sock with the nvme-tcp
 * specific class keys and names for its address family. Warns once and
 * returns without touching the socket when reclassification is not allowed;
 * warns once for any family other than AF_INET and AF_INET6.
 */
static void nvme_tcp_reclassify_socket(struct socket *sock)
{
	struct sock *sk = sock->sk;
	int family;

	if (WARN_ON_ONCE(!sock_allow_reclassification(sk)))
		return;

	family = sk->sk_family;
	if (family == AF_INET) {
		sock_lock_init_class_and_name(sk, "slock-AF_INET-NVME",
					      &nvme_tcp_slock_key[0],
					      "sk_lock-AF_INET-NVME",
					      &nvme_tcp_sk_key[0]);
	} else if (family == AF_INET6) {
		sock_lock_init_class_and_name(sk, "slock-AF_INET6-NVME",
					      &nvme_tcp_slock_key[1],
					      "sk_lock-AF_INET6-NVME",
					      &nvme_tcp_sk_key[1]);
	} else {
		WARN_ON_ONCE(1);
	}
}
#else
/* Nothing to reclassify when CONFIG_DEBUG_LOCK_ALLOC is not set. */
static void nvme_tcp_reclassify_socket(struct socket *sock)
{
}
#endif
7162306a36Sopenharmony_ci
/* Send-side state of a request, stored in nvme_tcp_request.state. */
enum nvme_tcp_send_state {
	NVME_TCP_SEND_CMD_PDU	= 0,
	NVME_TCP_SEND_H2C_PDU	= 1,
	NVME_TCP_SEND_DATA	= 2,
	NVME_TCP_SEND_DDGST	= 3,
};
7862306a36Sopenharmony_ci
7962306a36Sopenharmony_cistruct nvme_tcp_request {
8062306a36Sopenharmony_ci	struct nvme_request	req;
8162306a36Sopenharmony_ci	void			*pdu;
8262306a36Sopenharmony_ci	struct nvme_tcp_queue	*queue;
8362306a36Sopenharmony_ci	u32			data_len;
8462306a36Sopenharmony_ci	u32			pdu_len;
8562306a36Sopenharmony_ci	u32			pdu_sent;
8662306a36Sopenharmony_ci	u32			h2cdata_left;
8762306a36Sopenharmony_ci	u32			h2cdata_offset;
8862306a36Sopenharmony_ci	u16			ttag;
8962306a36Sopenharmony_ci	__le16			status;
9062306a36Sopenharmony_ci	struct list_head	entry;
9162306a36Sopenharmony_ci	struct llist_node	lentry;
9262306a36Sopenharmony_ci	__le32			ddgst;
9362306a36Sopenharmony_ci
9462306a36Sopenharmony_ci	struct bio		*curr_bio;
9562306a36Sopenharmony_ci	struct iov_iter		iter;
9662306a36Sopenharmony_ci
9762306a36Sopenharmony_ci	/* send state */
9862306a36Sopenharmony_ci	size_t			offset;
9962306a36Sopenharmony_ci	size_t			data_sent;
10062306a36Sopenharmony_ci	enum nvme_tcp_send_state state;
10162306a36Sopenharmony_ci};
10262306a36Sopenharmony_ci
/*
 * Queue flag values.
 * NOTE(review): presumably bit numbers within nvme_tcp_queue.flags - confirm
 * against the users of these constants.
 */
enum nvme_tcp_queue_flags {
	NVME_TCP_Q_ALLOCATED = 0,
	NVME_TCP_Q_LIVE,
	NVME_TCP_Q_POLLING,
};
10862306a36Sopenharmony_ci
/* Receive-side state of a queue, as reported by nvme_tcp_recv_state(). */
enum nvme_tcp_recv_state {
	NVME_TCP_RECV_PDU	= 0,
	NVME_TCP_RECV_DATA	= 1,
	NVME_TCP_RECV_DDGST	= 2,
};
11462306a36Sopenharmony_ci
11562306a36Sopenharmony_cistruct nvme_tcp_ctrl;
11662306a36Sopenharmony_cistruct nvme_tcp_queue {
11762306a36Sopenharmony_ci	struct socket		*sock;
11862306a36Sopenharmony_ci	struct work_struct	io_work;
11962306a36Sopenharmony_ci	int			io_cpu;
12062306a36Sopenharmony_ci
12162306a36Sopenharmony_ci	struct mutex		queue_lock;
12262306a36Sopenharmony_ci	struct mutex		send_mutex;
12362306a36Sopenharmony_ci	struct llist_head	req_list;
12462306a36Sopenharmony_ci	struct list_head	send_list;
12562306a36Sopenharmony_ci
12662306a36Sopenharmony_ci	/* recv state */
12762306a36Sopenharmony_ci	void			*pdu;
12862306a36Sopenharmony_ci	int			pdu_remaining;
12962306a36Sopenharmony_ci	int			pdu_offset;
13062306a36Sopenharmony_ci	size_t			data_remaining;
13162306a36Sopenharmony_ci	size_t			ddgst_remaining;
13262306a36Sopenharmony_ci	unsigned int		nr_cqe;
13362306a36Sopenharmony_ci
13462306a36Sopenharmony_ci	/* send state */
13562306a36Sopenharmony_ci	struct nvme_tcp_request *request;
13662306a36Sopenharmony_ci
13762306a36Sopenharmony_ci	u32			maxh2cdata;
13862306a36Sopenharmony_ci	size_t			cmnd_capsule_len;
13962306a36Sopenharmony_ci	struct nvme_tcp_ctrl	*ctrl;
14062306a36Sopenharmony_ci	unsigned long		flags;
14162306a36Sopenharmony_ci	bool			rd_enabled;
14262306a36Sopenharmony_ci
14362306a36Sopenharmony_ci	bool			hdr_digest;
14462306a36Sopenharmony_ci	bool			data_digest;
14562306a36Sopenharmony_ci	struct ahash_request	*rcv_hash;
14662306a36Sopenharmony_ci	struct ahash_request	*snd_hash;
14762306a36Sopenharmony_ci	__le32			exp_ddgst;
14862306a36Sopenharmony_ci	__le32			recv_ddgst;
14962306a36Sopenharmony_ci
15062306a36Sopenharmony_ci	struct page_frag_cache	pf_cache;
15162306a36Sopenharmony_ci
15262306a36Sopenharmony_ci	void (*state_change)(struct sock *);
15362306a36Sopenharmony_ci	void (*data_ready)(struct sock *);
15462306a36Sopenharmony_ci	void (*write_space)(struct sock *);
15562306a36Sopenharmony_ci};
15662306a36Sopenharmony_ci
15762306a36Sopenharmony_cistruct nvme_tcp_ctrl {
15862306a36Sopenharmony_ci	/* read only in the hot path */
15962306a36Sopenharmony_ci	struct nvme_tcp_queue	*queues;
16062306a36Sopenharmony_ci	struct blk_mq_tag_set	tag_set;
16162306a36Sopenharmony_ci
16262306a36Sopenharmony_ci	/* other member variables */
16362306a36Sopenharmony_ci	struct list_head	list;
16462306a36Sopenharmony_ci	struct blk_mq_tag_set	admin_tag_set;
16562306a36Sopenharmony_ci	struct sockaddr_storage addr;
16662306a36Sopenharmony_ci	struct sockaddr_storage src_addr;
16762306a36Sopenharmony_ci	struct nvme_ctrl	ctrl;
16862306a36Sopenharmony_ci
16962306a36Sopenharmony_ci	struct work_struct	err_work;
17062306a36Sopenharmony_ci	struct delayed_work	connect_work;
17162306a36Sopenharmony_ci	struct nvme_tcp_request async_req;
17262306a36Sopenharmony_ci	u32			io_queues[HCTX_MAX_TYPES];
17362306a36Sopenharmony_ci};
17462306a36Sopenharmony_ci
17562306a36Sopenharmony_cistatic LIST_HEAD(nvme_tcp_ctrl_list);
17662306a36Sopenharmony_cistatic DEFINE_MUTEX(nvme_tcp_ctrl_mutex);
17762306a36Sopenharmony_cistatic struct workqueue_struct *nvme_tcp_wq;
17862306a36Sopenharmony_cistatic const struct blk_mq_ops nvme_tcp_mq_ops;
17962306a36Sopenharmony_cistatic const struct blk_mq_ops nvme_tcp_admin_mq_ops;
18062306a36Sopenharmony_cistatic int nvme_tcp_try_send(struct nvme_tcp_queue *queue);
18162306a36Sopenharmony_ci
18262306a36Sopenharmony_cistatic inline struct nvme_tcp_ctrl *to_tcp_ctrl(struct nvme_ctrl *ctrl)
18362306a36Sopenharmony_ci{
18462306a36Sopenharmony_ci	return container_of(ctrl, struct nvme_tcp_ctrl, ctrl);
18562306a36Sopenharmony_ci}
18662306a36Sopenharmony_ci
18762306a36Sopenharmony_cistatic inline int nvme_tcp_queue_id(struct nvme_tcp_queue *queue)
18862306a36Sopenharmony_ci{
18962306a36Sopenharmony_ci	return queue - queue->ctrl->queues;
19062306a36Sopenharmony_ci}
19162306a36Sopenharmony_ci
19262306a36Sopenharmony_cistatic inline struct blk_mq_tags *nvme_tcp_tagset(struct nvme_tcp_queue *queue)
19362306a36Sopenharmony_ci{
19462306a36Sopenharmony_ci	u32 queue_idx = nvme_tcp_queue_id(queue);
19562306a36Sopenharmony_ci
19662306a36Sopenharmony_ci	if (queue_idx == 0)
19762306a36Sopenharmony_ci		return queue->ctrl->admin_tag_set.tags[queue_idx];
19862306a36Sopenharmony_ci	return queue->ctrl->tag_set.tags[queue_idx - 1];
19962306a36Sopenharmony_ci}
20062306a36Sopenharmony_ci
20162306a36Sopenharmony_cistatic inline u8 nvme_tcp_hdgst_len(struct nvme_tcp_queue *queue)
20262306a36Sopenharmony_ci{
20362306a36Sopenharmony_ci	return queue->hdr_digest ? NVME_TCP_DIGEST_LENGTH : 0;
20462306a36Sopenharmony_ci}
20562306a36Sopenharmony_ci
20662306a36Sopenharmony_cistatic inline u8 nvme_tcp_ddgst_len(struct nvme_tcp_queue *queue)
20762306a36Sopenharmony_ci{
20862306a36Sopenharmony_ci	return queue->data_digest ? NVME_TCP_DIGEST_LENGTH : 0;
20962306a36Sopenharmony_ci}
21062306a36Sopenharmony_ci
21162306a36Sopenharmony_cistatic inline void *nvme_tcp_req_cmd_pdu(struct nvme_tcp_request *req)
21262306a36Sopenharmony_ci{
21362306a36Sopenharmony_ci	return req->pdu;
21462306a36Sopenharmony_ci}
21562306a36Sopenharmony_ci
21662306a36Sopenharmony_cistatic inline void *nvme_tcp_req_data_pdu(struct nvme_tcp_request *req)
21762306a36Sopenharmony_ci{
21862306a36Sopenharmony_ci	/* use the pdu space in the back for the data pdu */
21962306a36Sopenharmony_ci	return req->pdu + sizeof(struct nvme_tcp_cmd_pdu) -
22062306a36Sopenharmony_ci		sizeof(struct nvme_tcp_data_pdu);
22162306a36Sopenharmony_ci}
22262306a36Sopenharmony_ci
22362306a36Sopenharmony_cistatic inline size_t nvme_tcp_inline_data_size(struct nvme_tcp_request *req)
22462306a36Sopenharmony_ci{
22562306a36Sopenharmony_ci	if (nvme_is_fabrics(req->req.cmd))
22662306a36Sopenharmony_ci		return NVME_TCP_ADMIN_CCSZ;
22762306a36Sopenharmony_ci	return req->queue->cmnd_capsule_len - sizeof(struct nvme_command);
22862306a36Sopenharmony_ci}
22962306a36Sopenharmony_ci
23062306a36Sopenharmony_cistatic inline bool nvme_tcp_async_req(struct nvme_tcp_request *req)
23162306a36Sopenharmony_ci{
23262306a36Sopenharmony_ci	return req == &req->queue->ctrl->async_req;
23362306a36Sopenharmony_ci}
23462306a36Sopenharmony_ci
23562306a36Sopenharmony_cistatic inline bool nvme_tcp_has_inline_data(struct nvme_tcp_request *req)
23662306a36Sopenharmony_ci{
23762306a36Sopenharmony_ci	struct request *rq;
23862306a36Sopenharmony_ci
23962306a36Sopenharmony_ci	if (unlikely(nvme_tcp_async_req(req)))
24062306a36Sopenharmony_ci		return false; /* async events don't have a request */
24162306a36Sopenharmony_ci
24262306a36Sopenharmony_ci	rq = blk_mq_rq_from_pdu(req);
24362306a36Sopenharmony_ci
24462306a36Sopenharmony_ci	return rq_data_dir(rq) == WRITE && req->data_len &&
24562306a36Sopenharmony_ci		req->data_len <= nvme_tcp_inline_data_size(req);
24662306a36Sopenharmony_ci}
24762306a36Sopenharmony_ci
24862306a36Sopenharmony_cistatic inline struct page *nvme_tcp_req_cur_page(struct nvme_tcp_request *req)
24962306a36Sopenharmony_ci{
25062306a36Sopenharmony_ci	return req->iter.bvec->bv_page;
25162306a36Sopenharmony_ci}
25262306a36Sopenharmony_ci
25362306a36Sopenharmony_cistatic inline size_t nvme_tcp_req_cur_offset(struct nvme_tcp_request *req)
25462306a36Sopenharmony_ci{
25562306a36Sopenharmony_ci	return req->iter.bvec->bv_offset + req->iter.iov_offset;
25662306a36Sopenharmony_ci}
25762306a36Sopenharmony_ci
25862306a36Sopenharmony_cistatic inline size_t nvme_tcp_req_cur_length(struct nvme_tcp_request *req)
25962306a36Sopenharmony_ci{
26062306a36Sopenharmony_ci	return min_t(size_t, iov_iter_single_seg_count(&req->iter),
26162306a36Sopenharmony_ci			req->pdu_len - req->pdu_sent);
26262306a36Sopenharmony_ci}
26362306a36Sopenharmony_ci
26462306a36Sopenharmony_cistatic inline size_t nvme_tcp_pdu_data_left(struct nvme_tcp_request *req)
26562306a36Sopenharmony_ci{
26662306a36Sopenharmony_ci	return rq_data_dir(blk_mq_rq_from_pdu(req)) == WRITE ?
26762306a36Sopenharmony_ci			req->pdu_len - req->pdu_sent : 0;
26862306a36Sopenharmony_ci}
26962306a36Sopenharmony_ci
27062306a36Sopenharmony_cistatic inline size_t nvme_tcp_pdu_last_send(struct nvme_tcp_request *req,
27162306a36Sopenharmony_ci		int len)
27262306a36Sopenharmony_ci{
27362306a36Sopenharmony_ci	return nvme_tcp_pdu_data_left(req) <= len;
27462306a36Sopenharmony_ci}
27562306a36Sopenharmony_ci
27662306a36Sopenharmony_cistatic void nvme_tcp_init_iter(struct nvme_tcp_request *req,
27762306a36Sopenharmony_ci		unsigned int dir)
27862306a36Sopenharmony_ci{
27962306a36Sopenharmony_ci	struct request *rq = blk_mq_rq_from_pdu(req);
28062306a36Sopenharmony_ci	struct bio_vec *vec;
28162306a36Sopenharmony_ci	unsigned int size;
28262306a36Sopenharmony_ci	int nr_bvec;
28362306a36Sopenharmony_ci	size_t offset;
28462306a36Sopenharmony_ci
28562306a36Sopenharmony_ci	if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) {
28662306a36Sopenharmony_ci		vec = &rq->special_vec;
28762306a36Sopenharmony_ci		nr_bvec = 1;
28862306a36Sopenharmony_ci		size = blk_rq_payload_bytes(rq);
28962306a36Sopenharmony_ci		offset = 0;
29062306a36Sopenharmony_ci	} else {
29162306a36Sopenharmony_ci		struct bio *bio = req->curr_bio;
29262306a36Sopenharmony_ci		struct bvec_iter bi;
29362306a36Sopenharmony_ci		struct bio_vec bv;
29462306a36Sopenharmony_ci
29562306a36Sopenharmony_ci		vec = __bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter);
29662306a36Sopenharmony_ci		nr_bvec = 0;
29762306a36Sopenharmony_ci		bio_for_each_bvec(bv, bio, bi) {
29862306a36Sopenharmony_ci			nr_bvec++;
29962306a36Sopenharmony_ci		}
30062306a36Sopenharmony_ci		size = bio->bi_iter.bi_size;
30162306a36Sopenharmony_ci		offset = bio->bi_iter.bi_bvec_done;
30262306a36Sopenharmony_ci	}
30362306a36Sopenharmony_ci
30462306a36Sopenharmony_ci	iov_iter_bvec(&req->iter, dir, vec, nr_bvec, size);
30562306a36Sopenharmony_ci	req->iter.iov_offset = offset;
30662306a36Sopenharmony_ci}
30762306a36Sopenharmony_ci
30862306a36Sopenharmony_cistatic inline void nvme_tcp_advance_req(struct nvme_tcp_request *req,
30962306a36Sopenharmony_ci		int len)
31062306a36Sopenharmony_ci{
31162306a36Sopenharmony_ci	req->data_sent += len;
31262306a36Sopenharmony_ci	req->pdu_sent += len;
31362306a36Sopenharmony_ci	iov_iter_advance(&req->iter, len);
31462306a36Sopenharmony_ci	if (!iov_iter_count(&req->iter) &&
31562306a36Sopenharmony_ci	    req->data_sent < req->data_len) {
31662306a36Sopenharmony_ci		req->curr_bio = req->curr_bio->bi_next;
31762306a36Sopenharmony_ci		nvme_tcp_init_iter(req, ITER_SOURCE);
31862306a36Sopenharmony_ci	}
31962306a36Sopenharmony_ci}
32062306a36Sopenharmony_ci
/*
 * Keep calling nvme_tcp_try_send() for as long as it returns a positive
 * value, i.e. drain the send queue as much as we can.
 */
static inline void nvme_tcp_send_all(struct nvme_tcp_queue *queue)
{
	while (nvme_tcp_try_send(queue) > 0)
		;
}
33062306a36Sopenharmony_ci
33162306a36Sopenharmony_cistatic inline bool nvme_tcp_queue_more(struct nvme_tcp_queue *queue)
33262306a36Sopenharmony_ci{
33362306a36Sopenharmony_ci	return !list_empty(&queue->send_list) ||
33462306a36Sopenharmony_ci		!llist_empty(&queue->req_list);
33562306a36Sopenharmony_ci}
33662306a36Sopenharmony_ci
33762306a36Sopenharmony_cistatic inline void nvme_tcp_queue_request(struct nvme_tcp_request *req,
33862306a36Sopenharmony_ci		bool sync, bool last)
33962306a36Sopenharmony_ci{
34062306a36Sopenharmony_ci	struct nvme_tcp_queue *queue = req->queue;
34162306a36Sopenharmony_ci	bool empty;
34262306a36Sopenharmony_ci
34362306a36Sopenharmony_ci	empty = llist_add(&req->lentry, &queue->req_list) &&
34462306a36Sopenharmony_ci		list_empty(&queue->send_list) && !queue->request;
34562306a36Sopenharmony_ci
34662306a36Sopenharmony_ci	/*
34762306a36Sopenharmony_ci	 * if we're the first on the send_list and we can try to send
34862306a36Sopenharmony_ci	 * directly, otherwise queue io_work. Also, only do that if we
34962306a36Sopenharmony_ci	 * are on the same cpu, so we don't introduce contention.
35062306a36Sopenharmony_ci	 */
35162306a36Sopenharmony_ci	if (queue->io_cpu == raw_smp_processor_id() &&
35262306a36Sopenharmony_ci	    sync && empty && mutex_trylock(&queue->send_mutex)) {
35362306a36Sopenharmony_ci		nvme_tcp_send_all(queue);
35462306a36Sopenharmony_ci		mutex_unlock(&queue->send_mutex);
35562306a36Sopenharmony_ci	}
35662306a36Sopenharmony_ci
35762306a36Sopenharmony_ci	if (last && nvme_tcp_queue_more(queue))
35862306a36Sopenharmony_ci		queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
35962306a36Sopenharmony_ci}
36062306a36Sopenharmony_ci
36162306a36Sopenharmony_cistatic void nvme_tcp_process_req_list(struct nvme_tcp_queue *queue)
36262306a36Sopenharmony_ci{
36362306a36Sopenharmony_ci	struct nvme_tcp_request *req;
36462306a36Sopenharmony_ci	struct llist_node *node;
36562306a36Sopenharmony_ci
36662306a36Sopenharmony_ci	for (node = llist_del_all(&queue->req_list); node; node = node->next) {
36762306a36Sopenharmony_ci		req = llist_entry(node, struct nvme_tcp_request, lentry);
36862306a36Sopenharmony_ci		list_add(&req->entry, &queue->send_list);
36962306a36Sopenharmony_ci	}
37062306a36Sopenharmony_ci}
37162306a36Sopenharmony_ci
37262306a36Sopenharmony_cistatic inline struct nvme_tcp_request *
37362306a36Sopenharmony_cinvme_tcp_fetch_request(struct nvme_tcp_queue *queue)
37462306a36Sopenharmony_ci{
37562306a36Sopenharmony_ci	struct nvme_tcp_request *req;
37662306a36Sopenharmony_ci
37762306a36Sopenharmony_ci	req = list_first_entry_or_null(&queue->send_list,
37862306a36Sopenharmony_ci			struct nvme_tcp_request, entry);
37962306a36Sopenharmony_ci	if (!req) {
38062306a36Sopenharmony_ci		nvme_tcp_process_req_list(queue);
38162306a36Sopenharmony_ci		req = list_first_entry_or_null(&queue->send_list,
38262306a36Sopenharmony_ci				struct nvme_tcp_request, entry);
38362306a36Sopenharmony_ci		if (unlikely(!req))
38462306a36Sopenharmony_ci			return NULL;
38562306a36Sopenharmony_ci	}
38662306a36Sopenharmony_ci
38762306a36Sopenharmony_ci	list_del(&req->entry);
38862306a36Sopenharmony_ci	return req;
38962306a36Sopenharmony_ci}
39062306a36Sopenharmony_ci
39162306a36Sopenharmony_cistatic inline void nvme_tcp_ddgst_final(struct ahash_request *hash,
39262306a36Sopenharmony_ci		__le32 *dgst)
39362306a36Sopenharmony_ci{
39462306a36Sopenharmony_ci	ahash_request_set_crypt(hash, NULL, (u8 *)dgst, 0);
39562306a36Sopenharmony_ci	crypto_ahash_final(hash);
39662306a36Sopenharmony_ci}
39762306a36Sopenharmony_ci
39862306a36Sopenharmony_cistatic inline void nvme_tcp_ddgst_update(struct ahash_request *hash,
39962306a36Sopenharmony_ci		struct page *page, off_t off, size_t len)
40062306a36Sopenharmony_ci{
40162306a36Sopenharmony_ci	struct scatterlist sg;
40262306a36Sopenharmony_ci
40362306a36Sopenharmony_ci	sg_init_table(&sg, 1);
40462306a36Sopenharmony_ci	sg_set_page(&sg, page, len, off);
40562306a36Sopenharmony_ci	ahash_request_set_crypt(hash, &sg, NULL, len);
40662306a36Sopenharmony_ci	crypto_ahash_update(hash);
40762306a36Sopenharmony_ci}
40862306a36Sopenharmony_ci
40962306a36Sopenharmony_cistatic inline void nvme_tcp_hdgst(struct ahash_request *hash,
41062306a36Sopenharmony_ci		void *pdu, size_t len)
41162306a36Sopenharmony_ci{
41262306a36Sopenharmony_ci	struct scatterlist sg;
41362306a36Sopenharmony_ci
41462306a36Sopenharmony_ci	sg_init_one(&sg, pdu, len);
41562306a36Sopenharmony_ci	ahash_request_set_crypt(hash, &sg, pdu + len, len);
41662306a36Sopenharmony_ci	crypto_ahash_digest(hash);
41762306a36Sopenharmony_ci}
41862306a36Sopenharmony_ci
41962306a36Sopenharmony_cistatic int nvme_tcp_verify_hdgst(struct nvme_tcp_queue *queue,
42062306a36Sopenharmony_ci		void *pdu, size_t pdu_len)
42162306a36Sopenharmony_ci{
42262306a36Sopenharmony_ci	struct nvme_tcp_hdr *hdr = pdu;
42362306a36Sopenharmony_ci	__le32 recv_digest;
42462306a36Sopenharmony_ci	__le32 exp_digest;
42562306a36Sopenharmony_ci
42662306a36Sopenharmony_ci	if (unlikely(!(hdr->flags & NVME_TCP_F_HDGST))) {
42762306a36Sopenharmony_ci		dev_err(queue->ctrl->ctrl.device,
42862306a36Sopenharmony_ci			"queue %d: header digest flag is cleared\n",
42962306a36Sopenharmony_ci			nvme_tcp_queue_id(queue));
43062306a36Sopenharmony_ci		return -EPROTO;
43162306a36Sopenharmony_ci	}
43262306a36Sopenharmony_ci
43362306a36Sopenharmony_ci	recv_digest = *(__le32 *)(pdu + hdr->hlen);
43462306a36Sopenharmony_ci	nvme_tcp_hdgst(queue->rcv_hash, pdu, pdu_len);
43562306a36Sopenharmony_ci	exp_digest = *(__le32 *)(pdu + hdr->hlen);
43662306a36Sopenharmony_ci	if (recv_digest != exp_digest) {
43762306a36Sopenharmony_ci		dev_err(queue->ctrl->ctrl.device,
43862306a36Sopenharmony_ci			"header digest error: recv %#x expected %#x\n",
43962306a36Sopenharmony_ci			le32_to_cpu(recv_digest), le32_to_cpu(exp_digest));
44062306a36Sopenharmony_ci		return -EIO;
44162306a36Sopenharmony_ci	}
44262306a36Sopenharmony_ci
44362306a36Sopenharmony_ci	return 0;
44462306a36Sopenharmony_ci}
44562306a36Sopenharmony_ci
44662306a36Sopenharmony_cistatic int nvme_tcp_check_ddgst(struct nvme_tcp_queue *queue, void *pdu)
44762306a36Sopenharmony_ci{
44862306a36Sopenharmony_ci	struct nvme_tcp_hdr *hdr = pdu;
44962306a36Sopenharmony_ci	u8 digest_len = nvme_tcp_hdgst_len(queue);
45062306a36Sopenharmony_ci	u32 len;
45162306a36Sopenharmony_ci
45262306a36Sopenharmony_ci	len = le32_to_cpu(hdr->plen) - hdr->hlen -
45362306a36Sopenharmony_ci		((hdr->flags & NVME_TCP_F_HDGST) ? digest_len : 0);
45462306a36Sopenharmony_ci
45562306a36Sopenharmony_ci	if (unlikely(len && !(hdr->flags & NVME_TCP_F_DDGST))) {
45662306a36Sopenharmony_ci		dev_err(queue->ctrl->ctrl.device,
45762306a36Sopenharmony_ci			"queue %d: data digest flag is cleared\n",
45862306a36Sopenharmony_ci		nvme_tcp_queue_id(queue));
45962306a36Sopenharmony_ci		return -EPROTO;
46062306a36Sopenharmony_ci	}
46162306a36Sopenharmony_ci	crypto_ahash_init(queue->rcv_hash);
46262306a36Sopenharmony_ci
46362306a36Sopenharmony_ci	return 0;
46462306a36Sopenharmony_ci}
46562306a36Sopenharmony_ci
46662306a36Sopenharmony_cistatic void nvme_tcp_exit_request(struct blk_mq_tag_set *set,
46762306a36Sopenharmony_ci		struct request *rq, unsigned int hctx_idx)
46862306a36Sopenharmony_ci{
46962306a36Sopenharmony_ci	struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
47062306a36Sopenharmony_ci
47162306a36Sopenharmony_ci	page_frag_free(req->pdu);
47262306a36Sopenharmony_ci}
47362306a36Sopenharmony_ci
47462306a36Sopenharmony_cistatic int nvme_tcp_init_request(struct blk_mq_tag_set *set,
47562306a36Sopenharmony_ci		struct request *rq, unsigned int hctx_idx,
47662306a36Sopenharmony_ci		unsigned int numa_node)
47762306a36Sopenharmony_ci{
47862306a36Sopenharmony_ci	struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(set->driver_data);
47962306a36Sopenharmony_ci	struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
48062306a36Sopenharmony_ci	struct nvme_tcp_cmd_pdu *pdu;
48162306a36Sopenharmony_ci	int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0;
48262306a36Sopenharmony_ci	struct nvme_tcp_queue *queue = &ctrl->queues[queue_idx];
48362306a36Sopenharmony_ci	u8 hdgst = nvme_tcp_hdgst_len(queue);
48462306a36Sopenharmony_ci
48562306a36Sopenharmony_ci	req->pdu = page_frag_alloc(&queue->pf_cache,
48662306a36Sopenharmony_ci		sizeof(struct nvme_tcp_cmd_pdu) + hdgst,
48762306a36Sopenharmony_ci		GFP_KERNEL | __GFP_ZERO);
48862306a36Sopenharmony_ci	if (!req->pdu)
48962306a36Sopenharmony_ci		return -ENOMEM;
49062306a36Sopenharmony_ci
49162306a36Sopenharmony_ci	pdu = req->pdu;
49262306a36Sopenharmony_ci	req->queue = queue;
49362306a36Sopenharmony_ci	nvme_req(rq)->ctrl = &ctrl->ctrl;
49462306a36Sopenharmony_ci	nvme_req(rq)->cmd = &pdu->cmd;
49562306a36Sopenharmony_ci
49662306a36Sopenharmony_ci	return 0;
49762306a36Sopenharmony_ci}
49862306a36Sopenharmony_ci
49962306a36Sopenharmony_cistatic int nvme_tcp_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
50062306a36Sopenharmony_ci		unsigned int hctx_idx)
50162306a36Sopenharmony_ci{
50262306a36Sopenharmony_ci	struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(data);
50362306a36Sopenharmony_ci	struct nvme_tcp_queue *queue = &ctrl->queues[hctx_idx + 1];
50462306a36Sopenharmony_ci
50562306a36Sopenharmony_ci	hctx->driver_data = queue;
50662306a36Sopenharmony_ci	return 0;
50762306a36Sopenharmony_ci}
50862306a36Sopenharmony_ci
50962306a36Sopenharmony_cistatic int nvme_tcp_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data,
51062306a36Sopenharmony_ci		unsigned int hctx_idx)
51162306a36Sopenharmony_ci{
51262306a36Sopenharmony_ci	struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(data);
51362306a36Sopenharmony_ci	struct nvme_tcp_queue *queue = &ctrl->queues[0];
51462306a36Sopenharmony_ci
51562306a36Sopenharmony_ci	hctx->driver_data = queue;
51662306a36Sopenharmony_ci	return 0;
51762306a36Sopenharmony_ci}
51862306a36Sopenharmony_ci
51962306a36Sopenharmony_cistatic enum nvme_tcp_recv_state
52062306a36Sopenharmony_cinvme_tcp_recv_state(struct nvme_tcp_queue *queue)
52162306a36Sopenharmony_ci{
52262306a36Sopenharmony_ci	return  (queue->pdu_remaining) ? NVME_TCP_RECV_PDU :
52362306a36Sopenharmony_ci		(queue->ddgst_remaining) ? NVME_TCP_RECV_DDGST :
52462306a36Sopenharmony_ci		NVME_TCP_RECV_DATA;
52562306a36Sopenharmony_ci}
52662306a36Sopenharmony_ci
52762306a36Sopenharmony_cistatic void nvme_tcp_init_recv_ctx(struct nvme_tcp_queue *queue)
52862306a36Sopenharmony_ci{
52962306a36Sopenharmony_ci	queue->pdu_remaining = sizeof(struct nvme_tcp_rsp_pdu) +
53062306a36Sopenharmony_ci				nvme_tcp_hdgst_len(queue);
53162306a36Sopenharmony_ci	queue->pdu_offset = 0;
53262306a36Sopenharmony_ci	queue->data_remaining = -1;
53362306a36Sopenharmony_ci	queue->ddgst_remaining = 0;
53462306a36Sopenharmony_ci}
53562306a36Sopenharmony_ci
53662306a36Sopenharmony_cistatic void nvme_tcp_error_recovery(struct nvme_ctrl *ctrl)
53762306a36Sopenharmony_ci{
53862306a36Sopenharmony_ci	if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
53962306a36Sopenharmony_ci		return;
54062306a36Sopenharmony_ci
54162306a36Sopenharmony_ci	dev_warn(ctrl->device, "starting error recovery\n");
54262306a36Sopenharmony_ci	queue_work(nvme_reset_wq, &to_tcp_ctrl(ctrl)->err_work);
54362306a36Sopenharmony_ci}
54462306a36Sopenharmony_ci
54562306a36Sopenharmony_cistatic int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue,
54662306a36Sopenharmony_ci		struct nvme_completion *cqe)
54762306a36Sopenharmony_ci{
54862306a36Sopenharmony_ci	struct nvme_tcp_request *req;
54962306a36Sopenharmony_ci	struct request *rq;
55062306a36Sopenharmony_ci
55162306a36Sopenharmony_ci	rq = nvme_find_rq(nvme_tcp_tagset(queue), cqe->command_id);
55262306a36Sopenharmony_ci	if (!rq) {
55362306a36Sopenharmony_ci		dev_err(queue->ctrl->ctrl.device,
55462306a36Sopenharmony_ci			"got bad cqe.command_id %#x on queue %d\n",
55562306a36Sopenharmony_ci			cqe->command_id, nvme_tcp_queue_id(queue));
55662306a36Sopenharmony_ci		nvme_tcp_error_recovery(&queue->ctrl->ctrl);
55762306a36Sopenharmony_ci		return -EINVAL;
55862306a36Sopenharmony_ci	}
55962306a36Sopenharmony_ci
56062306a36Sopenharmony_ci	req = blk_mq_rq_to_pdu(rq);
56162306a36Sopenharmony_ci	if (req->status == cpu_to_le16(NVME_SC_SUCCESS))
56262306a36Sopenharmony_ci		req->status = cqe->status;
56362306a36Sopenharmony_ci
56462306a36Sopenharmony_ci	if (!nvme_try_complete_req(rq, req->status, cqe->result))
56562306a36Sopenharmony_ci		nvme_complete_rq(rq);
56662306a36Sopenharmony_ci	queue->nr_cqe++;
56762306a36Sopenharmony_ci
56862306a36Sopenharmony_ci	return 0;
56962306a36Sopenharmony_ci}
57062306a36Sopenharmony_ci
57162306a36Sopenharmony_cistatic int nvme_tcp_handle_c2h_data(struct nvme_tcp_queue *queue,
57262306a36Sopenharmony_ci		struct nvme_tcp_data_pdu *pdu)
57362306a36Sopenharmony_ci{
57462306a36Sopenharmony_ci	struct request *rq;
57562306a36Sopenharmony_ci
57662306a36Sopenharmony_ci	rq = nvme_find_rq(nvme_tcp_tagset(queue), pdu->command_id);
57762306a36Sopenharmony_ci	if (!rq) {
57862306a36Sopenharmony_ci		dev_err(queue->ctrl->ctrl.device,
57962306a36Sopenharmony_ci			"got bad c2hdata.command_id %#x on queue %d\n",
58062306a36Sopenharmony_ci			pdu->command_id, nvme_tcp_queue_id(queue));
58162306a36Sopenharmony_ci		return -ENOENT;
58262306a36Sopenharmony_ci	}
58362306a36Sopenharmony_ci
58462306a36Sopenharmony_ci	if (!blk_rq_payload_bytes(rq)) {
58562306a36Sopenharmony_ci		dev_err(queue->ctrl->ctrl.device,
58662306a36Sopenharmony_ci			"queue %d tag %#x unexpected data\n",
58762306a36Sopenharmony_ci			nvme_tcp_queue_id(queue), rq->tag);
58862306a36Sopenharmony_ci		return -EIO;
58962306a36Sopenharmony_ci	}
59062306a36Sopenharmony_ci
59162306a36Sopenharmony_ci	queue->data_remaining = le32_to_cpu(pdu->data_length);
59262306a36Sopenharmony_ci
59362306a36Sopenharmony_ci	if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS &&
59462306a36Sopenharmony_ci	    unlikely(!(pdu->hdr.flags & NVME_TCP_F_DATA_LAST))) {
59562306a36Sopenharmony_ci		dev_err(queue->ctrl->ctrl.device,
59662306a36Sopenharmony_ci			"queue %d tag %#x SUCCESS set but not last PDU\n",
59762306a36Sopenharmony_ci			nvme_tcp_queue_id(queue), rq->tag);
59862306a36Sopenharmony_ci		nvme_tcp_error_recovery(&queue->ctrl->ctrl);
59962306a36Sopenharmony_ci		return -EPROTO;
60062306a36Sopenharmony_ci	}
60162306a36Sopenharmony_ci
60262306a36Sopenharmony_ci	return 0;
60362306a36Sopenharmony_ci}
60462306a36Sopenharmony_ci
60562306a36Sopenharmony_cistatic int nvme_tcp_handle_comp(struct nvme_tcp_queue *queue,
60662306a36Sopenharmony_ci		struct nvme_tcp_rsp_pdu *pdu)
60762306a36Sopenharmony_ci{
60862306a36Sopenharmony_ci	struct nvme_completion *cqe = &pdu->cqe;
60962306a36Sopenharmony_ci	int ret = 0;
61062306a36Sopenharmony_ci
61162306a36Sopenharmony_ci	/*
61262306a36Sopenharmony_ci	 * AEN requests are special as they don't time out and can
61362306a36Sopenharmony_ci	 * survive any kind of queue freeze and often don't respond to
61462306a36Sopenharmony_ci	 * aborts.  We don't even bother to allocate a struct request
61562306a36Sopenharmony_ci	 * for them but rather special case them here.
61662306a36Sopenharmony_ci	 */
61762306a36Sopenharmony_ci	if (unlikely(nvme_is_aen_req(nvme_tcp_queue_id(queue),
61862306a36Sopenharmony_ci				     cqe->command_id)))
61962306a36Sopenharmony_ci		nvme_complete_async_event(&queue->ctrl->ctrl, cqe->status,
62062306a36Sopenharmony_ci				&cqe->result);
62162306a36Sopenharmony_ci	else
62262306a36Sopenharmony_ci		ret = nvme_tcp_process_nvme_cqe(queue, cqe);
62362306a36Sopenharmony_ci
62462306a36Sopenharmony_ci	return ret;
62562306a36Sopenharmony_ci}
62662306a36Sopenharmony_ci
62762306a36Sopenharmony_cistatic void nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req)
62862306a36Sopenharmony_ci{
62962306a36Sopenharmony_ci	struct nvme_tcp_data_pdu *data = nvme_tcp_req_data_pdu(req);
63062306a36Sopenharmony_ci	struct nvme_tcp_queue *queue = req->queue;
63162306a36Sopenharmony_ci	struct request *rq = blk_mq_rq_from_pdu(req);
63262306a36Sopenharmony_ci	u32 h2cdata_sent = req->pdu_len;
63362306a36Sopenharmony_ci	u8 hdgst = nvme_tcp_hdgst_len(queue);
63462306a36Sopenharmony_ci	u8 ddgst = nvme_tcp_ddgst_len(queue);
63562306a36Sopenharmony_ci
63662306a36Sopenharmony_ci	req->state = NVME_TCP_SEND_H2C_PDU;
63762306a36Sopenharmony_ci	req->offset = 0;
63862306a36Sopenharmony_ci	req->pdu_len = min(req->h2cdata_left, queue->maxh2cdata);
63962306a36Sopenharmony_ci	req->pdu_sent = 0;
64062306a36Sopenharmony_ci	req->h2cdata_left -= req->pdu_len;
64162306a36Sopenharmony_ci	req->h2cdata_offset += h2cdata_sent;
64262306a36Sopenharmony_ci
64362306a36Sopenharmony_ci	memset(data, 0, sizeof(*data));
64462306a36Sopenharmony_ci	data->hdr.type = nvme_tcp_h2c_data;
64562306a36Sopenharmony_ci	if (!req->h2cdata_left)
64662306a36Sopenharmony_ci		data->hdr.flags = NVME_TCP_F_DATA_LAST;
64762306a36Sopenharmony_ci	if (queue->hdr_digest)
64862306a36Sopenharmony_ci		data->hdr.flags |= NVME_TCP_F_HDGST;
64962306a36Sopenharmony_ci	if (queue->data_digest)
65062306a36Sopenharmony_ci		data->hdr.flags |= NVME_TCP_F_DDGST;
65162306a36Sopenharmony_ci	data->hdr.hlen = sizeof(*data);
65262306a36Sopenharmony_ci	data->hdr.pdo = data->hdr.hlen + hdgst;
65362306a36Sopenharmony_ci	data->hdr.plen =
65462306a36Sopenharmony_ci		cpu_to_le32(data->hdr.hlen + hdgst + req->pdu_len + ddgst);
65562306a36Sopenharmony_ci	data->ttag = req->ttag;
65662306a36Sopenharmony_ci	data->command_id = nvme_cid(rq);
65762306a36Sopenharmony_ci	data->data_offset = cpu_to_le32(req->h2cdata_offset);
65862306a36Sopenharmony_ci	data->data_length = cpu_to_le32(req->pdu_len);
65962306a36Sopenharmony_ci}
66062306a36Sopenharmony_ci
66162306a36Sopenharmony_cistatic int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue,
66262306a36Sopenharmony_ci		struct nvme_tcp_r2t_pdu *pdu)
66362306a36Sopenharmony_ci{
66462306a36Sopenharmony_ci	struct nvme_tcp_request *req;
66562306a36Sopenharmony_ci	struct request *rq;
66662306a36Sopenharmony_ci	u32 r2t_length = le32_to_cpu(pdu->r2t_length);
66762306a36Sopenharmony_ci	u32 r2t_offset = le32_to_cpu(pdu->r2t_offset);
66862306a36Sopenharmony_ci
66962306a36Sopenharmony_ci	rq = nvme_find_rq(nvme_tcp_tagset(queue), pdu->command_id);
67062306a36Sopenharmony_ci	if (!rq) {
67162306a36Sopenharmony_ci		dev_err(queue->ctrl->ctrl.device,
67262306a36Sopenharmony_ci			"got bad r2t.command_id %#x on queue %d\n",
67362306a36Sopenharmony_ci			pdu->command_id, nvme_tcp_queue_id(queue));
67462306a36Sopenharmony_ci		return -ENOENT;
67562306a36Sopenharmony_ci	}
67662306a36Sopenharmony_ci	req = blk_mq_rq_to_pdu(rq);
67762306a36Sopenharmony_ci
67862306a36Sopenharmony_ci	if (unlikely(!r2t_length)) {
67962306a36Sopenharmony_ci		dev_err(queue->ctrl->ctrl.device,
68062306a36Sopenharmony_ci			"req %d r2t len is %u, probably a bug...\n",
68162306a36Sopenharmony_ci			rq->tag, r2t_length);
68262306a36Sopenharmony_ci		return -EPROTO;
68362306a36Sopenharmony_ci	}
68462306a36Sopenharmony_ci
68562306a36Sopenharmony_ci	if (unlikely(req->data_sent + r2t_length > req->data_len)) {
68662306a36Sopenharmony_ci		dev_err(queue->ctrl->ctrl.device,
68762306a36Sopenharmony_ci			"req %d r2t len %u exceeded data len %u (%zu sent)\n",
68862306a36Sopenharmony_ci			rq->tag, r2t_length, req->data_len, req->data_sent);
68962306a36Sopenharmony_ci		return -EPROTO;
69062306a36Sopenharmony_ci	}
69162306a36Sopenharmony_ci
69262306a36Sopenharmony_ci	if (unlikely(r2t_offset < req->data_sent)) {
69362306a36Sopenharmony_ci		dev_err(queue->ctrl->ctrl.device,
69462306a36Sopenharmony_ci			"req %d unexpected r2t offset %u (expected %zu)\n",
69562306a36Sopenharmony_ci			rq->tag, r2t_offset, req->data_sent);
69662306a36Sopenharmony_ci		return -EPROTO;
69762306a36Sopenharmony_ci	}
69862306a36Sopenharmony_ci
69962306a36Sopenharmony_ci	req->pdu_len = 0;
70062306a36Sopenharmony_ci	req->h2cdata_left = r2t_length;
70162306a36Sopenharmony_ci	req->h2cdata_offset = r2t_offset;
70262306a36Sopenharmony_ci	req->ttag = pdu->ttag;
70362306a36Sopenharmony_ci
70462306a36Sopenharmony_ci	nvme_tcp_setup_h2c_data_pdu(req);
70562306a36Sopenharmony_ci	nvme_tcp_queue_request(req, false, true);
70662306a36Sopenharmony_ci
70762306a36Sopenharmony_ci	return 0;
70862306a36Sopenharmony_ci}
70962306a36Sopenharmony_ci
71062306a36Sopenharmony_cistatic int nvme_tcp_recv_pdu(struct nvme_tcp_queue *queue, struct sk_buff *skb,
71162306a36Sopenharmony_ci		unsigned int *offset, size_t *len)
71262306a36Sopenharmony_ci{
71362306a36Sopenharmony_ci	struct nvme_tcp_hdr *hdr;
71462306a36Sopenharmony_ci	char *pdu = queue->pdu;
71562306a36Sopenharmony_ci	size_t rcv_len = min_t(size_t, *len, queue->pdu_remaining);
71662306a36Sopenharmony_ci	int ret;
71762306a36Sopenharmony_ci
71862306a36Sopenharmony_ci	ret = skb_copy_bits(skb, *offset,
71962306a36Sopenharmony_ci		&pdu[queue->pdu_offset], rcv_len);
72062306a36Sopenharmony_ci	if (unlikely(ret))
72162306a36Sopenharmony_ci		return ret;
72262306a36Sopenharmony_ci
72362306a36Sopenharmony_ci	queue->pdu_remaining -= rcv_len;
72462306a36Sopenharmony_ci	queue->pdu_offset += rcv_len;
72562306a36Sopenharmony_ci	*offset += rcv_len;
72662306a36Sopenharmony_ci	*len -= rcv_len;
72762306a36Sopenharmony_ci	if (queue->pdu_remaining)
72862306a36Sopenharmony_ci		return 0;
72962306a36Sopenharmony_ci
73062306a36Sopenharmony_ci	hdr = queue->pdu;
73162306a36Sopenharmony_ci	if (queue->hdr_digest) {
73262306a36Sopenharmony_ci		ret = nvme_tcp_verify_hdgst(queue, queue->pdu, hdr->hlen);
73362306a36Sopenharmony_ci		if (unlikely(ret))
73462306a36Sopenharmony_ci			return ret;
73562306a36Sopenharmony_ci	}
73662306a36Sopenharmony_ci
73762306a36Sopenharmony_ci
73862306a36Sopenharmony_ci	if (queue->data_digest) {
73962306a36Sopenharmony_ci		ret = nvme_tcp_check_ddgst(queue, queue->pdu);
74062306a36Sopenharmony_ci		if (unlikely(ret))
74162306a36Sopenharmony_ci			return ret;
74262306a36Sopenharmony_ci	}
74362306a36Sopenharmony_ci
74462306a36Sopenharmony_ci	switch (hdr->type) {
74562306a36Sopenharmony_ci	case nvme_tcp_c2h_data:
74662306a36Sopenharmony_ci		return nvme_tcp_handle_c2h_data(queue, (void *)queue->pdu);
74762306a36Sopenharmony_ci	case nvme_tcp_rsp:
74862306a36Sopenharmony_ci		nvme_tcp_init_recv_ctx(queue);
74962306a36Sopenharmony_ci		return nvme_tcp_handle_comp(queue, (void *)queue->pdu);
75062306a36Sopenharmony_ci	case nvme_tcp_r2t:
75162306a36Sopenharmony_ci		nvme_tcp_init_recv_ctx(queue);
75262306a36Sopenharmony_ci		return nvme_tcp_handle_r2t(queue, (void *)queue->pdu);
75362306a36Sopenharmony_ci	default:
75462306a36Sopenharmony_ci		dev_err(queue->ctrl->ctrl.device,
75562306a36Sopenharmony_ci			"unsupported pdu type (%d)\n", hdr->type);
75662306a36Sopenharmony_ci		return -EINVAL;
75762306a36Sopenharmony_ci	}
75862306a36Sopenharmony_ci}
75962306a36Sopenharmony_ci
76062306a36Sopenharmony_cistatic inline void nvme_tcp_end_request(struct request *rq, u16 status)
76162306a36Sopenharmony_ci{
76262306a36Sopenharmony_ci	union nvme_result res = {};
76362306a36Sopenharmony_ci
76462306a36Sopenharmony_ci	if (!nvme_try_complete_req(rq, cpu_to_le16(status << 1), res))
76562306a36Sopenharmony_ci		nvme_complete_rq(rq);
76662306a36Sopenharmony_ci}
76762306a36Sopenharmony_ci
76862306a36Sopenharmony_cistatic int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
76962306a36Sopenharmony_ci			      unsigned int *offset, size_t *len)
77062306a36Sopenharmony_ci{
77162306a36Sopenharmony_ci	struct nvme_tcp_data_pdu *pdu = (void *)queue->pdu;
77262306a36Sopenharmony_ci	struct request *rq =
77362306a36Sopenharmony_ci		nvme_cid_to_rq(nvme_tcp_tagset(queue), pdu->command_id);
77462306a36Sopenharmony_ci	struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
77562306a36Sopenharmony_ci
77662306a36Sopenharmony_ci	while (true) {
77762306a36Sopenharmony_ci		int recv_len, ret;
77862306a36Sopenharmony_ci
77962306a36Sopenharmony_ci		recv_len = min_t(size_t, *len, queue->data_remaining);
78062306a36Sopenharmony_ci		if (!recv_len)
78162306a36Sopenharmony_ci			break;
78262306a36Sopenharmony_ci
78362306a36Sopenharmony_ci		if (!iov_iter_count(&req->iter)) {
78462306a36Sopenharmony_ci			req->curr_bio = req->curr_bio->bi_next;
78562306a36Sopenharmony_ci
78662306a36Sopenharmony_ci			/*
78762306a36Sopenharmony_ci			 * If we don`t have any bios it means that controller
78862306a36Sopenharmony_ci			 * sent more data than we requested, hence error
78962306a36Sopenharmony_ci			 */
79062306a36Sopenharmony_ci			if (!req->curr_bio) {
79162306a36Sopenharmony_ci				dev_err(queue->ctrl->ctrl.device,
79262306a36Sopenharmony_ci					"queue %d no space in request %#x",
79362306a36Sopenharmony_ci					nvme_tcp_queue_id(queue), rq->tag);
79462306a36Sopenharmony_ci				nvme_tcp_init_recv_ctx(queue);
79562306a36Sopenharmony_ci				return -EIO;
79662306a36Sopenharmony_ci			}
79762306a36Sopenharmony_ci			nvme_tcp_init_iter(req, ITER_DEST);
79862306a36Sopenharmony_ci		}
79962306a36Sopenharmony_ci
80062306a36Sopenharmony_ci		/* we can read only from what is left in this bio */
80162306a36Sopenharmony_ci		recv_len = min_t(size_t, recv_len,
80262306a36Sopenharmony_ci				iov_iter_count(&req->iter));
80362306a36Sopenharmony_ci
80462306a36Sopenharmony_ci		if (queue->data_digest)
80562306a36Sopenharmony_ci			ret = skb_copy_and_hash_datagram_iter(skb, *offset,
80662306a36Sopenharmony_ci				&req->iter, recv_len, queue->rcv_hash);
80762306a36Sopenharmony_ci		else
80862306a36Sopenharmony_ci			ret = skb_copy_datagram_iter(skb, *offset,
80962306a36Sopenharmony_ci					&req->iter, recv_len);
81062306a36Sopenharmony_ci		if (ret) {
81162306a36Sopenharmony_ci			dev_err(queue->ctrl->ctrl.device,
81262306a36Sopenharmony_ci				"queue %d failed to copy request %#x data",
81362306a36Sopenharmony_ci				nvme_tcp_queue_id(queue), rq->tag);
81462306a36Sopenharmony_ci			return ret;
81562306a36Sopenharmony_ci		}
81662306a36Sopenharmony_ci
81762306a36Sopenharmony_ci		*len -= recv_len;
81862306a36Sopenharmony_ci		*offset += recv_len;
81962306a36Sopenharmony_ci		queue->data_remaining -= recv_len;
82062306a36Sopenharmony_ci	}
82162306a36Sopenharmony_ci
82262306a36Sopenharmony_ci	if (!queue->data_remaining) {
82362306a36Sopenharmony_ci		if (queue->data_digest) {
82462306a36Sopenharmony_ci			nvme_tcp_ddgst_final(queue->rcv_hash, &queue->exp_ddgst);
82562306a36Sopenharmony_ci			queue->ddgst_remaining = NVME_TCP_DIGEST_LENGTH;
82662306a36Sopenharmony_ci		} else {
82762306a36Sopenharmony_ci			if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
82862306a36Sopenharmony_ci				nvme_tcp_end_request(rq,
82962306a36Sopenharmony_ci						le16_to_cpu(req->status));
83062306a36Sopenharmony_ci				queue->nr_cqe++;
83162306a36Sopenharmony_ci			}
83262306a36Sopenharmony_ci			nvme_tcp_init_recv_ctx(queue);
83362306a36Sopenharmony_ci		}
83462306a36Sopenharmony_ci	}
83562306a36Sopenharmony_ci
83662306a36Sopenharmony_ci	return 0;
83762306a36Sopenharmony_ci}
83862306a36Sopenharmony_ci
83962306a36Sopenharmony_cistatic int nvme_tcp_recv_ddgst(struct nvme_tcp_queue *queue,
84062306a36Sopenharmony_ci		struct sk_buff *skb, unsigned int *offset, size_t *len)
84162306a36Sopenharmony_ci{
84262306a36Sopenharmony_ci	struct nvme_tcp_data_pdu *pdu = (void *)queue->pdu;
84362306a36Sopenharmony_ci	char *ddgst = (char *)&queue->recv_ddgst;
84462306a36Sopenharmony_ci	size_t recv_len = min_t(size_t, *len, queue->ddgst_remaining);
84562306a36Sopenharmony_ci	off_t off = NVME_TCP_DIGEST_LENGTH - queue->ddgst_remaining;
84662306a36Sopenharmony_ci	int ret;
84762306a36Sopenharmony_ci
84862306a36Sopenharmony_ci	ret = skb_copy_bits(skb, *offset, &ddgst[off], recv_len);
84962306a36Sopenharmony_ci	if (unlikely(ret))
85062306a36Sopenharmony_ci		return ret;
85162306a36Sopenharmony_ci
85262306a36Sopenharmony_ci	queue->ddgst_remaining -= recv_len;
85362306a36Sopenharmony_ci	*offset += recv_len;
85462306a36Sopenharmony_ci	*len -= recv_len;
85562306a36Sopenharmony_ci	if (queue->ddgst_remaining)
85662306a36Sopenharmony_ci		return 0;
85762306a36Sopenharmony_ci
85862306a36Sopenharmony_ci	if (queue->recv_ddgst != queue->exp_ddgst) {
85962306a36Sopenharmony_ci		struct request *rq = nvme_cid_to_rq(nvme_tcp_tagset(queue),
86062306a36Sopenharmony_ci					pdu->command_id);
86162306a36Sopenharmony_ci		struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
86262306a36Sopenharmony_ci
86362306a36Sopenharmony_ci		req->status = cpu_to_le16(NVME_SC_DATA_XFER_ERROR);
86462306a36Sopenharmony_ci
86562306a36Sopenharmony_ci		dev_err(queue->ctrl->ctrl.device,
86662306a36Sopenharmony_ci			"data digest error: recv %#x expected %#x\n",
86762306a36Sopenharmony_ci			le32_to_cpu(queue->recv_ddgst),
86862306a36Sopenharmony_ci			le32_to_cpu(queue->exp_ddgst));
86962306a36Sopenharmony_ci	}
87062306a36Sopenharmony_ci
87162306a36Sopenharmony_ci	if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
87262306a36Sopenharmony_ci		struct request *rq = nvme_cid_to_rq(nvme_tcp_tagset(queue),
87362306a36Sopenharmony_ci					pdu->command_id);
87462306a36Sopenharmony_ci		struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
87562306a36Sopenharmony_ci
87662306a36Sopenharmony_ci		nvme_tcp_end_request(rq, le16_to_cpu(req->status));
87762306a36Sopenharmony_ci		queue->nr_cqe++;
87862306a36Sopenharmony_ci	}
87962306a36Sopenharmony_ci
88062306a36Sopenharmony_ci	nvme_tcp_init_recv_ctx(queue);
88162306a36Sopenharmony_ci	return 0;
88262306a36Sopenharmony_ci}
88362306a36Sopenharmony_ci
88462306a36Sopenharmony_cistatic int nvme_tcp_recv_skb(read_descriptor_t *desc, struct sk_buff *skb,
88562306a36Sopenharmony_ci			     unsigned int offset, size_t len)
88662306a36Sopenharmony_ci{
88762306a36Sopenharmony_ci	struct nvme_tcp_queue *queue = desc->arg.data;
88862306a36Sopenharmony_ci	size_t consumed = len;
88962306a36Sopenharmony_ci	int result;
89062306a36Sopenharmony_ci
89162306a36Sopenharmony_ci	if (unlikely(!queue->rd_enabled))
89262306a36Sopenharmony_ci		return -EFAULT;
89362306a36Sopenharmony_ci
89462306a36Sopenharmony_ci	while (len) {
89562306a36Sopenharmony_ci		switch (nvme_tcp_recv_state(queue)) {
89662306a36Sopenharmony_ci		case NVME_TCP_RECV_PDU:
89762306a36Sopenharmony_ci			result = nvme_tcp_recv_pdu(queue, skb, &offset, &len);
89862306a36Sopenharmony_ci			break;
89962306a36Sopenharmony_ci		case NVME_TCP_RECV_DATA:
90062306a36Sopenharmony_ci			result = nvme_tcp_recv_data(queue, skb, &offset, &len);
90162306a36Sopenharmony_ci			break;
90262306a36Sopenharmony_ci		case NVME_TCP_RECV_DDGST:
90362306a36Sopenharmony_ci			result = nvme_tcp_recv_ddgst(queue, skb, &offset, &len);
90462306a36Sopenharmony_ci			break;
90562306a36Sopenharmony_ci		default:
90662306a36Sopenharmony_ci			result = -EFAULT;
90762306a36Sopenharmony_ci		}
90862306a36Sopenharmony_ci		if (result) {
90962306a36Sopenharmony_ci			dev_err(queue->ctrl->ctrl.device,
91062306a36Sopenharmony_ci				"receive failed:  %d\n", result);
91162306a36Sopenharmony_ci			queue->rd_enabled = false;
91262306a36Sopenharmony_ci			nvme_tcp_error_recovery(&queue->ctrl->ctrl);
91362306a36Sopenharmony_ci			return result;
91462306a36Sopenharmony_ci		}
91562306a36Sopenharmony_ci	}
91662306a36Sopenharmony_ci
91762306a36Sopenharmony_ci	return consumed;
91862306a36Sopenharmony_ci}
91962306a36Sopenharmony_ci
92062306a36Sopenharmony_cistatic void nvme_tcp_data_ready(struct sock *sk)
92162306a36Sopenharmony_ci{
92262306a36Sopenharmony_ci	struct nvme_tcp_queue *queue;
92362306a36Sopenharmony_ci
92462306a36Sopenharmony_ci	trace_sk_data_ready(sk);
92562306a36Sopenharmony_ci
92662306a36Sopenharmony_ci	read_lock_bh(&sk->sk_callback_lock);
92762306a36Sopenharmony_ci	queue = sk->sk_user_data;
92862306a36Sopenharmony_ci	if (likely(queue && queue->rd_enabled) &&
92962306a36Sopenharmony_ci	    !test_bit(NVME_TCP_Q_POLLING, &queue->flags))
93062306a36Sopenharmony_ci		queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
93162306a36Sopenharmony_ci	read_unlock_bh(&sk->sk_callback_lock);
93262306a36Sopenharmony_ci}
93362306a36Sopenharmony_ci
93462306a36Sopenharmony_cistatic void nvme_tcp_write_space(struct sock *sk)
93562306a36Sopenharmony_ci{
93662306a36Sopenharmony_ci	struct nvme_tcp_queue *queue;
93762306a36Sopenharmony_ci
93862306a36Sopenharmony_ci	read_lock_bh(&sk->sk_callback_lock);
93962306a36Sopenharmony_ci	queue = sk->sk_user_data;
94062306a36Sopenharmony_ci	if (likely(queue && sk_stream_is_writeable(sk))) {
94162306a36Sopenharmony_ci		clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
94262306a36Sopenharmony_ci		queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
94362306a36Sopenharmony_ci	}
94462306a36Sopenharmony_ci	read_unlock_bh(&sk->sk_callback_lock);
94562306a36Sopenharmony_ci}
94662306a36Sopenharmony_ci
94762306a36Sopenharmony_cistatic void nvme_tcp_state_change(struct sock *sk)
94862306a36Sopenharmony_ci{
94962306a36Sopenharmony_ci	struct nvme_tcp_queue *queue;
95062306a36Sopenharmony_ci
95162306a36Sopenharmony_ci	read_lock_bh(&sk->sk_callback_lock);
95262306a36Sopenharmony_ci	queue = sk->sk_user_data;
95362306a36Sopenharmony_ci	if (!queue)
95462306a36Sopenharmony_ci		goto done;
95562306a36Sopenharmony_ci
95662306a36Sopenharmony_ci	switch (sk->sk_state) {
95762306a36Sopenharmony_ci	case TCP_CLOSE:
95862306a36Sopenharmony_ci	case TCP_CLOSE_WAIT:
95962306a36Sopenharmony_ci	case TCP_LAST_ACK:
96062306a36Sopenharmony_ci	case TCP_FIN_WAIT1:
96162306a36Sopenharmony_ci	case TCP_FIN_WAIT2:
96262306a36Sopenharmony_ci		nvme_tcp_error_recovery(&queue->ctrl->ctrl);
96362306a36Sopenharmony_ci		break;
96462306a36Sopenharmony_ci	default:
96562306a36Sopenharmony_ci		dev_info(queue->ctrl->ctrl.device,
96662306a36Sopenharmony_ci			"queue %d socket state %d\n",
96762306a36Sopenharmony_ci			nvme_tcp_queue_id(queue), sk->sk_state);
96862306a36Sopenharmony_ci	}
96962306a36Sopenharmony_ci
97062306a36Sopenharmony_ci	queue->state_change(sk);
97162306a36Sopenharmony_cidone:
97262306a36Sopenharmony_ci	read_unlock_bh(&sk->sk_callback_lock);
97362306a36Sopenharmony_ci}
97462306a36Sopenharmony_ci
97562306a36Sopenharmony_cistatic inline void nvme_tcp_done_send_req(struct nvme_tcp_queue *queue)
97662306a36Sopenharmony_ci{
97762306a36Sopenharmony_ci	queue->request = NULL;
97862306a36Sopenharmony_ci}
97962306a36Sopenharmony_ci
98062306a36Sopenharmony_cistatic void nvme_tcp_fail_request(struct nvme_tcp_request *req)
98162306a36Sopenharmony_ci{
98262306a36Sopenharmony_ci	if (nvme_tcp_async_req(req)) {
98362306a36Sopenharmony_ci		union nvme_result res = {};
98462306a36Sopenharmony_ci
98562306a36Sopenharmony_ci		nvme_complete_async_event(&req->queue->ctrl->ctrl,
98662306a36Sopenharmony_ci				cpu_to_le16(NVME_SC_HOST_PATH_ERROR), &res);
98762306a36Sopenharmony_ci	} else {
98862306a36Sopenharmony_ci		nvme_tcp_end_request(blk_mq_rq_from_pdu(req),
98962306a36Sopenharmony_ci				NVME_SC_HOST_PATH_ERROR);
99062306a36Sopenharmony_ci	}
99162306a36Sopenharmony_ci}
99262306a36Sopenharmony_ci
99362306a36Sopenharmony_cistatic int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
99462306a36Sopenharmony_ci{
99562306a36Sopenharmony_ci	struct nvme_tcp_queue *queue = req->queue;
99662306a36Sopenharmony_ci	int req_data_len = req->data_len;
99762306a36Sopenharmony_ci	u32 h2cdata_left = req->h2cdata_left;
99862306a36Sopenharmony_ci
99962306a36Sopenharmony_ci	while (true) {
100062306a36Sopenharmony_ci		struct bio_vec bvec;
100162306a36Sopenharmony_ci		struct msghdr msg = {
100262306a36Sopenharmony_ci			.msg_flags = MSG_DONTWAIT | MSG_SPLICE_PAGES,
100362306a36Sopenharmony_ci		};
100462306a36Sopenharmony_ci		struct page *page = nvme_tcp_req_cur_page(req);
100562306a36Sopenharmony_ci		size_t offset = nvme_tcp_req_cur_offset(req);
100662306a36Sopenharmony_ci		size_t len = nvme_tcp_req_cur_length(req);
100762306a36Sopenharmony_ci		bool last = nvme_tcp_pdu_last_send(req, len);
100862306a36Sopenharmony_ci		int req_data_sent = req->data_sent;
100962306a36Sopenharmony_ci		int ret;
101062306a36Sopenharmony_ci
101162306a36Sopenharmony_ci		if (last && !queue->data_digest && !nvme_tcp_queue_more(queue))
101262306a36Sopenharmony_ci			msg.msg_flags |= MSG_EOR;
101362306a36Sopenharmony_ci		else
101462306a36Sopenharmony_ci			msg.msg_flags |= MSG_MORE;
101562306a36Sopenharmony_ci
101662306a36Sopenharmony_ci		if (!sendpage_ok(page))
101762306a36Sopenharmony_ci			msg.msg_flags &= ~MSG_SPLICE_PAGES;
101862306a36Sopenharmony_ci
101962306a36Sopenharmony_ci		bvec_set_page(&bvec, page, len, offset);
102062306a36Sopenharmony_ci		iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, len);
102162306a36Sopenharmony_ci		ret = sock_sendmsg(queue->sock, &msg);
102262306a36Sopenharmony_ci		if (ret <= 0)
102362306a36Sopenharmony_ci			return ret;
102462306a36Sopenharmony_ci
102562306a36Sopenharmony_ci		if (queue->data_digest)
102662306a36Sopenharmony_ci			nvme_tcp_ddgst_update(queue->snd_hash, page,
102762306a36Sopenharmony_ci					offset, ret);
102862306a36Sopenharmony_ci
102962306a36Sopenharmony_ci		/*
103062306a36Sopenharmony_ci		 * update the request iterator except for the last payload send
103162306a36Sopenharmony_ci		 * in the request where we don't want to modify it as we may
103262306a36Sopenharmony_ci		 * compete with the RX path completing the request.
103362306a36Sopenharmony_ci		 */
103462306a36Sopenharmony_ci		if (req_data_sent + ret < req_data_len)
103562306a36Sopenharmony_ci			nvme_tcp_advance_req(req, ret);
103662306a36Sopenharmony_ci
103762306a36Sopenharmony_ci		/* fully successful last send in current PDU */
103862306a36Sopenharmony_ci		if (last && ret == len) {
103962306a36Sopenharmony_ci			if (queue->data_digest) {
104062306a36Sopenharmony_ci				nvme_tcp_ddgst_final(queue->snd_hash,
104162306a36Sopenharmony_ci					&req->ddgst);
104262306a36Sopenharmony_ci				req->state = NVME_TCP_SEND_DDGST;
104362306a36Sopenharmony_ci				req->offset = 0;
104462306a36Sopenharmony_ci			} else {
104562306a36Sopenharmony_ci				if (h2cdata_left)
104662306a36Sopenharmony_ci					nvme_tcp_setup_h2c_data_pdu(req);
104762306a36Sopenharmony_ci				else
104862306a36Sopenharmony_ci					nvme_tcp_done_send_req(queue);
104962306a36Sopenharmony_ci			}
105062306a36Sopenharmony_ci			return 1;
105162306a36Sopenharmony_ci		}
105262306a36Sopenharmony_ci	}
105362306a36Sopenharmony_ci	return -EAGAIN;
105462306a36Sopenharmony_ci}
105562306a36Sopenharmony_ci
105662306a36Sopenharmony_cistatic int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req)
105762306a36Sopenharmony_ci{
105862306a36Sopenharmony_ci	struct nvme_tcp_queue *queue = req->queue;
105962306a36Sopenharmony_ci	struct nvme_tcp_cmd_pdu *pdu = nvme_tcp_req_cmd_pdu(req);
106062306a36Sopenharmony_ci	struct bio_vec bvec;
106162306a36Sopenharmony_ci	struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_SPLICE_PAGES, };
106262306a36Sopenharmony_ci	bool inline_data = nvme_tcp_has_inline_data(req);
106362306a36Sopenharmony_ci	u8 hdgst = nvme_tcp_hdgst_len(queue);
106462306a36Sopenharmony_ci	int len = sizeof(*pdu) + hdgst - req->offset;
106562306a36Sopenharmony_ci	int ret;
106662306a36Sopenharmony_ci
106762306a36Sopenharmony_ci	if (inline_data || nvme_tcp_queue_more(queue))
106862306a36Sopenharmony_ci		msg.msg_flags |= MSG_MORE;
106962306a36Sopenharmony_ci	else
107062306a36Sopenharmony_ci		msg.msg_flags |= MSG_EOR;
107162306a36Sopenharmony_ci
107262306a36Sopenharmony_ci	if (queue->hdr_digest && !req->offset)
107362306a36Sopenharmony_ci		nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu));
107462306a36Sopenharmony_ci
107562306a36Sopenharmony_ci	bvec_set_virt(&bvec, (void *)pdu + req->offset, len);
107662306a36Sopenharmony_ci	iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, len);
107762306a36Sopenharmony_ci	ret = sock_sendmsg(queue->sock, &msg);
107862306a36Sopenharmony_ci	if (unlikely(ret <= 0))
107962306a36Sopenharmony_ci		return ret;
108062306a36Sopenharmony_ci
108162306a36Sopenharmony_ci	len -= ret;
108262306a36Sopenharmony_ci	if (!len) {
108362306a36Sopenharmony_ci		if (inline_data) {
108462306a36Sopenharmony_ci			req->state = NVME_TCP_SEND_DATA;
108562306a36Sopenharmony_ci			if (queue->data_digest)
108662306a36Sopenharmony_ci				crypto_ahash_init(queue->snd_hash);
108762306a36Sopenharmony_ci		} else {
108862306a36Sopenharmony_ci			nvme_tcp_done_send_req(queue);
108962306a36Sopenharmony_ci		}
109062306a36Sopenharmony_ci		return 1;
109162306a36Sopenharmony_ci	}
109262306a36Sopenharmony_ci	req->offset += ret;
109362306a36Sopenharmony_ci
109462306a36Sopenharmony_ci	return -EAGAIN;
109562306a36Sopenharmony_ci}
109662306a36Sopenharmony_ci
109762306a36Sopenharmony_cistatic int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req)
109862306a36Sopenharmony_ci{
109962306a36Sopenharmony_ci	struct nvme_tcp_queue *queue = req->queue;
110062306a36Sopenharmony_ci	struct nvme_tcp_data_pdu *pdu = nvme_tcp_req_data_pdu(req);
110162306a36Sopenharmony_ci	struct bio_vec bvec;
110262306a36Sopenharmony_ci	struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_MORE, };
110362306a36Sopenharmony_ci	u8 hdgst = nvme_tcp_hdgst_len(queue);
110462306a36Sopenharmony_ci	int len = sizeof(*pdu) - req->offset + hdgst;
110562306a36Sopenharmony_ci	int ret;
110662306a36Sopenharmony_ci
110762306a36Sopenharmony_ci	if (queue->hdr_digest && !req->offset)
110862306a36Sopenharmony_ci		nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu));
110962306a36Sopenharmony_ci
111062306a36Sopenharmony_ci	if (!req->h2cdata_left)
111162306a36Sopenharmony_ci		msg.msg_flags |= MSG_SPLICE_PAGES;
111262306a36Sopenharmony_ci
111362306a36Sopenharmony_ci	bvec_set_virt(&bvec, (void *)pdu + req->offset, len);
111462306a36Sopenharmony_ci	iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, len);
111562306a36Sopenharmony_ci	ret = sock_sendmsg(queue->sock, &msg);
111662306a36Sopenharmony_ci	if (unlikely(ret <= 0))
111762306a36Sopenharmony_ci		return ret;
111862306a36Sopenharmony_ci
111962306a36Sopenharmony_ci	len -= ret;
112062306a36Sopenharmony_ci	if (!len) {
112162306a36Sopenharmony_ci		req->state = NVME_TCP_SEND_DATA;
112262306a36Sopenharmony_ci		if (queue->data_digest)
112362306a36Sopenharmony_ci			crypto_ahash_init(queue->snd_hash);
112462306a36Sopenharmony_ci		return 1;
112562306a36Sopenharmony_ci	}
112662306a36Sopenharmony_ci	req->offset += ret;
112762306a36Sopenharmony_ci
112862306a36Sopenharmony_ci	return -EAGAIN;
112962306a36Sopenharmony_ci}
113062306a36Sopenharmony_ci
113162306a36Sopenharmony_cistatic int nvme_tcp_try_send_ddgst(struct nvme_tcp_request *req)
113262306a36Sopenharmony_ci{
113362306a36Sopenharmony_ci	struct nvme_tcp_queue *queue = req->queue;
113462306a36Sopenharmony_ci	size_t offset = req->offset;
113562306a36Sopenharmony_ci	u32 h2cdata_left = req->h2cdata_left;
113662306a36Sopenharmony_ci	int ret;
113762306a36Sopenharmony_ci	struct msghdr msg = { .msg_flags = MSG_DONTWAIT };
113862306a36Sopenharmony_ci	struct kvec iov = {
113962306a36Sopenharmony_ci		.iov_base = (u8 *)&req->ddgst + req->offset,
114062306a36Sopenharmony_ci		.iov_len = NVME_TCP_DIGEST_LENGTH - req->offset
114162306a36Sopenharmony_ci	};
114262306a36Sopenharmony_ci
114362306a36Sopenharmony_ci	if (nvme_tcp_queue_more(queue))
114462306a36Sopenharmony_ci		msg.msg_flags |= MSG_MORE;
114562306a36Sopenharmony_ci	else
114662306a36Sopenharmony_ci		msg.msg_flags |= MSG_EOR;
114762306a36Sopenharmony_ci
114862306a36Sopenharmony_ci	ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len);
114962306a36Sopenharmony_ci	if (unlikely(ret <= 0))
115062306a36Sopenharmony_ci		return ret;
115162306a36Sopenharmony_ci
115262306a36Sopenharmony_ci	if (offset + ret == NVME_TCP_DIGEST_LENGTH) {
115362306a36Sopenharmony_ci		if (h2cdata_left)
115462306a36Sopenharmony_ci			nvme_tcp_setup_h2c_data_pdu(req);
115562306a36Sopenharmony_ci		else
115662306a36Sopenharmony_ci			nvme_tcp_done_send_req(queue);
115762306a36Sopenharmony_ci		return 1;
115862306a36Sopenharmony_ci	}
115962306a36Sopenharmony_ci
116062306a36Sopenharmony_ci	req->offset += ret;
116162306a36Sopenharmony_ci	return -EAGAIN;
116262306a36Sopenharmony_ci}
116362306a36Sopenharmony_ci
116462306a36Sopenharmony_cistatic int nvme_tcp_try_send(struct nvme_tcp_queue *queue)
116562306a36Sopenharmony_ci{
116662306a36Sopenharmony_ci	struct nvme_tcp_request *req;
116762306a36Sopenharmony_ci	unsigned int noreclaim_flag;
116862306a36Sopenharmony_ci	int ret = 1;
116962306a36Sopenharmony_ci
117062306a36Sopenharmony_ci	if (!queue->request) {
117162306a36Sopenharmony_ci		queue->request = nvme_tcp_fetch_request(queue);
117262306a36Sopenharmony_ci		if (!queue->request)
117362306a36Sopenharmony_ci			return 0;
117462306a36Sopenharmony_ci	}
117562306a36Sopenharmony_ci	req = queue->request;
117662306a36Sopenharmony_ci
117762306a36Sopenharmony_ci	noreclaim_flag = memalloc_noreclaim_save();
117862306a36Sopenharmony_ci	if (req->state == NVME_TCP_SEND_CMD_PDU) {
117962306a36Sopenharmony_ci		ret = nvme_tcp_try_send_cmd_pdu(req);
118062306a36Sopenharmony_ci		if (ret <= 0)
118162306a36Sopenharmony_ci			goto done;
118262306a36Sopenharmony_ci		if (!nvme_tcp_has_inline_data(req))
118362306a36Sopenharmony_ci			goto out;
118462306a36Sopenharmony_ci	}
118562306a36Sopenharmony_ci
118662306a36Sopenharmony_ci	if (req->state == NVME_TCP_SEND_H2C_PDU) {
118762306a36Sopenharmony_ci		ret = nvme_tcp_try_send_data_pdu(req);
118862306a36Sopenharmony_ci		if (ret <= 0)
118962306a36Sopenharmony_ci			goto done;
119062306a36Sopenharmony_ci	}
119162306a36Sopenharmony_ci
119262306a36Sopenharmony_ci	if (req->state == NVME_TCP_SEND_DATA) {
119362306a36Sopenharmony_ci		ret = nvme_tcp_try_send_data(req);
119462306a36Sopenharmony_ci		if (ret <= 0)
119562306a36Sopenharmony_ci			goto done;
119662306a36Sopenharmony_ci	}
119762306a36Sopenharmony_ci
119862306a36Sopenharmony_ci	if (req->state == NVME_TCP_SEND_DDGST)
119962306a36Sopenharmony_ci		ret = nvme_tcp_try_send_ddgst(req);
120062306a36Sopenharmony_cidone:
120162306a36Sopenharmony_ci	if (ret == -EAGAIN) {
120262306a36Sopenharmony_ci		ret = 0;
120362306a36Sopenharmony_ci	} else if (ret < 0) {
120462306a36Sopenharmony_ci		dev_err(queue->ctrl->ctrl.device,
120562306a36Sopenharmony_ci			"failed to send request %d\n", ret);
120662306a36Sopenharmony_ci		nvme_tcp_fail_request(queue->request);
120762306a36Sopenharmony_ci		nvme_tcp_done_send_req(queue);
120862306a36Sopenharmony_ci	}
120962306a36Sopenharmony_ciout:
121062306a36Sopenharmony_ci	memalloc_noreclaim_restore(noreclaim_flag);
121162306a36Sopenharmony_ci	return ret;
121262306a36Sopenharmony_ci}
121362306a36Sopenharmony_ci
121462306a36Sopenharmony_cistatic int nvme_tcp_try_recv(struct nvme_tcp_queue *queue)
121562306a36Sopenharmony_ci{
121662306a36Sopenharmony_ci	struct socket *sock = queue->sock;
121762306a36Sopenharmony_ci	struct sock *sk = sock->sk;
121862306a36Sopenharmony_ci	read_descriptor_t rd_desc;
121962306a36Sopenharmony_ci	int consumed;
122062306a36Sopenharmony_ci
122162306a36Sopenharmony_ci	rd_desc.arg.data = queue;
122262306a36Sopenharmony_ci	rd_desc.count = 1;
122362306a36Sopenharmony_ci	lock_sock(sk);
122462306a36Sopenharmony_ci	queue->nr_cqe = 0;
122562306a36Sopenharmony_ci	consumed = sock->ops->read_sock(sk, &rd_desc, nvme_tcp_recv_skb);
122662306a36Sopenharmony_ci	release_sock(sk);
122762306a36Sopenharmony_ci	return consumed;
122862306a36Sopenharmony_ci}
122962306a36Sopenharmony_ci
123062306a36Sopenharmony_cistatic void nvme_tcp_io_work(struct work_struct *w)
123162306a36Sopenharmony_ci{
123262306a36Sopenharmony_ci	struct nvme_tcp_queue *queue =
123362306a36Sopenharmony_ci		container_of(w, struct nvme_tcp_queue, io_work);
123462306a36Sopenharmony_ci	unsigned long deadline = jiffies + msecs_to_jiffies(1);
123562306a36Sopenharmony_ci
123662306a36Sopenharmony_ci	do {
123762306a36Sopenharmony_ci		bool pending = false;
123862306a36Sopenharmony_ci		int result;
123962306a36Sopenharmony_ci
124062306a36Sopenharmony_ci		if (mutex_trylock(&queue->send_mutex)) {
124162306a36Sopenharmony_ci			result = nvme_tcp_try_send(queue);
124262306a36Sopenharmony_ci			mutex_unlock(&queue->send_mutex);
124362306a36Sopenharmony_ci			if (result > 0)
124462306a36Sopenharmony_ci				pending = true;
124562306a36Sopenharmony_ci			else if (unlikely(result < 0))
124662306a36Sopenharmony_ci				break;
124762306a36Sopenharmony_ci		}
124862306a36Sopenharmony_ci
124962306a36Sopenharmony_ci		result = nvme_tcp_try_recv(queue);
125062306a36Sopenharmony_ci		if (result > 0)
125162306a36Sopenharmony_ci			pending = true;
125262306a36Sopenharmony_ci		else if (unlikely(result < 0))
125362306a36Sopenharmony_ci			return;
125462306a36Sopenharmony_ci
125562306a36Sopenharmony_ci		if (!pending || !queue->rd_enabled)
125662306a36Sopenharmony_ci			return;
125762306a36Sopenharmony_ci
125862306a36Sopenharmony_ci	} while (!time_after(jiffies, deadline)); /* quota is exhausted */
125962306a36Sopenharmony_ci
126062306a36Sopenharmony_ci	queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
126162306a36Sopenharmony_ci}
126262306a36Sopenharmony_ci
126362306a36Sopenharmony_cistatic void nvme_tcp_free_crypto(struct nvme_tcp_queue *queue)
126462306a36Sopenharmony_ci{
126562306a36Sopenharmony_ci	struct crypto_ahash *tfm = crypto_ahash_reqtfm(queue->rcv_hash);
126662306a36Sopenharmony_ci
126762306a36Sopenharmony_ci	ahash_request_free(queue->rcv_hash);
126862306a36Sopenharmony_ci	ahash_request_free(queue->snd_hash);
126962306a36Sopenharmony_ci	crypto_free_ahash(tfm);
127062306a36Sopenharmony_ci}
127162306a36Sopenharmony_ci
127262306a36Sopenharmony_cistatic int nvme_tcp_alloc_crypto(struct nvme_tcp_queue *queue)
127362306a36Sopenharmony_ci{
127462306a36Sopenharmony_ci	struct crypto_ahash *tfm;
127562306a36Sopenharmony_ci
127662306a36Sopenharmony_ci	tfm = crypto_alloc_ahash("crc32c", 0, CRYPTO_ALG_ASYNC);
127762306a36Sopenharmony_ci	if (IS_ERR(tfm))
127862306a36Sopenharmony_ci		return PTR_ERR(tfm);
127962306a36Sopenharmony_ci
128062306a36Sopenharmony_ci	queue->snd_hash = ahash_request_alloc(tfm, GFP_KERNEL);
128162306a36Sopenharmony_ci	if (!queue->snd_hash)
128262306a36Sopenharmony_ci		goto free_tfm;
128362306a36Sopenharmony_ci	ahash_request_set_callback(queue->snd_hash, 0, NULL, NULL);
128462306a36Sopenharmony_ci
128562306a36Sopenharmony_ci	queue->rcv_hash = ahash_request_alloc(tfm, GFP_KERNEL);
128662306a36Sopenharmony_ci	if (!queue->rcv_hash)
128762306a36Sopenharmony_ci		goto free_snd_hash;
128862306a36Sopenharmony_ci	ahash_request_set_callback(queue->rcv_hash, 0, NULL, NULL);
128962306a36Sopenharmony_ci
129062306a36Sopenharmony_ci	return 0;
129162306a36Sopenharmony_cifree_snd_hash:
129262306a36Sopenharmony_ci	ahash_request_free(queue->snd_hash);
129362306a36Sopenharmony_cifree_tfm:
129462306a36Sopenharmony_ci	crypto_free_ahash(tfm);
129562306a36Sopenharmony_ci	return -ENOMEM;
129662306a36Sopenharmony_ci}
129762306a36Sopenharmony_ci
129862306a36Sopenharmony_cistatic void nvme_tcp_free_async_req(struct nvme_tcp_ctrl *ctrl)
129962306a36Sopenharmony_ci{
130062306a36Sopenharmony_ci	struct nvme_tcp_request *async = &ctrl->async_req;
130162306a36Sopenharmony_ci
130262306a36Sopenharmony_ci	page_frag_free(async->pdu);
130362306a36Sopenharmony_ci}
130462306a36Sopenharmony_ci
130562306a36Sopenharmony_cistatic int nvme_tcp_alloc_async_req(struct nvme_tcp_ctrl *ctrl)
130662306a36Sopenharmony_ci{
130762306a36Sopenharmony_ci	struct nvme_tcp_queue *queue = &ctrl->queues[0];
130862306a36Sopenharmony_ci	struct nvme_tcp_request *async = &ctrl->async_req;
130962306a36Sopenharmony_ci	u8 hdgst = nvme_tcp_hdgst_len(queue);
131062306a36Sopenharmony_ci
131162306a36Sopenharmony_ci	async->pdu = page_frag_alloc(&queue->pf_cache,
131262306a36Sopenharmony_ci		sizeof(struct nvme_tcp_cmd_pdu) + hdgst,
131362306a36Sopenharmony_ci		GFP_KERNEL | __GFP_ZERO);
131462306a36Sopenharmony_ci	if (!async->pdu)
131562306a36Sopenharmony_ci		return -ENOMEM;
131662306a36Sopenharmony_ci
131762306a36Sopenharmony_ci	async->queue = &ctrl->queues[0];
131862306a36Sopenharmony_ci	return 0;
131962306a36Sopenharmony_ci}
132062306a36Sopenharmony_ci
132162306a36Sopenharmony_cistatic void nvme_tcp_free_queue(struct nvme_ctrl *nctrl, int qid)
132262306a36Sopenharmony_ci{
132362306a36Sopenharmony_ci	struct page *page;
132462306a36Sopenharmony_ci	struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
132562306a36Sopenharmony_ci	struct nvme_tcp_queue *queue = &ctrl->queues[qid];
132662306a36Sopenharmony_ci	unsigned int noreclaim_flag;
132762306a36Sopenharmony_ci
132862306a36Sopenharmony_ci	if (!test_and_clear_bit(NVME_TCP_Q_ALLOCATED, &queue->flags))
132962306a36Sopenharmony_ci		return;
133062306a36Sopenharmony_ci
133162306a36Sopenharmony_ci	if (queue->hdr_digest || queue->data_digest)
133262306a36Sopenharmony_ci		nvme_tcp_free_crypto(queue);
133362306a36Sopenharmony_ci
133462306a36Sopenharmony_ci	if (queue->pf_cache.va) {
133562306a36Sopenharmony_ci		page = virt_to_head_page(queue->pf_cache.va);
133662306a36Sopenharmony_ci		__page_frag_cache_drain(page, queue->pf_cache.pagecnt_bias);
133762306a36Sopenharmony_ci		queue->pf_cache.va = NULL;
133862306a36Sopenharmony_ci	}
133962306a36Sopenharmony_ci
134062306a36Sopenharmony_ci	noreclaim_flag = memalloc_noreclaim_save();
134162306a36Sopenharmony_ci	sock_release(queue->sock);
134262306a36Sopenharmony_ci	memalloc_noreclaim_restore(noreclaim_flag);
134362306a36Sopenharmony_ci
134462306a36Sopenharmony_ci	kfree(queue->pdu);
134562306a36Sopenharmony_ci	mutex_destroy(&queue->send_mutex);
134662306a36Sopenharmony_ci	mutex_destroy(&queue->queue_lock);
134762306a36Sopenharmony_ci}
134862306a36Sopenharmony_ci
134962306a36Sopenharmony_cistatic int nvme_tcp_init_connection(struct nvme_tcp_queue *queue)
135062306a36Sopenharmony_ci{
135162306a36Sopenharmony_ci	struct nvme_tcp_icreq_pdu *icreq;
135262306a36Sopenharmony_ci	struct nvme_tcp_icresp_pdu *icresp;
135362306a36Sopenharmony_ci	struct msghdr msg = {};
135462306a36Sopenharmony_ci	struct kvec iov;
135562306a36Sopenharmony_ci	bool ctrl_hdgst, ctrl_ddgst;
135662306a36Sopenharmony_ci	u32 maxh2cdata;
135762306a36Sopenharmony_ci	int ret;
135862306a36Sopenharmony_ci
135962306a36Sopenharmony_ci	icreq = kzalloc(sizeof(*icreq), GFP_KERNEL);
136062306a36Sopenharmony_ci	if (!icreq)
136162306a36Sopenharmony_ci		return -ENOMEM;
136262306a36Sopenharmony_ci
136362306a36Sopenharmony_ci	icresp = kzalloc(sizeof(*icresp), GFP_KERNEL);
136462306a36Sopenharmony_ci	if (!icresp) {
136562306a36Sopenharmony_ci		ret = -ENOMEM;
136662306a36Sopenharmony_ci		goto free_icreq;
136762306a36Sopenharmony_ci	}
136862306a36Sopenharmony_ci
136962306a36Sopenharmony_ci	icreq->hdr.type = nvme_tcp_icreq;
137062306a36Sopenharmony_ci	icreq->hdr.hlen = sizeof(*icreq);
137162306a36Sopenharmony_ci	icreq->hdr.pdo = 0;
137262306a36Sopenharmony_ci	icreq->hdr.plen = cpu_to_le32(icreq->hdr.hlen);
137362306a36Sopenharmony_ci	icreq->pfv = cpu_to_le16(NVME_TCP_PFV_1_0);
137462306a36Sopenharmony_ci	icreq->maxr2t = 0; /* single inflight r2t supported */
137562306a36Sopenharmony_ci	icreq->hpda = 0; /* no alignment constraint */
137662306a36Sopenharmony_ci	if (queue->hdr_digest)
137762306a36Sopenharmony_ci		icreq->digest |= NVME_TCP_HDR_DIGEST_ENABLE;
137862306a36Sopenharmony_ci	if (queue->data_digest)
137962306a36Sopenharmony_ci		icreq->digest |= NVME_TCP_DATA_DIGEST_ENABLE;
138062306a36Sopenharmony_ci
138162306a36Sopenharmony_ci	iov.iov_base = icreq;
138262306a36Sopenharmony_ci	iov.iov_len = sizeof(*icreq);
138362306a36Sopenharmony_ci	ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len);
138462306a36Sopenharmony_ci	if (ret < 0)
138562306a36Sopenharmony_ci		goto free_icresp;
138662306a36Sopenharmony_ci
138762306a36Sopenharmony_ci	memset(&msg, 0, sizeof(msg));
138862306a36Sopenharmony_ci	iov.iov_base = icresp;
138962306a36Sopenharmony_ci	iov.iov_len = sizeof(*icresp);
139062306a36Sopenharmony_ci	ret = kernel_recvmsg(queue->sock, &msg, &iov, 1,
139162306a36Sopenharmony_ci			iov.iov_len, msg.msg_flags);
139262306a36Sopenharmony_ci	if (ret < 0)
139362306a36Sopenharmony_ci		goto free_icresp;
139462306a36Sopenharmony_ci
139562306a36Sopenharmony_ci	ret = -EINVAL;
139662306a36Sopenharmony_ci	if (icresp->hdr.type != nvme_tcp_icresp) {
139762306a36Sopenharmony_ci		pr_err("queue %d: bad type returned %d\n",
139862306a36Sopenharmony_ci			nvme_tcp_queue_id(queue), icresp->hdr.type);
139962306a36Sopenharmony_ci		goto free_icresp;
140062306a36Sopenharmony_ci	}
140162306a36Sopenharmony_ci
140262306a36Sopenharmony_ci	if (le32_to_cpu(icresp->hdr.plen) != sizeof(*icresp)) {
140362306a36Sopenharmony_ci		pr_err("queue %d: bad pdu length returned %d\n",
140462306a36Sopenharmony_ci			nvme_tcp_queue_id(queue), icresp->hdr.plen);
140562306a36Sopenharmony_ci		goto free_icresp;
140662306a36Sopenharmony_ci	}
140762306a36Sopenharmony_ci
140862306a36Sopenharmony_ci	if (icresp->pfv != NVME_TCP_PFV_1_0) {
140962306a36Sopenharmony_ci		pr_err("queue %d: bad pfv returned %d\n",
141062306a36Sopenharmony_ci			nvme_tcp_queue_id(queue), icresp->pfv);
141162306a36Sopenharmony_ci		goto free_icresp;
141262306a36Sopenharmony_ci	}
141362306a36Sopenharmony_ci
141462306a36Sopenharmony_ci	ctrl_ddgst = !!(icresp->digest & NVME_TCP_DATA_DIGEST_ENABLE);
141562306a36Sopenharmony_ci	if ((queue->data_digest && !ctrl_ddgst) ||
141662306a36Sopenharmony_ci	    (!queue->data_digest && ctrl_ddgst)) {
141762306a36Sopenharmony_ci		pr_err("queue %d: data digest mismatch host: %s ctrl: %s\n",
141862306a36Sopenharmony_ci			nvme_tcp_queue_id(queue),
141962306a36Sopenharmony_ci			queue->data_digest ? "enabled" : "disabled",
142062306a36Sopenharmony_ci			ctrl_ddgst ? "enabled" : "disabled");
142162306a36Sopenharmony_ci		goto free_icresp;
142262306a36Sopenharmony_ci	}
142362306a36Sopenharmony_ci
142462306a36Sopenharmony_ci	ctrl_hdgst = !!(icresp->digest & NVME_TCP_HDR_DIGEST_ENABLE);
142562306a36Sopenharmony_ci	if ((queue->hdr_digest && !ctrl_hdgst) ||
142662306a36Sopenharmony_ci	    (!queue->hdr_digest && ctrl_hdgst)) {
142762306a36Sopenharmony_ci		pr_err("queue %d: header digest mismatch host: %s ctrl: %s\n",
142862306a36Sopenharmony_ci			nvme_tcp_queue_id(queue),
142962306a36Sopenharmony_ci			queue->hdr_digest ? "enabled" : "disabled",
143062306a36Sopenharmony_ci			ctrl_hdgst ? "enabled" : "disabled");
143162306a36Sopenharmony_ci		goto free_icresp;
143262306a36Sopenharmony_ci	}
143362306a36Sopenharmony_ci
143462306a36Sopenharmony_ci	if (icresp->cpda != 0) {
143562306a36Sopenharmony_ci		pr_err("queue %d: unsupported cpda returned %d\n",
143662306a36Sopenharmony_ci			nvme_tcp_queue_id(queue), icresp->cpda);
143762306a36Sopenharmony_ci		goto free_icresp;
143862306a36Sopenharmony_ci	}
143962306a36Sopenharmony_ci
144062306a36Sopenharmony_ci	maxh2cdata = le32_to_cpu(icresp->maxdata);
144162306a36Sopenharmony_ci	if ((maxh2cdata % 4) || (maxh2cdata < NVME_TCP_MIN_MAXH2CDATA)) {
144262306a36Sopenharmony_ci		pr_err("queue %d: invalid maxh2cdata returned %u\n",
144362306a36Sopenharmony_ci		       nvme_tcp_queue_id(queue), maxh2cdata);
144462306a36Sopenharmony_ci		goto free_icresp;
144562306a36Sopenharmony_ci	}
144662306a36Sopenharmony_ci	queue->maxh2cdata = maxh2cdata;
144762306a36Sopenharmony_ci
144862306a36Sopenharmony_ci	ret = 0;
144962306a36Sopenharmony_cifree_icresp:
145062306a36Sopenharmony_ci	kfree(icresp);
145162306a36Sopenharmony_cifree_icreq:
145262306a36Sopenharmony_ci	kfree(icreq);
145362306a36Sopenharmony_ci	return ret;
145462306a36Sopenharmony_ci}
145562306a36Sopenharmony_ci
145662306a36Sopenharmony_cistatic bool nvme_tcp_admin_queue(struct nvme_tcp_queue *queue)
145762306a36Sopenharmony_ci{
145862306a36Sopenharmony_ci	return nvme_tcp_queue_id(queue) == 0;
145962306a36Sopenharmony_ci}
146062306a36Sopenharmony_ci
146162306a36Sopenharmony_cistatic bool nvme_tcp_default_queue(struct nvme_tcp_queue *queue)
146262306a36Sopenharmony_ci{
146362306a36Sopenharmony_ci	struct nvme_tcp_ctrl *ctrl = queue->ctrl;
146462306a36Sopenharmony_ci	int qid = nvme_tcp_queue_id(queue);
146562306a36Sopenharmony_ci
146662306a36Sopenharmony_ci	return !nvme_tcp_admin_queue(queue) &&
146762306a36Sopenharmony_ci		qid < 1 + ctrl->io_queues[HCTX_TYPE_DEFAULT];
146862306a36Sopenharmony_ci}
146962306a36Sopenharmony_ci
147062306a36Sopenharmony_cistatic bool nvme_tcp_read_queue(struct nvme_tcp_queue *queue)
147162306a36Sopenharmony_ci{
147262306a36Sopenharmony_ci	struct nvme_tcp_ctrl *ctrl = queue->ctrl;
147362306a36Sopenharmony_ci	int qid = nvme_tcp_queue_id(queue);
147462306a36Sopenharmony_ci
147562306a36Sopenharmony_ci	return !nvme_tcp_admin_queue(queue) &&
147662306a36Sopenharmony_ci		!nvme_tcp_default_queue(queue) &&
147762306a36Sopenharmony_ci		qid < 1 + ctrl->io_queues[HCTX_TYPE_DEFAULT] +
147862306a36Sopenharmony_ci			  ctrl->io_queues[HCTX_TYPE_READ];
147962306a36Sopenharmony_ci}
148062306a36Sopenharmony_ci
148162306a36Sopenharmony_cistatic bool nvme_tcp_poll_queue(struct nvme_tcp_queue *queue)
148262306a36Sopenharmony_ci{
148362306a36Sopenharmony_ci	struct nvme_tcp_ctrl *ctrl = queue->ctrl;
148462306a36Sopenharmony_ci	int qid = nvme_tcp_queue_id(queue);
148562306a36Sopenharmony_ci
148662306a36Sopenharmony_ci	return !nvme_tcp_admin_queue(queue) &&
148762306a36Sopenharmony_ci		!nvme_tcp_default_queue(queue) &&
148862306a36Sopenharmony_ci		!nvme_tcp_read_queue(queue) &&
148962306a36Sopenharmony_ci		qid < 1 + ctrl->io_queues[HCTX_TYPE_DEFAULT] +
149062306a36Sopenharmony_ci			  ctrl->io_queues[HCTX_TYPE_READ] +
149162306a36Sopenharmony_ci			  ctrl->io_queues[HCTX_TYPE_POLL];
149262306a36Sopenharmony_ci}
149362306a36Sopenharmony_ci
149462306a36Sopenharmony_cistatic void nvme_tcp_set_queue_io_cpu(struct nvme_tcp_queue *queue)
149562306a36Sopenharmony_ci{
149662306a36Sopenharmony_ci	struct nvme_tcp_ctrl *ctrl = queue->ctrl;
149762306a36Sopenharmony_ci	int qid = nvme_tcp_queue_id(queue);
149862306a36Sopenharmony_ci	int n = 0;
149962306a36Sopenharmony_ci
150062306a36Sopenharmony_ci	if (nvme_tcp_default_queue(queue))
150162306a36Sopenharmony_ci		n = qid - 1;
150262306a36Sopenharmony_ci	else if (nvme_tcp_read_queue(queue))
150362306a36Sopenharmony_ci		n = qid - ctrl->io_queues[HCTX_TYPE_DEFAULT] - 1;
150462306a36Sopenharmony_ci	else if (nvme_tcp_poll_queue(queue))
150562306a36Sopenharmony_ci		n = qid - ctrl->io_queues[HCTX_TYPE_DEFAULT] -
150662306a36Sopenharmony_ci				ctrl->io_queues[HCTX_TYPE_READ] - 1;
150762306a36Sopenharmony_ci	queue->io_cpu = cpumask_next_wrap(n - 1, cpu_online_mask, -1, false);
150862306a36Sopenharmony_ci}
150962306a36Sopenharmony_ci
151062306a36Sopenharmony_cistatic int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid)
151162306a36Sopenharmony_ci{
151262306a36Sopenharmony_ci	struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
151362306a36Sopenharmony_ci	struct nvme_tcp_queue *queue = &ctrl->queues[qid];
151462306a36Sopenharmony_ci	int ret, rcv_pdu_size;
151562306a36Sopenharmony_ci
151662306a36Sopenharmony_ci	mutex_init(&queue->queue_lock);
151762306a36Sopenharmony_ci	queue->ctrl = ctrl;
151862306a36Sopenharmony_ci	init_llist_head(&queue->req_list);
151962306a36Sopenharmony_ci	INIT_LIST_HEAD(&queue->send_list);
152062306a36Sopenharmony_ci	mutex_init(&queue->send_mutex);
152162306a36Sopenharmony_ci	INIT_WORK(&queue->io_work, nvme_tcp_io_work);
152262306a36Sopenharmony_ci
152362306a36Sopenharmony_ci	if (qid > 0)
152462306a36Sopenharmony_ci		queue->cmnd_capsule_len = nctrl->ioccsz * 16;
152562306a36Sopenharmony_ci	else
152662306a36Sopenharmony_ci		queue->cmnd_capsule_len = sizeof(struct nvme_command) +
152762306a36Sopenharmony_ci						NVME_TCP_ADMIN_CCSZ;
152862306a36Sopenharmony_ci
152962306a36Sopenharmony_ci	ret = sock_create(ctrl->addr.ss_family, SOCK_STREAM,
153062306a36Sopenharmony_ci			IPPROTO_TCP, &queue->sock);
153162306a36Sopenharmony_ci	if (ret) {
153262306a36Sopenharmony_ci		dev_err(nctrl->device,
153362306a36Sopenharmony_ci			"failed to create socket: %d\n", ret);
153462306a36Sopenharmony_ci		goto err_destroy_mutex;
153562306a36Sopenharmony_ci	}
153662306a36Sopenharmony_ci
153762306a36Sopenharmony_ci	nvme_tcp_reclassify_socket(queue->sock);
153862306a36Sopenharmony_ci
153962306a36Sopenharmony_ci	/* Single syn retry */
154062306a36Sopenharmony_ci	tcp_sock_set_syncnt(queue->sock->sk, 1);
154162306a36Sopenharmony_ci
154262306a36Sopenharmony_ci	/* Set TCP no delay */
154362306a36Sopenharmony_ci	tcp_sock_set_nodelay(queue->sock->sk);
154462306a36Sopenharmony_ci
154562306a36Sopenharmony_ci	/*
154662306a36Sopenharmony_ci	 * Cleanup whatever is sitting in the TCP transmit queue on socket
154762306a36Sopenharmony_ci	 * close. This is done to prevent stale data from being sent should
154862306a36Sopenharmony_ci	 * the network connection be restored before TCP times out.
154962306a36Sopenharmony_ci	 */
155062306a36Sopenharmony_ci	sock_no_linger(queue->sock->sk);
155162306a36Sopenharmony_ci
155262306a36Sopenharmony_ci	if (so_priority > 0)
155362306a36Sopenharmony_ci		sock_set_priority(queue->sock->sk, so_priority);
155462306a36Sopenharmony_ci
155562306a36Sopenharmony_ci	/* Set socket type of service */
155662306a36Sopenharmony_ci	if (nctrl->opts->tos >= 0)
155762306a36Sopenharmony_ci		ip_sock_set_tos(queue->sock->sk, nctrl->opts->tos);
155862306a36Sopenharmony_ci
155962306a36Sopenharmony_ci	/* Set 10 seconds timeout for icresp recvmsg */
156062306a36Sopenharmony_ci	queue->sock->sk->sk_rcvtimeo = 10 * HZ;
156162306a36Sopenharmony_ci
156262306a36Sopenharmony_ci	queue->sock->sk->sk_allocation = GFP_ATOMIC;
156362306a36Sopenharmony_ci	queue->sock->sk->sk_use_task_frag = false;
156462306a36Sopenharmony_ci	nvme_tcp_set_queue_io_cpu(queue);
156562306a36Sopenharmony_ci	queue->request = NULL;
156662306a36Sopenharmony_ci	queue->data_remaining = 0;
156762306a36Sopenharmony_ci	queue->ddgst_remaining = 0;
156862306a36Sopenharmony_ci	queue->pdu_remaining = 0;
156962306a36Sopenharmony_ci	queue->pdu_offset = 0;
157062306a36Sopenharmony_ci	sk_set_memalloc(queue->sock->sk);
157162306a36Sopenharmony_ci
157262306a36Sopenharmony_ci	if (nctrl->opts->mask & NVMF_OPT_HOST_TRADDR) {
157362306a36Sopenharmony_ci		ret = kernel_bind(queue->sock, (struct sockaddr *)&ctrl->src_addr,
157462306a36Sopenharmony_ci			sizeof(ctrl->src_addr));
157562306a36Sopenharmony_ci		if (ret) {
157662306a36Sopenharmony_ci			dev_err(nctrl->device,
157762306a36Sopenharmony_ci				"failed to bind queue %d socket %d\n",
157862306a36Sopenharmony_ci				qid, ret);
157962306a36Sopenharmony_ci			goto err_sock;
158062306a36Sopenharmony_ci		}
158162306a36Sopenharmony_ci	}
158262306a36Sopenharmony_ci
158362306a36Sopenharmony_ci	if (nctrl->opts->mask & NVMF_OPT_HOST_IFACE) {
158462306a36Sopenharmony_ci		char *iface = nctrl->opts->host_iface;
158562306a36Sopenharmony_ci		sockptr_t optval = KERNEL_SOCKPTR(iface);
158662306a36Sopenharmony_ci
158762306a36Sopenharmony_ci		ret = sock_setsockopt(queue->sock, SOL_SOCKET, SO_BINDTODEVICE,
158862306a36Sopenharmony_ci				      optval, strlen(iface));
158962306a36Sopenharmony_ci		if (ret) {
159062306a36Sopenharmony_ci			dev_err(nctrl->device,
159162306a36Sopenharmony_ci			  "failed to bind to interface %s queue %d err %d\n",
159262306a36Sopenharmony_ci			  iface, qid, ret);
159362306a36Sopenharmony_ci			goto err_sock;
159462306a36Sopenharmony_ci		}
159562306a36Sopenharmony_ci	}
159662306a36Sopenharmony_ci
159762306a36Sopenharmony_ci	queue->hdr_digest = nctrl->opts->hdr_digest;
159862306a36Sopenharmony_ci	queue->data_digest = nctrl->opts->data_digest;
159962306a36Sopenharmony_ci	if (queue->hdr_digest || queue->data_digest) {
160062306a36Sopenharmony_ci		ret = nvme_tcp_alloc_crypto(queue);
160162306a36Sopenharmony_ci		if (ret) {
160262306a36Sopenharmony_ci			dev_err(nctrl->device,
160362306a36Sopenharmony_ci				"failed to allocate queue %d crypto\n", qid);
160462306a36Sopenharmony_ci			goto err_sock;
160562306a36Sopenharmony_ci		}
160662306a36Sopenharmony_ci	}
160762306a36Sopenharmony_ci
160862306a36Sopenharmony_ci	rcv_pdu_size = sizeof(struct nvme_tcp_rsp_pdu) +
160962306a36Sopenharmony_ci			nvme_tcp_hdgst_len(queue);
161062306a36Sopenharmony_ci	queue->pdu = kmalloc(rcv_pdu_size, GFP_KERNEL);
161162306a36Sopenharmony_ci	if (!queue->pdu) {
161262306a36Sopenharmony_ci		ret = -ENOMEM;
161362306a36Sopenharmony_ci		goto err_crypto;
161462306a36Sopenharmony_ci	}
161562306a36Sopenharmony_ci
161662306a36Sopenharmony_ci	dev_dbg(nctrl->device, "connecting queue %d\n",
161762306a36Sopenharmony_ci			nvme_tcp_queue_id(queue));
161862306a36Sopenharmony_ci
161962306a36Sopenharmony_ci	ret = kernel_connect(queue->sock, (struct sockaddr *)&ctrl->addr,
162062306a36Sopenharmony_ci		sizeof(ctrl->addr), 0);
162162306a36Sopenharmony_ci	if (ret) {
162262306a36Sopenharmony_ci		dev_err(nctrl->device,
162362306a36Sopenharmony_ci			"failed to connect socket: %d\n", ret);
162462306a36Sopenharmony_ci		goto err_rcv_pdu;
162562306a36Sopenharmony_ci	}
162662306a36Sopenharmony_ci
162762306a36Sopenharmony_ci	ret = nvme_tcp_init_connection(queue);
162862306a36Sopenharmony_ci	if (ret)
162962306a36Sopenharmony_ci		goto err_init_connect;
163062306a36Sopenharmony_ci
163162306a36Sopenharmony_ci	set_bit(NVME_TCP_Q_ALLOCATED, &queue->flags);
163262306a36Sopenharmony_ci
163362306a36Sopenharmony_ci	return 0;
163462306a36Sopenharmony_ci
163562306a36Sopenharmony_cierr_init_connect:
163662306a36Sopenharmony_ci	kernel_sock_shutdown(queue->sock, SHUT_RDWR);
163762306a36Sopenharmony_cierr_rcv_pdu:
163862306a36Sopenharmony_ci	kfree(queue->pdu);
163962306a36Sopenharmony_cierr_crypto:
164062306a36Sopenharmony_ci	if (queue->hdr_digest || queue->data_digest)
164162306a36Sopenharmony_ci		nvme_tcp_free_crypto(queue);
164262306a36Sopenharmony_cierr_sock:
164362306a36Sopenharmony_ci	sock_release(queue->sock);
164462306a36Sopenharmony_ci	queue->sock = NULL;
164562306a36Sopenharmony_cierr_destroy_mutex:
164662306a36Sopenharmony_ci	mutex_destroy(&queue->send_mutex);
164762306a36Sopenharmony_ci	mutex_destroy(&queue->queue_lock);
164862306a36Sopenharmony_ci	return ret;
164962306a36Sopenharmony_ci}
165062306a36Sopenharmony_ci
165162306a36Sopenharmony_cistatic void nvme_tcp_restore_sock_ops(struct nvme_tcp_queue *queue)
165262306a36Sopenharmony_ci{
165362306a36Sopenharmony_ci	struct socket *sock = queue->sock;
165462306a36Sopenharmony_ci
165562306a36Sopenharmony_ci	write_lock_bh(&sock->sk->sk_callback_lock);
165662306a36Sopenharmony_ci	sock->sk->sk_user_data  = NULL;
165762306a36Sopenharmony_ci	sock->sk->sk_data_ready = queue->data_ready;
165862306a36Sopenharmony_ci	sock->sk->sk_state_change = queue->state_change;
165962306a36Sopenharmony_ci	sock->sk->sk_write_space  = queue->write_space;
166062306a36Sopenharmony_ci	write_unlock_bh(&sock->sk->sk_callback_lock);
166162306a36Sopenharmony_ci}
166262306a36Sopenharmony_ci
166362306a36Sopenharmony_cistatic void __nvme_tcp_stop_queue(struct nvme_tcp_queue *queue)
166462306a36Sopenharmony_ci{
166562306a36Sopenharmony_ci	kernel_sock_shutdown(queue->sock, SHUT_RDWR);
166662306a36Sopenharmony_ci	nvme_tcp_restore_sock_ops(queue);
166762306a36Sopenharmony_ci	cancel_work_sync(&queue->io_work);
166862306a36Sopenharmony_ci}
166962306a36Sopenharmony_ci
167062306a36Sopenharmony_cistatic void nvme_tcp_stop_queue(struct nvme_ctrl *nctrl, int qid)
167162306a36Sopenharmony_ci{
167262306a36Sopenharmony_ci	struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
167362306a36Sopenharmony_ci	struct nvme_tcp_queue *queue = &ctrl->queues[qid];
167462306a36Sopenharmony_ci
167562306a36Sopenharmony_ci	if (!test_bit(NVME_TCP_Q_ALLOCATED, &queue->flags))
167662306a36Sopenharmony_ci		return;
167762306a36Sopenharmony_ci
167862306a36Sopenharmony_ci	mutex_lock(&queue->queue_lock);
167962306a36Sopenharmony_ci	if (test_and_clear_bit(NVME_TCP_Q_LIVE, &queue->flags))
168062306a36Sopenharmony_ci		__nvme_tcp_stop_queue(queue);
168162306a36Sopenharmony_ci	mutex_unlock(&queue->queue_lock);
168262306a36Sopenharmony_ci}
168362306a36Sopenharmony_ci
168462306a36Sopenharmony_cistatic void nvme_tcp_setup_sock_ops(struct nvme_tcp_queue *queue)
168562306a36Sopenharmony_ci{
168662306a36Sopenharmony_ci	write_lock_bh(&queue->sock->sk->sk_callback_lock);
168762306a36Sopenharmony_ci	queue->sock->sk->sk_user_data = queue;
168862306a36Sopenharmony_ci	queue->state_change = queue->sock->sk->sk_state_change;
168962306a36Sopenharmony_ci	queue->data_ready = queue->sock->sk->sk_data_ready;
169062306a36Sopenharmony_ci	queue->write_space = queue->sock->sk->sk_write_space;
169162306a36Sopenharmony_ci	queue->sock->sk->sk_data_ready = nvme_tcp_data_ready;
169262306a36Sopenharmony_ci	queue->sock->sk->sk_state_change = nvme_tcp_state_change;
169362306a36Sopenharmony_ci	queue->sock->sk->sk_write_space = nvme_tcp_write_space;
169462306a36Sopenharmony_ci#ifdef CONFIG_NET_RX_BUSY_POLL
169562306a36Sopenharmony_ci	queue->sock->sk->sk_ll_usec = 1;
169662306a36Sopenharmony_ci#endif
169762306a36Sopenharmony_ci	write_unlock_bh(&queue->sock->sk->sk_callback_lock);
169862306a36Sopenharmony_ci}
169962306a36Sopenharmony_ci
170062306a36Sopenharmony_cistatic int nvme_tcp_start_queue(struct nvme_ctrl *nctrl, int idx)
170162306a36Sopenharmony_ci{
170262306a36Sopenharmony_ci	struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
170362306a36Sopenharmony_ci	struct nvme_tcp_queue *queue = &ctrl->queues[idx];
170462306a36Sopenharmony_ci	int ret;
170562306a36Sopenharmony_ci
170662306a36Sopenharmony_ci	queue->rd_enabled = true;
170762306a36Sopenharmony_ci	nvme_tcp_init_recv_ctx(queue);
170862306a36Sopenharmony_ci	nvme_tcp_setup_sock_ops(queue);
170962306a36Sopenharmony_ci
171062306a36Sopenharmony_ci	if (idx)
171162306a36Sopenharmony_ci		ret = nvmf_connect_io_queue(nctrl, idx);
171262306a36Sopenharmony_ci	else
171362306a36Sopenharmony_ci		ret = nvmf_connect_admin_queue(nctrl);
171462306a36Sopenharmony_ci
171562306a36Sopenharmony_ci	if (!ret) {
171662306a36Sopenharmony_ci		set_bit(NVME_TCP_Q_LIVE, &queue->flags);
171762306a36Sopenharmony_ci	} else {
171862306a36Sopenharmony_ci		if (test_bit(NVME_TCP_Q_ALLOCATED, &queue->flags))
171962306a36Sopenharmony_ci			__nvme_tcp_stop_queue(queue);
172062306a36Sopenharmony_ci		dev_err(nctrl->device,
172162306a36Sopenharmony_ci			"failed to connect queue: %d ret=%d\n", idx, ret);
172262306a36Sopenharmony_ci	}
172362306a36Sopenharmony_ci	return ret;
172462306a36Sopenharmony_ci}
172562306a36Sopenharmony_ci
172662306a36Sopenharmony_cistatic void nvme_tcp_free_admin_queue(struct nvme_ctrl *ctrl)
172762306a36Sopenharmony_ci{
172862306a36Sopenharmony_ci	if (to_tcp_ctrl(ctrl)->async_req.pdu) {
172962306a36Sopenharmony_ci		cancel_work_sync(&ctrl->async_event_work);
173062306a36Sopenharmony_ci		nvme_tcp_free_async_req(to_tcp_ctrl(ctrl));
173162306a36Sopenharmony_ci		to_tcp_ctrl(ctrl)->async_req.pdu = NULL;
173262306a36Sopenharmony_ci	}
173362306a36Sopenharmony_ci
173462306a36Sopenharmony_ci	nvme_tcp_free_queue(ctrl, 0);
173562306a36Sopenharmony_ci}
173662306a36Sopenharmony_ci
173762306a36Sopenharmony_cistatic void nvme_tcp_free_io_queues(struct nvme_ctrl *ctrl)
173862306a36Sopenharmony_ci{
173962306a36Sopenharmony_ci	int i;
174062306a36Sopenharmony_ci
174162306a36Sopenharmony_ci	for (i = 1; i < ctrl->queue_count; i++)
174262306a36Sopenharmony_ci		nvme_tcp_free_queue(ctrl, i);
174362306a36Sopenharmony_ci}
174462306a36Sopenharmony_ci
174562306a36Sopenharmony_cistatic void nvme_tcp_stop_io_queues(struct nvme_ctrl *ctrl)
174662306a36Sopenharmony_ci{
174762306a36Sopenharmony_ci	int i;
174862306a36Sopenharmony_ci
174962306a36Sopenharmony_ci	for (i = 1; i < ctrl->queue_count; i++)
175062306a36Sopenharmony_ci		nvme_tcp_stop_queue(ctrl, i);
175162306a36Sopenharmony_ci}
175262306a36Sopenharmony_ci
/*
 * Start the queues with ids in [first, last).
 *
 * If one of them fails to start, the queues already started by this
 * call are stopped again in reverse order and the error is returned.
 * Returns 0 when all queues started (or the range is empty).
 */
static int nvme_tcp_start_io_queues(struct nvme_ctrl *ctrl,
				    int first, int last)
{
	int qid, ret;

	for (qid = first; qid < last; qid++) {
		ret = nvme_tcp_start_queue(ctrl, qid);
		if (!ret)
			continue;

		/* Roll back the queues started before the failing one. */
		while (--qid >= first)
			nvme_tcp_stop_queue(ctrl, qid);
		return ret;
	}

	return 0;
}
177162306a36Sopenharmony_ci
/*
 * Allocate the admin queue (queue 0) and the async request that lives
 * on it. If the async request cannot be set up, the queue is freed
 * again.
 *
 * Returns 0 on success or the error from the failing step.
 */
static int nvme_tcp_alloc_admin_queue(struct nvme_ctrl *ctrl)
{
	int ret = nvme_tcp_alloc_queue(ctrl, 0);

	if (ret)
		return ret;

	ret = nvme_tcp_alloc_async_req(to_tcp_ctrl(ctrl));
	if (ret)
		nvme_tcp_free_queue(ctrl, 0);

	return ret;
}
179062306a36Sopenharmony_ci
179162306a36Sopenharmony_cistatic int __nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl)
179262306a36Sopenharmony_ci{
179362306a36Sopenharmony_ci	int i, ret;
179462306a36Sopenharmony_ci
179562306a36Sopenharmony_ci	for (i = 1; i < ctrl->queue_count; i++) {
179662306a36Sopenharmony_ci		ret = nvme_tcp_alloc_queue(ctrl, i);
179762306a36Sopenharmony_ci		if (ret)
179862306a36Sopenharmony_ci			goto out_free_queues;
179962306a36Sopenharmony_ci	}
180062306a36Sopenharmony_ci
180162306a36Sopenharmony_ci	return 0;
180262306a36Sopenharmony_ci
180362306a36Sopenharmony_ciout_free_queues:
180462306a36Sopenharmony_ci	for (i--; i >= 1; i--)
180562306a36Sopenharmony_ci		nvme_tcp_free_queue(ctrl, i);
180662306a36Sopenharmony_ci
180762306a36Sopenharmony_ci	return ret;
180862306a36Sopenharmony_ci}
180962306a36Sopenharmony_ci
181062306a36Sopenharmony_cistatic int nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl)
181162306a36Sopenharmony_ci{
181262306a36Sopenharmony_ci	unsigned int nr_io_queues;
181362306a36Sopenharmony_ci	int ret;
181462306a36Sopenharmony_ci
181562306a36Sopenharmony_ci	nr_io_queues = nvmf_nr_io_queues(ctrl->opts);
181662306a36Sopenharmony_ci	ret = nvme_set_queue_count(ctrl, &nr_io_queues);
181762306a36Sopenharmony_ci	if (ret)
181862306a36Sopenharmony_ci		return ret;
181962306a36Sopenharmony_ci
182062306a36Sopenharmony_ci	if (nr_io_queues == 0) {
182162306a36Sopenharmony_ci		dev_err(ctrl->device,
182262306a36Sopenharmony_ci			"unable to set any I/O queues\n");
182362306a36Sopenharmony_ci		return -ENOMEM;
182462306a36Sopenharmony_ci	}
182562306a36Sopenharmony_ci
182662306a36Sopenharmony_ci	ctrl->queue_count = nr_io_queues + 1;
182762306a36Sopenharmony_ci	dev_info(ctrl->device,
182862306a36Sopenharmony_ci		"creating %d I/O queues.\n", nr_io_queues);
182962306a36Sopenharmony_ci
183062306a36Sopenharmony_ci	nvmf_set_io_queues(ctrl->opts, nr_io_queues,
183162306a36Sopenharmony_ci			   to_tcp_ctrl(ctrl)->io_queues);
183262306a36Sopenharmony_ci	return __nvme_tcp_alloc_io_queues(ctrl);
183362306a36Sopenharmony_ci}
183462306a36Sopenharmony_ci
183562306a36Sopenharmony_cistatic void nvme_tcp_destroy_io_queues(struct nvme_ctrl *ctrl, bool remove)
183662306a36Sopenharmony_ci{
183762306a36Sopenharmony_ci	nvme_tcp_stop_io_queues(ctrl);
183862306a36Sopenharmony_ci	if (remove)
183962306a36Sopenharmony_ci		nvme_remove_io_tag_set(ctrl);
184062306a36Sopenharmony_ci	nvme_tcp_free_io_queues(ctrl);
184162306a36Sopenharmony_ci}
184262306a36Sopenharmony_ci
184362306a36Sopenharmony_cistatic int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new)
184462306a36Sopenharmony_ci{
184562306a36Sopenharmony_ci	int ret, nr_queues;
184662306a36Sopenharmony_ci
184762306a36Sopenharmony_ci	ret = nvme_tcp_alloc_io_queues(ctrl);
184862306a36Sopenharmony_ci	if (ret)
184962306a36Sopenharmony_ci		return ret;
185062306a36Sopenharmony_ci
185162306a36Sopenharmony_ci	if (new) {
185262306a36Sopenharmony_ci		ret = nvme_alloc_io_tag_set(ctrl, &to_tcp_ctrl(ctrl)->tag_set,
185362306a36Sopenharmony_ci				&nvme_tcp_mq_ops,
185462306a36Sopenharmony_ci				ctrl->opts->nr_poll_queues ? HCTX_MAX_TYPES : 2,
185562306a36Sopenharmony_ci				sizeof(struct nvme_tcp_request));
185662306a36Sopenharmony_ci		if (ret)
185762306a36Sopenharmony_ci			goto out_free_io_queues;
185862306a36Sopenharmony_ci	}
185962306a36Sopenharmony_ci
186062306a36Sopenharmony_ci	/*
186162306a36Sopenharmony_ci	 * Only start IO queues for which we have allocated the tagset
186262306a36Sopenharmony_ci	 * and limitted it to the available queues. On reconnects, the
186362306a36Sopenharmony_ci	 * queue number might have changed.
186462306a36Sopenharmony_ci	 */
186562306a36Sopenharmony_ci	nr_queues = min(ctrl->tagset->nr_hw_queues + 1, ctrl->queue_count);
186662306a36Sopenharmony_ci	ret = nvme_tcp_start_io_queues(ctrl, 1, nr_queues);
186762306a36Sopenharmony_ci	if (ret)
186862306a36Sopenharmony_ci		goto out_cleanup_connect_q;
186962306a36Sopenharmony_ci
187062306a36Sopenharmony_ci	if (!new) {
187162306a36Sopenharmony_ci		nvme_start_freeze(ctrl);
187262306a36Sopenharmony_ci		nvme_unquiesce_io_queues(ctrl);
187362306a36Sopenharmony_ci		if (!nvme_wait_freeze_timeout(ctrl, NVME_IO_TIMEOUT)) {
187462306a36Sopenharmony_ci			/*
187562306a36Sopenharmony_ci			 * If we timed out waiting for freeze we are likely to
187662306a36Sopenharmony_ci			 * be stuck.  Fail the controller initialization just
187762306a36Sopenharmony_ci			 * to be safe.
187862306a36Sopenharmony_ci			 */
187962306a36Sopenharmony_ci			ret = -ENODEV;
188062306a36Sopenharmony_ci			nvme_unfreeze(ctrl);
188162306a36Sopenharmony_ci			goto out_wait_freeze_timed_out;
188262306a36Sopenharmony_ci		}
188362306a36Sopenharmony_ci		blk_mq_update_nr_hw_queues(ctrl->tagset,
188462306a36Sopenharmony_ci			ctrl->queue_count - 1);
188562306a36Sopenharmony_ci		nvme_unfreeze(ctrl);
188662306a36Sopenharmony_ci	}
188762306a36Sopenharmony_ci
188862306a36Sopenharmony_ci	/*
188962306a36Sopenharmony_ci	 * If the number of queues has increased (reconnect case)
189062306a36Sopenharmony_ci	 * start all new queues now.
189162306a36Sopenharmony_ci	 */
189262306a36Sopenharmony_ci	ret = nvme_tcp_start_io_queues(ctrl, nr_queues,
189362306a36Sopenharmony_ci				       ctrl->tagset->nr_hw_queues + 1);
189462306a36Sopenharmony_ci	if (ret)
189562306a36Sopenharmony_ci		goto out_wait_freeze_timed_out;
189662306a36Sopenharmony_ci
189762306a36Sopenharmony_ci	return 0;
189862306a36Sopenharmony_ci
189962306a36Sopenharmony_ciout_wait_freeze_timed_out:
190062306a36Sopenharmony_ci	nvme_quiesce_io_queues(ctrl);
190162306a36Sopenharmony_ci	nvme_sync_io_queues(ctrl);
190262306a36Sopenharmony_ci	nvme_tcp_stop_io_queues(ctrl);
190362306a36Sopenharmony_ciout_cleanup_connect_q:
190462306a36Sopenharmony_ci	nvme_cancel_tagset(ctrl);
190562306a36Sopenharmony_ci	if (new)
190662306a36Sopenharmony_ci		nvme_remove_io_tag_set(ctrl);
190762306a36Sopenharmony_ciout_free_io_queues:
190862306a36Sopenharmony_ci	nvme_tcp_free_io_queues(ctrl);
190962306a36Sopenharmony_ci	return ret;
191062306a36Sopenharmony_ci}
191162306a36Sopenharmony_ci
191262306a36Sopenharmony_cistatic void nvme_tcp_destroy_admin_queue(struct nvme_ctrl *ctrl, bool remove)
191362306a36Sopenharmony_ci{
191462306a36Sopenharmony_ci	nvme_tcp_stop_queue(ctrl, 0);
191562306a36Sopenharmony_ci	if (remove)
191662306a36Sopenharmony_ci		nvme_remove_admin_tag_set(ctrl);
191762306a36Sopenharmony_ci	nvme_tcp_free_admin_queue(ctrl);
191862306a36Sopenharmony_ci}
191962306a36Sopenharmony_ci
192062306a36Sopenharmony_cistatic int nvme_tcp_configure_admin_queue(struct nvme_ctrl *ctrl, bool new)
192162306a36Sopenharmony_ci{
192262306a36Sopenharmony_ci	int error;
192362306a36Sopenharmony_ci
192462306a36Sopenharmony_ci	error = nvme_tcp_alloc_admin_queue(ctrl);
192562306a36Sopenharmony_ci	if (error)
192662306a36Sopenharmony_ci		return error;
192762306a36Sopenharmony_ci
192862306a36Sopenharmony_ci	if (new) {
192962306a36Sopenharmony_ci		error = nvme_alloc_admin_tag_set(ctrl,
193062306a36Sopenharmony_ci				&to_tcp_ctrl(ctrl)->admin_tag_set,
193162306a36Sopenharmony_ci				&nvme_tcp_admin_mq_ops,
193262306a36Sopenharmony_ci				sizeof(struct nvme_tcp_request));
193362306a36Sopenharmony_ci		if (error)
193462306a36Sopenharmony_ci			goto out_free_queue;
193562306a36Sopenharmony_ci	}
193662306a36Sopenharmony_ci
193762306a36Sopenharmony_ci	error = nvme_tcp_start_queue(ctrl, 0);
193862306a36Sopenharmony_ci	if (error)
193962306a36Sopenharmony_ci		goto out_cleanup_tagset;
194062306a36Sopenharmony_ci
194162306a36Sopenharmony_ci	error = nvme_enable_ctrl(ctrl);
194262306a36Sopenharmony_ci	if (error)
194362306a36Sopenharmony_ci		goto out_stop_queue;
194462306a36Sopenharmony_ci
194562306a36Sopenharmony_ci	nvme_unquiesce_admin_queue(ctrl);
194662306a36Sopenharmony_ci
194762306a36Sopenharmony_ci	error = nvme_init_ctrl_finish(ctrl, false);
194862306a36Sopenharmony_ci	if (error)
194962306a36Sopenharmony_ci		goto out_quiesce_queue;
195062306a36Sopenharmony_ci
195162306a36Sopenharmony_ci	return 0;
195262306a36Sopenharmony_ci
195362306a36Sopenharmony_ciout_quiesce_queue:
195462306a36Sopenharmony_ci	nvme_quiesce_admin_queue(ctrl);
195562306a36Sopenharmony_ci	blk_sync_queue(ctrl->admin_q);
195662306a36Sopenharmony_ciout_stop_queue:
195762306a36Sopenharmony_ci	nvme_tcp_stop_queue(ctrl, 0);
195862306a36Sopenharmony_ci	nvme_cancel_admin_tagset(ctrl);
195962306a36Sopenharmony_ciout_cleanup_tagset:
196062306a36Sopenharmony_ci	if (new)
196162306a36Sopenharmony_ci		nvme_remove_admin_tag_set(ctrl);
196262306a36Sopenharmony_ciout_free_queue:
196362306a36Sopenharmony_ci	nvme_tcp_free_admin_queue(ctrl);
196462306a36Sopenharmony_ci	return error;
196562306a36Sopenharmony_ci}
196662306a36Sopenharmony_ci
196762306a36Sopenharmony_cistatic void nvme_tcp_teardown_admin_queue(struct nvme_ctrl *ctrl,
196862306a36Sopenharmony_ci		bool remove)
196962306a36Sopenharmony_ci{
197062306a36Sopenharmony_ci	nvme_quiesce_admin_queue(ctrl);
197162306a36Sopenharmony_ci	blk_sync_queue(ctrl->admin_q);
197262306a36Sopenharmony_ci	nvme_tcp_stop_queue(ctrl, 0);
197362306a36Sopenharmony_ci	nvme_cancel_admin_tagset(ctrl);
197462306a36Sopenharmony_ci	if (remove)
197562306a36Sopenharmony_ci		nvme_unquiesce_admin_queue(ctrl);
197662306a36Sopenharmony_ci	nvme_tcp_destroy_admin_queue(ctrl, remove);
197762306a36Sopenharmony_ci}
197862306a36Sopenharmony_ci
197962306a36Sopenharmony_cistatic void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl,
198062306a36Sopenharmony_ci		bool remove)
198162306a36Sopenharmony_ci{
198262306a36Sopenharmony_ci	if (ctrl->queue_count <= 1)
198362306a36Sopenharmony_ci		return;
198462306a36Sopenharmony_ci	nvme_quiesce_admin_queue(ctrl);
198562306a36Sopenharmony_ci	nvme_quiesce_io_queues(ctrl);
198662306a36Sopenharmony_ci	nvme_sync_io_queues(ctrl);
198762306a36Sopenharmony_ci	nvme_tcp_stop_io_queues(ctrl);
198862306a36Sopenharmony_ci	nvme_cancel_tagset(ctrl);
198962306a36Sopenharmony_ci	if (remove)
199062306a36Sopenharmony_ci		nvme_unquiesce_io_queues(ctrl);
199162306a36Sopenharmony_ci	nvme_tcp_destroy_io_queues(ctrl, remove);
199262306a36Sopenharmony_ci}
199362306a36Sopenharmony_ci
199462306a36Sopenharmony_cistatic void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl)
199562306a36Sopenharmony_ci{
199662306a36Sopenharmony_ci	enum nvme_ctrl_state state = nvme_ctrl_state(ctrl);
199762306a36Sopenharmony_ci
199862306a36Sopenharmony_ci	/* If we are resetting/deleting then do nothing */
199962306a36Sopenharmony_ci	if (state != NVME_CTRL_CONNECTING) {
200062306a36Sopenharmony_ci		WARN_ON_ONCE(state == NVME_CTRL_NEW || state == NVME_CTRL_LIVE);
200162306a36Sopenharmony_ci		return;
200262306a36Sopenharmony_ci	}
200362306a36Sopenharmony_ci
200462306a36Sopenharmony_ci	if (nvmf_should_reconnect(ctrl)) {
200562306a36Sopenharmony_ci		dev_info(ctrl->device, "Reconnecting in %d seconds...\n",
200662306a36Sopenharmony_ci			ctrl->opts->reconnect_delay);
200762306a36Sopenharmony_ci		queue_delayed_work(nvme_wq, &to_tcp_ctrl(ctrl)->connect_work,
200862306a36Sopenharmony_ci				ctrl->opts->reconnect_delay * HZ);
200962306a36Sopenharmony_ci	} else {
201062306a36Sopenharmony_ci		dev_info(ctrl->device, "Removing controller...\n");
201162306a36Sopenharmony_ci		nvme_delete_ctrl(ctrl);
201262306a36Sopenharmony_ci	}
201362306a36Sopenharmony_ci}
201462306a36Sopenharmony_ci
201562306a36Sopenharmony_cistatic int nvme_tcp_setup_ctrl(struct nvme_ctrl *ctrl, bool new)
201662306a36Sopenharmony_ci{
201762306a36Sopenharmony_ci	struct nvmf_ctrl_options *opts = ctrl->opts;
201862306a36Sopenharmony_ci	int ret;
201962306a36Sopenharmony_ci
202062306a36Sopenharmony_ci	ret = nvme_tcp_configure_admin_queue(ctrl, new);
202162306a36Sopenharmony_ci	if (ret)
202262306a36Sopenharmony_ci		return ret;
202362306a36Sopenharmony_ci
202462306a36Sopenharmony_ci	if (ctrl->icdoff) {
202562306a36Sopenharmony_ci		ret = -EOPNOTSUPP;
202662306a36Sopenharmony_ci		dev_err(ctrl->device, "icdoff is not supported!\n");
202762306a36Sopenharmony_ci		goto destroy_admin;
202862306a36Sopenharmony_ci	}
202962306a36Sopenharmony_ci
203062306a36Sopenharmony_ci	if (!nvme_ctrl_sgl_supported(ctrl)) {
203162306a36Sopenharmony_ci		ret = -EOPNOTSUPP;
203262306a36Sopenharmony_ci		dev_err(ctrl->device, "Mandatory sgls are not supported!\n");
203362306a36Sopenharmony_ci		goto destroy_admin;
203462306a36Sopenharmony_ci	}
203562306a36Sopenharmony_ci
203662306a36Sopenharmony_ci	if (opts->queue_size > ctrl->sqsize + 1)
203762306a36Sopenharmony_ci		dev_warn(ctrl->device,
203862306a36Sopenharmony_ci			"queue_size %zu > ctrl sqsize %u, clamping down\n",
203962306a36Sopenharmony_ci			opts->queue_size, ctrl->sqsize + 1);
204062306a36Sopenharmony_ci
204162306a36Sopenharmony_ci	if (ctrl->sqsize + 1 > ctrl->maxcmd) {
204262306a36Sopenharmony_ci		dev_warn(ctrl->device,
204362306a36Sopenharmony_ci			"sqsize %u > ctrl maxcmd %u, clamping down\n",
204462306a36Sopenharmony_ci			ctrl->sqsize + 1, ctrl->maxcmd);
204562306a36Sopenharmony_ci		ctrl->sqsize = ctrl->maxcmd - 1;
204662306a36Sopenharmony_ci	}
204762306a36Sopenharmony_ci
204862306a36Sopenharmony_ci	if (ctrl->queue_count > 1) {
204962306a36Sopenharmony_ci		ret = nvme_tcp_configure_io_queues(ctrl, new);
205062306a36Sopenharmony_ci		if (ret)
205162306a36Sopenharmony_ci			goto destroy_admin;
205262306a36Sopenharmony_ci	}
205362306a36Sopenharmony_ci
205462306a36Sopenharmony_ci	if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE)) {
205562306a36Sopenharmony_ci		/*
205662306a36Sopenharmony_ci		 * state change failure is ok if we started ctrl delete,
205762306a36Sopenharmony_ci		 * unless we're during creation of a new controller to
205862306a36Sopenharmony_ci		 * avoid races with teardown flow.
205962306a36Sopenharmony_ci		 */
206062306a36Sopenharmony_ci		enum nvme_ctrl_state state = nvme_ctrl_state(ctrl);
206162306a36Sopenharmony_ci
206262306a36Sopenharmony_ci		WARN_ON_ONCE(state != NVME_CTRL_DELETING &&
206362306a36Sopenharmony_ci			     state != NVME_CTRL_DELETING_NOIO);
206462306a36Sopenharmony_ci		WARN_ON_ONCE(new);
206562306a36Sopenharmony_ci		ret = -EINVAL;
206662306a36Sopenharmony_ci		goto destroy_io;
206762306a36Sopenharmony_ci	}
206862306a36Sopenharmony_ci
206962306a36Sopenharmony_ci	nvme_start_ctrl(ctrl);
207062306a36Sopenharmony_ci	return 0;
207162306a36Sopenharmony_ci
207262306a36Sopenharmony_cidestroy_io:
207362306a36Sopenharmony_ci	if (ctrl->queue_count > 1) {
207462306a36Sopenharmony_ci		nvme_quiesce_io_queues(ctrl);
207562306a36Sopenharmony_ci		nvme_sync_io_queues(ctrl);
207662306a36Sopenharmony_ci		nvme_tcp_stop_io_queues(ctrl);
207762306a36Sopenharmony_ci		nvme_cancel_tagset(ctrl);
207862306a36Sopenharmony_ci		nvme_tcp_destroy_io_queues(ctrl, new);
207962306a36Sopenharmony_ci	}
208062306a36Sopenharmony_cidestroy_admin:
208162306a36Sopenharmony_ci	nvme_quiesce_admin_queue(ctrl);
208262306a36Sopenharmony_ci	blk_sync_queue(ctrl->admin_q);
208362306a36Sopenharmony_ci	nvme_tcp_stop_queue(ctrl, 0);
208462306a36Sopenharmony_ci	nvme_cancel_admin_tagset(ctrl);
208562306a36Sopenharmony_ci	nvme_tcp_destroy_admin_queue(ctrl, new);
208662306a36Sopenharmony_ci	return ret;
208762306a36Sopenharmony_ci}
208862306a36Sopenharmony_ci
208962306a36Sopenharmony_cistatic void nvme_tcp_reconnect_ctrl_work(struct work_struct *work)
209062306a36Sopenharmony_ci{
209162306a36Sopenharmony_ci	struct nvme_tcp_ctrl *tcp_ctrl = container_of(to_delayed_work(work),
209262306a36Sopenharmony_ci			struct nvme_tcp_ctrl, connect_work);
209362306a36Sopenharmony_ci	struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl;
209462306a36Sopenharmony_ci
209562306a36Sopenharmony_ci	++ctrl->nr_reconnects;
209662306a36Sopenharmony_ci
209762306a36Sopenharmony_ci	if (nvme_tcp_setup_ctrl(ctrl, false))
209862306a36Sopenharmony_ci		goto requeue;
209962306a36Sopenharmony_ci
210062306a36Sopenharmony_ci	dev_info(ctrl->device, "Successfully reconnected (%d attempt)\n",
210162306a36Sopenharmony_ci			ctrl->nr_reconnects);
210262306a36Sopenharmony_ci
210362306a36Sopenharmony_ci	ctrl->nr_reconnects = 0;
210462306a36Sopenharmony_ci
210562306a36Sopenharmony_ci	return;
210662306a36Sopenharmony_ci
210762306a36Sopenharmony_cirequeue:
210862306a36Sopenharmony_ci	dev_info(ctrl->device, "Failed reconnect attempt %d\n",
210962306a36Sopenharmony_ci			ctrl->nr_reconnects);
211062306a36Sopenharmony_ci	nvme_tcp_reconnect_or_remove(ctrl);
211162306a36Sopenharmony_ci}
211262306a36Sopenharmony_ci
211362306a36Sopenharmony_cistatic void nvme_tcp_error_recovery_work(struct work_struct *work)
211462306a36Sopenharmony_ci{
211562306a36Sopenharmony_ci	struct nvme_tcp_ctrl *tcp_ctrl = container_of(work,
211662306a36Sopenharmony_ci				struct nvme_tcp_ctrl, err_work);
211762306a36Sopenharmony_ci	struct nvme_ctrl *ctrl = &tcp_ctrl->ctrl;
211862306a36Sopenharmony_ci
211962306a36Sopenharmony_ci	nvme_stop_keep_alive(ctrl);
212062306a36Sopenharmony_ci	flush_work(&ctrl->async_event_work);
212162306a36Sopenharmony_ci	nvme_tcp_teardown_io_queues(ctrl, false);
212262306a36Sopenharmony_ci	/* unquiesce to fail fast pending requests */
212362306a36Sopenharmony_ci	nvme_unquiesce_io_queues(ctrl);
212462306a36Sopenharmony_ci	nvme_tcp_teardown_admin_queue(ctrl, false);
212562306a36Sopenharmony_ci	nvme_unquiesce_admin_queue(ctrl);
212662306a36Sopenharmony_ci	nvme_auth_stop(ctrl);
212762306a36Sopenharmony_ci
212862306a36Sopenharmony_ci	if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_CONNECTING)) {
212962306a36Sopenharmony_ci		/* state change failure is ok if we started ctrl delete */
213062306a36Sopenharmony_ci		enum nvme_ctrl_state state = nvme_ctrl_state(ctrl);
213162306a36Sopenharmony_ci
213262306a36Sopenharmony_ci		WARN_ON_ONCE(state != NVME_CTRL_DELETING &&
213362306a36Sopenharmony_ci			     state != NVME_CTRL_DELETING_NOIO);
213462306a36Sopenharmony_ci		return;
213562306a36Sopenharmony_ci	}
213662306a36Sopenharmony_ci
213762306a36Sopenharmony_ci	nvme_tcp_reconnect_or_remove(ctrl);
213862306a36Sopenharmony_ci}
213962306a36Sopenharmony_ci
214062306a36Sopenharmony_cistatic void nvme_tcp_teardown_ctrl(struct nvme_ctrl *ctrl, bool shutdown)
214162306a36Sopenharmony_ci{
214262306a36Sopenharmony_ci	nvme_tcp_teardown_io_queues(ctrl, shutdown);
214362306a36Sopenharmony_ci	nvme_quiesce_admin_queue(ctrl);
214462306a36Sopenharmony_ci	nvme_disable_ctrl(ctrl, shutdown);
214562306a36Sopenharmony_ci	nvme_tcp_teardown_admin_queue(ctrl, shutdown);
214662306a36Sopenharmony_ci}
214762306a36Sopenharmony_ci
214862306a36Sopenharmony_cistatic void nvme_tcp_delete_ctrl(struct nvme_ctrl *ctrl)
214962306a36Sopenharmony_ci{
215062306a36Sopenharmony_ci	nvme_tcp_teardown_ctrl(ctrl, true);
215162306a36Sopenharmony_ci}
215262306a36Sopenharmony_ci
215362306a36Sopenharmony_cistatic void nvme_reset_ctrl_work(struct work_struct *work)
215462306a36Sopenharmony_ci{
215562306a36Sopenharmony_ci	struct nvme_ctrl *ctrl =
215662306a36Sopenharmony_ci		container_of(work, struct nvme_ctrl, reset_work);
215762306a36Sopenharmony_ci
215862306a36Sopenharmony_ci	nvme_stop_ctrl(ctrl);
215962306a36Sopenharmony_ci	nvme_tcp_teardown_ctrl(ctrl, false);
216062306a36Sopenharmony_ci
216162306a36Sopenharmony_ci	if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_CONNECTING)) {
216262306a36Sopenharmony_ci		/* state change failure is ok if we started ctrl delete */
216362306a36Sopenharmony_ci		enum nvme_ctrl_state state = nvme_ctrl_state(ctrl);
216462306a36Sopenharmony_ci
216562306a36Sopenharmony_ci		WARN_ON_ONCE(state != NVME_CTRL_DELETING &&
216662306a36Sopenharmony_ci			     state != NVME_CTRL_DELETING_NOIO);
216762306a36Sopenharmony_ci		return;
216862306a36Sopenharmony_ci	}
216962306a36Sopenharmony_ci
217062306a36Sopenharmony_ci	if (nvme_tcp_setup_ctrl(ctrl, false))
217162306a36Sopenharmony_ci		goto out_fail;
217262306a36Sopenharmony_ci
217362306a36Sopenharmony_ci	return;
217462306a36Sopenharmony_ci
217562306a36Sopenharmony_ciout_fail:
217662306a36Sopenharmony_ci	++ctrl->nr_reconnects;
217762306a36Sopenharmony_ci	nvme_tcp_reconnect_or_remove(ctrl);
217862306a36Sopenharmony_ci}
217962306a36Sopenharmony_ci
218062306a36Sopenharmony_cistatic void nvme_tcp_stop_ctrl(struct nvme_ctrl *ctrl)
218162306a36Sopenharmony_ci{
218262306a36Sopenharmony_ci	flush_work(&to_tcp_ctrl(ctrl)->err_work);
218362306a36Sopenharmony_ci	cancel_delayed_work_sync(&to_tcp_ctrl(ctrl)->connect_work);
218462306a36Sopenharmony_ci}
218562306a36Sopenharmony_ci
218662306a36Sopenharmony_cistatic void nvme_tcp_free_ctrl(struct nvme_ctrl *nctrl)
218762306a36Sopenharmony_ci{
218862306a36Sopenharmony_ci	struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
218962306a36Sopenharmony_ci
219062306a36Sopenharmony_ci	if (list_empty(&ctrl->list))
219162306a36Sopenharmony_ci		goto free_ctrl;
219262306a36Sopenharmony_ci
219362306a36Sopenharmony_ci	mutex_lock(&nvme_tcp_ctrl_mutex);
219462306a36Sopenharmony_ci	list_del(&ctrl->list);
219562306a36Sopenharmony_ci	mutex_unlock(&nvme_tcp_ctrl_mutex);
219662306a36Sopenharmony_ci
219762306a36Sopenharmony_ci	nvmf_free_options(nctrl->opts);
219862306a36Sopenharmony_cifree_ctrl:
219962306a36Sopenharmony_ci	kfree(ctrl->queues);
220062306a36Sopenharmony_ci	kfree(ctrl);
220162306a36Sopenharmony_ci}
220262306a36Sopenharmony_ci
220362306a36Sopenharmony_cistatic void nvme_tcp_set_sg_null(struct nvme_command *c)
220462306a36Sopenharmony_ci{
220562306a36Sopenharmony_ci	struct nvme_sgl_desc *sg = &c->common.dptr.sgl;
220662306a36Sopenharmony_ci
220762306a36Sopenharmony_ci	sg->addr = 0;
220862306a36Sopenharmony_ci	sg->length = 0;
220962306a36Sopenharmony_ci	sg->type = (NVME_TRANSPORT_SGL_DATA_DESC << 4) |
221062306a36Sopenharmony_ci			NVME_SGL_FMT_TRANSPORT_A;
221162306a36Sopenharmony_ci}
221262306a36Sopenharmony_ci
221362306a36Sopenharmony_cistatic void nvme_tcp_set_sg_inline(struct nvme_tcp_queue *queue,
221462306a36Sopenharmony_ci		struct nvme_command *c, u32 data_len)
221562306a36Sopenharmony_ci{
221662306a36Sopenharmony_ci	struct nvme_sgl_desc *sg = &c->common.dptr.sgl;
221762306a36Sopenharmony_ci
221862306a36Sopenharmony_ci	sg->addr = cpu_to_le64(queue->ctrl->ctrl.icdoff);
221962306a36Sopenharmony_ci	sg->length = cpu_to_le32(data_len);
222062306a36Sopenharmony_ci	sg->type = (NVME_SGL_FMT_DATA_DESC << 4) | NVME_SGL_FMT_OFFSET;
222162306a36Sopenharmony_ci}
222262306a36Sopenharmony_ci
222362306a36Sopenharmony_cistatic void nvme_tcp_set_sg_host_data(struct nvme_command *c,
222462306a36Sopenharmony_ci		u32 data_len)
222562306a36Sopenharmony_ci{
222662306a36Sopenharmony_ci	struct nvme_sgl_desc *sg = &c->common.dptr.sgl;
222762306a36Sopenharmony_ci
222862306a36Sopenharmony_ci	sg->addr = 0;
222962306a36Sopenharmony_ci	sg->length = cpu_to_le32(data_len);
223062306a36Sopenharmony_ci	sg->type = (NVME_TRANSPORT_SGL_DATA_DESC << 4) |
223162306a36Sopenharmony_ci			NVME_SGL_FMT_TRANSPORT_A;
223262306a36Sopenharmony_ci}
223362306a36Sopenharmony_ci
223462306a36Sopenharmony_cistatic void nvme_tcp_submit_async_event(struct nvme_ctrl *arg)
223562306a36Sopenharmony_ci{
223662306a36Sopenharmony_ci	struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(arg);
223762306a36Sopenharmony_ci	struct nvme_tcp_queue *queue = &ctrl->queues[0];
223862306a36Sopenharmony_ci	struct nvme_tcp_cmd_pdu *pdu = ctrl->async_req.pdu;
223962306a36Sopenharmony_ci	struct nvme_command *cmd = &pdu->cmd;
224062306a36Sopenharmony_ci	u8 hdgst = nvme_tcp_hdgst_len(queue);
224162306a36Sopenharmony_ci
224262306a36Sopenharmony_ci	memset(pdu, 0, sizeof(*pdu));
224362306a36Sopenharmony_ci	pdu->hdr.type = nvme_tcp_cmd;
224462306a36Sopenharmony_ci	if (queue->hdr_digest)
224562306a36Sopenharmony_ci		pdu->hdr.flags |= NVME_TCP_F_HDGST;
224662306a36Sopenharmony_ci	pdu->hdr.hlen = sizeof(*pdu);
224762306a36Sopenharmony_ci	pdu->hdr.plen = cpu_to_le32(pdu->hdr.hlen + hdgst);
224862306a36Sopenharmony_ci
224962306a36Sopenharmony_ci	cmd->common.opcode = nvme_admin_async_event;
225062306a36Sopenharmony_ci	cmd->common.command_id = NVME_AQ_BLK_MQ_DEPTH;
225162306a36Sopenharmony_ci	cmd->common.flags |= NVME_CMD_SGL_METABUF;
225262306a36Sopenharmony_ci	nvme_tcp_set_sg_null(cmd);
225362306a36Sopenharmony_ci
225462306a36Sopenharmony_ci	ctrl->async_req.state = NVME_TCP_SEND_CMD_PDU;
225562306a36Sopenharmony_ci	ctrl->async_req.offset = 0;
225662306a36Sopenharmony_ci	ctrl->async_req.curr_bio = NULL;
225762306a36Sopenharmony_ci	ctrl->async_req.data_len = 0;
225862306a36Sopenharmony_ci
225962306a36Sopenharmony_ci	nvme_tcp_queue_request(&ctrl->async_req, true, true);
226062306a36Sopenharmony_ci}
226162306a36Sopenharmony_ci
226262306a36Sopenharmony_cistatic void nvme_tcp_complete_timed_out(struct request *rq)
226362306a36Sopenharmony_ci{
226462306a36Sopenharmony_ci	struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
226562306a36Sopenharmony_ci	struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl;
226662306a36Sopenharmony_ci
226762306a36Sopenharmony_ci	nvme_tcp_stop_queue(ctrl, nvme_tcp_queue_id(req->queue));
226862306a36Sopenharmony_ci	nvmf_complete_timed_out_request(rq);
226962306a36Sopenharmony_ci}
227062306a36Sopenharmony_ci
227162306a36Sopenharmony_cistatic enum blk_eh_timer_return nvme_tcp_timeout(struct request *rq)
227262306a36Sopenharmony_ci{
227362306a36Sopenharmony_ci	struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
227462306a36Sopenharmony_ci	struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl;
227562306a36Sopenharmony_ci	struct nvme_tcp_cmd_pdu *pdu = nvme_tcp_req_cmd_pdu(req);
227662306a36Sopenharmony_ci	u8 opc = pdu->cmd.common.opcode, fctype = pdu->cmd.fabrics.fctype;
227762306a36Sopenharmony_ci	int qid = nvme_tcp_queue_id(req->queue);
227862306a36Sopenharmony_ci
227962306a36Sopenharmony_ci	dev_warn(ctrl->device,
228062306a36Sopenharmony_ci		"queue %d: timeout cid %#x type %d opcode %#x (%s)\n",
228162306a36Sopenharmony_ci		nvme_tcp_queue_id(req->queue), nvme_cid(rq), pdu->hdr.type,
228262306a36Sopenharmony_ci		opc, nvme_opcode_str(qid, opc, fctype));
228362306a36Sopenharmony_ci
228462306a36Sopenharmony_ci	if (nvme_ctrl_state(ctrl) != NVME_CTRL_LIVE) {
228562306a36Sopenharmony_ci		/*
228662306a36Sopenharmony_ci		 * If we are resetting, connecting or deleting we should
228762306a36Sopenharmony_ci		 * complete immediately because we may block controller
228862306a36Sopenharmony_ci		 * teardown or setup sequence
228962306a36Sopenharmony_ci		 * - ctrl disable/shutdown fabrics requests
229062306a36Sopenharmony_ci		 * - connect requests
229162306a36Sopenharmony_ci		 * - initialization admin requests
229262306a36Sopenharmony_ci		 * - I/O requests that entered after unquiescing and
229362306a36Sopenharmony_ci		 *   the controller stopped responding
229462306a36Sopenharmony_ci		 *
229562306a36Sopenharmony_ci		 * All other requests should be cancelled by the error
229662306a36Sopenharmony_ci		 * recovery work, so it's fine that we fail it here.
229762306a36Sopenharmony_ci		 */
229862306a36Sopenharmony_ci		nvme_tcp_complete_timed_out(rq);
229962306a36Sopenharmony_ci		return BLK_EH_DONE;
230062306a36Sopenharmony_ci	}
230162306a36Sopenharmony_ci
230262306a36Sopenharmony_ci	/*
230362306a36Sopenharmony_ci	 * LIVE state should trigger the normal error recovery which will
230462306a36Sopenharmony_ci	 * handle completing this request.
230562306a36Sopenharmony_ci	 */
230662306a36Sopenharmony_ci	nvme_tcp_error_recovery(ctrl);
230762306a36Sopenharmony_ci	return BLK_EH_RESET_TIMER;
230862306a36Sopenharmony_ci}
230962306a36Sopenharmony_ci
231062306a36Sopenharmony_cistatic blk_status_t nvme_tcp_map_data(struct nvme_tcp_queue *queue,
231162306a36Sopenharmony_ci			struct request *rq)
231262306a36Sopenharmony_ci{
231362306a36Sopenharmony_ci	struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
231462306a36Sopenharmony_ci	struct nvme_tcp_cmd_pdu *pdu = nvme_tcp_req_cmd_pdu(req);
231562306a36Sopenharmony_ci	struct nvme_command *c = &pdu->cmd;
231662306a36Sopenharmony_ci
231762306a36Sopenharmony_ci	c->common.flags |= NVME_CMD_SGL_METABUF;
231862306a36Sopenharmony_ci
231962306a36Sopenharmony_ci	if (!blk_rq_nr_phys_segments(rq))
232062306a36Sopenharmony_ci		nvme_tcp_set_sg_null(c);
232162306a36Sopenharmony_ci	else if (rq_data_dir(rq) == WRITE &&
232262306a36Sopenharmony_ci	    req->data_len <= nvme_tcp_inline_data_size(req))
232362306a36Sopenharmony_ci		nvme_tcp_set_sg_inline(queue, c, req->data_len);
232462306a36Sopenharmony_ci	else
232562306a36Sopenharmony_ci		nvme_tcp_set_sg_host_data(c, req->data_len);
232662306a36Sopenharmony_ci
232762306a36Sopenharmony_ci	return 0;
232862306a36Sopenharmony_ci}
232962306a36Sopenharmony_ci
233062306a36Sopenharmony_cistatic blk_status_t nvme_tcp_setup_cmd_pdu(struct nvme_ns *ns,
233162306a36Sopenharmony_ci		struct request *rq)
233262306a36Sopenharmony_ci{
233362306a36Sopenharmony_ci	struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
233462306a36Sopenharmony_ci	struct nvme_tcp_cmd_pdu *pdu = nvme_tcp_req_cmd_pdu(req);
233562306a36Sopenharmony_ci	struct nvme_tcp_queue *queue = req->queue;
233662306a36Sopenharmony_ci	u8 hdgst = nvme_tcp_hdgst_len(queue), ddgst = 0;
233762306a36Sopenharmony_ci	blk_status_t ret;
233862306a36Sopenharmony_ci
233962306a36Sopenharmony_ci	ret = nvme_setup_cmd(ns, rq);
234062306a36Sopenharmony_ci	if (ret)
234162306a36Sopenharmony_ci		return ret;
234262306a36Sopenharmony_ci
234362306a36Sopenharmony_ci	req->state = NVME_TCP_SEND_CMD_PDU;
234462306a36Sopenharmony_ci	req->status = cpu_to_le16(NVME_SC_SUCCESS);
234562306a36Sopenharmony_ci	req->offset = 0;
234662306a36Sopenharmony_ci	req->data_sent = 0;
234762306a36Sopenharmony_ci	req->pdu_len = 0;
234862306a36Sopenharmony_ci	req->pdu_sent = 0;
234962306a36Sopenharmony_ci	req->h2cdata_left = 0;
235062306a36Sopenharmony_ci	req->data_len = blk_rq_nr_phys_segments(rq) ?
235162306a36Sopenharmony_ci				blk_rq_payload_bytes(rq) : 0;
235262306a36Sopenharmony_ci	req->curr_bio = rq->bio;
235362306a36Sopenharmony_ci	if (req->curr_bio && req->data_len)
235462306a36Sopenharmony_ci		nvme_tcp_init_iter(req, rq_data_dir(rq));
235562306a36Sopenharmony_ci
235662306a36Sopenharmony_ci	if (rq_data_dir(rq) == WRITE &&
235762306a36Sopenharmony_ci	    req->data_len <= nvme_tcp_inline_data_size(req))
235862306a36Sopenharmony_ci		req->pdu_len = req->data_len;
235962306a36Sopenharmony_ci
236062306a36Sopenharmony_ci	pdu->hdr.type = nvme_tcp_cmd;
236162306a36Sopenharmony_ci	pdu->hdr.flags = 0;
236262306a36Sopenharmony_ci	if (queue->hdr_digest)
236362306a36Sopenharmony_ci		pdu->hdr.flags |= NVME_TCP_F_HDGST;
236462306a36Sopenharmony_ci	if (queue->data_digest && req->pdu_len) {
236562306a36Sopenharmony_ci		pdu->hdr.flags |= NVME_TCP_F_DDGST;
236662306a36Sopenharmony_ci		ddgst = nvme_tcp_ddgst_len(queue);
236762306a36Sopenharmony_ci	}
236862306a36Sopenharmony_ci	pdu->hdr.hlen = sizeof(*pdu);
236962306a36Sopenharmony_ci	pdu->hdr.pdo = req->pdu_len ? pdu->hdr.hlen + hdgst : 0;
237062306a36Sopenharmony_ci	pdu->hdr.plen =
237162306a36Sopenharmony_ci		cpu_to_le32(pdu->hdr.hlen + hdgst + req->pdu_len + ddgst);
237262306a36Sopenharmony_ci
237362306a36Sopenharmony_ci	ret = nvme_tcp_map_data(queue, rq);
237462306a36Sopenharmony_ci	if (unlikely(ret)) {
237562306a36Sopenharmony_ci		nvme_cleanup_cmd(rq);
237662306a36Sopenharmony_ci		dev_err(queue->ctrl->ctrl.device,
237762306a36Sopenharmony_ci			"Failed to map data (%d)\n", ret);
237862306a36Sopenharmony_ci		return ret;
237962306a36Sopenharmony_ci	}
238062306a36Sopenharmony_ci
238162306a36Sopenharmony_ci	return 0;
238262306a36Sopenharmony_ci}
238362306a36Sopenharmony_ci
238462306a36Sopenharmony_cistatic void nvme_tcp_commit_rqs(struct blk_mq_hw_ctx *hctx)
238562306a36Sopenharmony_ci{
238662306a36Sopenharmony_ci	struct nvme_tcp_queue *queue = hctx->driver_data;
238762306a36Sopenharmony_ci
238862306a36Sopenharmony_ci	if (!llist_empty(&queue->req_list))
238962306a36Sopenharmony_ci		queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
239062306a36Sopenharmony_ci}
239162306a36Sopenharmony_ci
239262306a36Sopenharmony_cistatic blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx,
239362306a36Sopenharmony_ci		const struct blk_mq_queue_data *bd)
239462306a36Sopenharmony_ci{
239562306a36Sopenharmony_ci	struct nvme_ns *ns = hctx->queue->queuedata;
239662306a36Sopenharmony_ci	struct nvme_tcp_queue *queue = hctx->driver_data;
239762306a36Sopenharmony_ci	struct request *rq = bd->rq;
239862306a36Sopenharmony_ci	struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
239962306a36Sopenharmony_ci	bool queue_ready = test_bit(NVME_TCP_Q_LIVE, &queue->flags);
240062306a36Sopenharmony_ci	blk_status_t ret;
240162306a36Sopenharmony_ci
240262306a36Sopenharmony_ci	if (!nvme_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
240362306a36Sopenharmony_ci		return nvme_fail_nonready_command(&queue->ctrl->ctrl, rq);
240462306a36Sopenharmony_ci
240562306a36Sopenharmony_ci	ret = nvme_tcp_setup_cmd_pdu(ns, rq);
240662306a36Sopenharmony_ci	if (unlikely(ret))
240762306a36Sopenharmony_ci		return ret;
240862306a36Sopenharmony_ci
240962306a36Sopenharmony_ci	nvme_start_request(rq);
241062306a36Sopenharmony_ci
241162306a36Sopenharmony_ci	nvme_tcp_queue_request(req, true, bd->last);
241262306a36Sopenharmony_ci
241362306a36Sopenharmony_ci	return BLK_STS_OK;
241462306a36Sopenharmony_ci}
241562306a36Sopenharmony_ci
241662306a36Sopenharmony_cistatic void nvme_tcp_map_queues(struct blk_mq_tag_set *set)
241762306a36Sopenharmony_ci{
241862306a36Sopenharmony_ci	struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(set->driver_data);
241962306a36Sopenharmony_ci
242062306a36Sopenharmony_ci	nvmf_map_queues(set, &ctrl->ctrl, ctrl->io_queues);
242162306a36Sopenharmony_ci}
242262306a36Sopenharmony_ci
242362306a36Sopenharmony_cistatic int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob)
242462306a36Sopenharmony_ci{
242562306a36Sopenharmony_ci	struct nvme_tcp_queue *queue = hctx->driver_data;
242662306a36Sopenharmony_ci	struct sock *sk = queue->sock->sk;
242762306a36Sopenharmony_ci
242862306a36Sopenharmony_ci	if (!test_bit(NVME_TCP_Q_LIVE, &queue->flags))
242962306a36Sopenharmony_ci		return 0;
243062306a36Sopenharmony_ci
243162306a36Sopenharmony_ci	set_bit(NVME_TCP_Q_POLLING, &queue->flags);
243262306a36Sopenharmony_ci	if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue))
243362306a36Sopenharmony_ci		sk_busy_loop(sk, true);
243462306a36Sopenharmony_ci	nvme_tcp_try_recv(queue);
243562306a36Sopenharmony_ci	clear_bit(NVME_TCP_Q_POLLING, &queue->flags);
243662306a36Sopenharmony_ci	return queue->nr_cqe;
243762306a36Sopenharmony_ci}
243862306a36Sopenharmony_ci
243962306a36Sopenharmony_cistatic int nvme_tcp_get_address(struct nvme_ctrl *ctrl, char *buf, int size)
244062306a36Sopenharmony_ci{
244162306a36Sopenharmony_ci	struct nvme_tcp_queue *queue = &to_tcp_ctrl(ctrl)->queues[0];
244262306a36Sopenharmony_ci	struct sockaddr_storage src_addr;
244362306a36Sopenharmony_ci	int ret, len;
244462306a36Sopenharmony_ci
244562306a36Sopenharmony_ci	len = nvmf_get_address(ctrl, buf, size);
244662306a36Sopenharmony_ci
244762306a36Sopenharmony_ci	mutex_lock(&queue->queue_lock);
244862306a36Sopenharmony_ci
244962306a36Sopenharmony_ci	if (!test_bit(NVME_TCP_Q_LIVE, &queue->flags))
245062306a36Sopenharmony_ci		goto done;
245162306a36Sopenharmony_ci	ret = kernel_getsockname(queue->sock, (struct sockaddr *)&src_addr);
245262306a36Sopenharmony_ci	if (ret > 0) {
245362306a36Sopenharmony_ci		if (len > 0)
245462306a36Sopenharmony_ci			len--; /* strip trailing newline */
245562306a36Sopenharmony_ci		len += scnprintf(buf + len, size - len, "%ssrc_addr=%pISc\n",
245662306a36Sopenharmony_ci				(len) ? "," : "", &src_addr);
245762306a36Sopenharmony_ci	}
245862306a36Sopenharmony_cidone:
245962306a36Sopenharmony_ci	mutex_unlock(&queue->queue_lock);
246062306a36Sopenharmony_ci
246162306a36Sopenharmony_ci	return len;
246262306a36Sopenharmony_ci}
246362306a36Sopenharmony_ci
246462306a36Sopenharmony_cistatic const struct blk_mq_ops nvme_tcp_mq_ops = {
246562306a36Sopenharmony_ci	.queue_rq	= nvme_tcp_queue_rq,
246662306a36Sopenharmony_ci	.commit_rqs	= nvme_tcp_commit_rqs,
246762306a36Sopenharmony_ci	.complete	= nvme_complete_rq,
246862306a36Sopenharmony_ci	.init_request	= nvme_tcp_init_request,
246962306a36Sopenharmony_ci	.exit_request	= nvme_tcp_exit_request,
247062306a36Sopenharmony_ci	.init_hctx	= nvme_tcp_init_hctx,
247162306a36Sopenharmony_ci	.timeout	= nvme_tcp_timeout,
247262306a36Sopenharmony_ci	.map_queues	= nvme_tcp_map_queues,
247362306a36Sopenharmony_ci	.poll		= nvme_tcp_poll,
247462306a36Sopenharmony_ci};
247562306a36Sopenharmony_ci
247662306a36Sopenharmony_cistatic const struct blk_mq_ops nvme_tcp_admin_mq_ops = {
247762306a36Sopenharmony_ci	.queue_rq	= nvme_tcp_queue_rq,
247862306a36Sopenharmony_ci	.complete	= nvme_complete_rq,
247962306a36Sopenharmony_ci	.init_request	= nvme_tcp_init_request,
248062306a36Sopenharmony_ci	.exit_request	= nvme_tcp_exit_request,
248162306a36Sopenharmony_ci	.init_hctx	= nvme_tcp_init_admin_hctx,
248262306a36Sopenharmony_ci	.timeout	= nvme_tcp_timeout,
248362306a36Sopenharmony_ci};
248462306a36Sopenharmony_ci
248562306a36Sopenharmony_cistatic const struct nvme_ctrl_ops nvme_tcp_ctrl_ops = {
248662306a36Sopenharmony_ci	.name			= "tcp",
248762306a36Sopenharmony_ci	.module			= THIS_MODULE,
248862306a36Sopenharmony_ci	.flags			= NVME_F_FABRICS | NVME_F_BLOCKING,
248962306a36Sopenharmony_ci	.reg_read32		= nvmf_reg_read32,
249062306a36Sopenharmony_ci	.reg_read64		= nvmf_reg_read64,
249162306a36Sopenharmony_ci	.reg_write32		= nvmf_reg_write32,
249262306a36Sopenharmony_ci	.free_ctrl		= nvme_tcp_free_ctrl,
249362306a36Sopenharmony_ci	.submit_async_event	= nvme_tcp_submit_async_event,
249462306a36Sopenharmony_ci	.delete_ctrl		= nvme_tcp_delete_ctrl,
249562306a36Sopenharmony_ci	.get_address		= nvme_tcp_get_address,
249662306a36Sopenharmony_ci	.stop_ctrl		= nvme_tcp_stop_ctrl,
249762306a36Sopenharmony_ci};
249862306a36Sopenharmony_ci
249962306a36Sopenharmony_cistatic bool
250062306a36Sopenharmony_cinvme_tcp_existing_controller(struct nvmf_ctrl_options *opts)
250162306a36Sopenharmony_ci{
250262306a36Sopenharmony_ci	struct nvme_tcp_ctrl *ctrl;
250362306a36Sopenharmony_ci	bool found = false;
250462306a36Sopenharmony_ci
250562306a36Sopenharmony_ci	mutex_lock(&nvme_tcp_ctrl_mutex);
250662306a36Sopenharmony_ci	list_for_each_entry(ctrl, &nvme_tcp_ctrl_list, list) {
250762306a36Sopenharmony_ci		found = nvmf_ip_options_match(&ctrl->ctrl, opts);
250862306a36Sopenharmony_ci		if (found)
250962306a36Sopenharmony_ci			break;
251062306a36Sopenharmony_ci	}
251162306a36Sopenharmony_ci	mutex_unlock(&nvme_tcp_ctrl_mutex);
251262306a36Sopenharmony_ci
251362306a36Sopenharmony_ci	return found;
251462306a36Sopenharmony_ci}
251562306a36Sopenharmony_ci
251662306a36Sopenharmony_cistatic struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
251762306a36Sopenharmony_ci		struct nvmf_ctrl_options *opts)
251862306a36Sopenharmony_ci{
251962306a36Sopenharmony_ci	struct nvme_tcp_ctrl *ctrl;
252062306a36Sopenharmony_ci	int ret;
252162306a36Sopenharmony_ci
252262306a36Sopenharmony_ci	ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
252362306a36Sopenharmony_ci	if (!ctrl)
252462306a36Sopenharmony_ci		return ERR_PTR(-ENOMEM);
252562306a36Sopenharmony_ci
252662306a36Sopenharmony_ci	INIT_LIST_HEAD(&ctrl->list);
252762306a36Sopenharmony_ci	ctrl->ctrl.opts = opts;
252862306a36Sopenharmony_ci	ctrl->ctrl.queue_count = opts->nr_io_queues + opts->nr_write_queues +
252962306a36Sopenharmony_ci				opts->nr_poll_queues + 1;
253062306a36Sopenharmony_ci	ctrl->ctrl.sqsize = opts->queue_size - 1;
253162306a36Sopenharmony_ci	ctrl->ctrl.kato = opts->kato;
253262306a36Sopenharmony_ci
253362306a36Sopenharmony_ci	INIT_DELAYED_WORK(&ctrl->connect_work,
253462306a36Sopenharmony_ci			nvme_tcp_reconnect_ctrl_work);
253562306a36Sopenharmony_ci	INIT_WORK(&ctrl->err_work, nvme_tcp_error_recovery_work);
253662306a36Sopenharmony_ci	INIT_WORK(&ctrl->ctrl.reset_work, nvme_reset_ctrl_work);
253762306a36Sopenharmony_ci
253862306a36Sopenharmony_ci	if (!(opts->mask & NVMF_OPT_TRSVCID)) {
253962306a36Sopenharmony_ci		opts->trsvcid =
254062306a36Sopenharmony_ci			kstrdup(__stringify(NVME_TCP_DISC_PORT), GFP_KERNEL);
254162306a36Sopenharmony_ci		if (!opts->trsvcid) {
254262306a36Sopenharmony_ci			ret = -ENOMEM;
254362306a36Sopenharmony_ci			goto out_free_ctrl;
254462306a36Sopenharmony_ci		}
254562306a36Sopenharmony_ci		opts->mask |= NVMF_OPT_TRSVCID;
254662306a36Sopenharmony_ci	}
254762306a36Sopenharmony_ci
254862306a36Sopenharmony_ci	ret = inet_pton_with_scope(&init_net, AF_UNSPEC,
254962306a36Sopenharmony_ci			opts->traddr, opts->trsvcid, &ctrl->addr);
255062306a36Sopenharmony_ci	if (ret) {
255162306a36Sopenharmony_ci		pr_err("malformed address passed: %s:%s\n",
255262306a36Sopenharmony_ci			opts->traddr, opts->trsvcid);
255362306a36Sopenharmony_ci		goto out_free_ctrl;
255462306a36Sopenharmony_ci	}
255562306a36Sopenharmony_ci
255662306a36Sopenharmony_ci	if (opts->mask & NVMF_OPT_HOST_TRADDR) {
255762306a36Sopenharmony_ci		ret = inet_pton_with_scope(&init_net, AF_UNSPEC,
255862306a36Sopenharmony_ci			opts->host_traddr, NULL, &ctrl->src_addr);
255962306a36Sopenharmony_ci		if (ret) {
256062306a36Sopenharmony_ci			pr_err("malformed src address passed: %s\n",
256162306a36Sopenharmony_ci			       opts->host_traddr);
256262306a36Sopenharmony_ci			goto out_free_ctrl;
256362306a36Sopenharmony_ci		}
256462306a36Sopenharmony_ci	}
256562306a36Sopenharmony_ci
256662306a36Sopenharmony_ci	if (opts->mask & NVMF_OPT_HOST_IFACE) {
256762306a36Sopenharmony_ci		if (!__dev_get_by_name(&init_net, opts->host_iface)) {
256862306a36Sopenharmony_ci			pr_err("invalid interface passed: %s\n",
256962306a36Sopenharmony_ci			       opts->host_iface);
257062306a36Sopenharmony_ci			ret = -ENODEV;
257162306a36Sopenharmony_ci			goto out_free_ctrl;
257262306a36Sopenharmony_ci		}
257362306a36Sopenharmony_ci	}
257462306a36Sopenharmony_ci
257562306a36Sopenharmony_ci	if (!opts->duplicate_connect && nvme_tcp_existing_controller(opts)) {
257662306a36Sopenharmony_ci		ret = -EALREADY;
257762306a36Sopenharmony_ci		goto out_free_ctrl;
257862306a36Sopenharmony_ci	}
257962306a36Sopenharmony_ci
258062306a36Sopenharmony_ci	ctrl->queues = kcalloc(ctrl->ctrl.queue_count, sizeof(*ctrl->queues),
258162306a36Sopenharmony_ci				GFP_KERNEL);
258262306a36Sopenharmony_ci	if (!ctrl->queues) {
258362306a36Sopenharmony_ci		ret = -ENOMEM;
258462306a36Sopenharmony_ci		goto out_free_ctrl;
258562306a36Sopenharmony_ci	}
258662306a36Sopenharmony_ci
258762306a36Sopenharmony_ci	ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_tcp_ctrl_ops, 0);
258862306a36Sopenharmony_ci	if (ret)
258962306a36Sopenharmony_ci		goto out_kfree_queues;
259062306a36Sopenharmony_ci
259162306a36Sopenharmony_ci	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
259262306a36Sopenharmony_ci		WARN_ON_ONCE(1);
259362306a36Sopenharmony_ci		ret = -EINTR;
259462306a36Sopenharmony_ci		goto out_uninit_ctrl;
259562306a36Sopenharmony_ci	}
259662306a36Sopenharmony_ci
259762306a36Sopenharmony_ci	ret = nvme_tcp_setup_ctrl(&ctrl->ctrl, true);
259862306a36Sopenharmony_ci	if (ret)
259962306a36Sopenharmony_ci		goto out_uninit_ctrl;
260062306a36Sopenharmony_ci
260162306a36Sopenharmony_ci	dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISp\n",
260262306a36Sopenharmony_ci		nvmf_ctrl_subsysnqn(&ctrl->ctrl), &ctrl->addr);
260362306a36Sopenharmony_ci
260462306a36Sopenharmony_ci	mutex_lock(&nvme_tcp_ctrl_mutex);
260562306a36Sopenharmony_ci	list_add_tail(&ctrl->list, &nvme_tcp_ctrl_list);
260662306a36Sopenharmony_ci	mutex_unlock(&nvme_tcp_ctrl_mutex);
260762306a36Sopenharmony_ci
260862306a36Sopenharmony_ci	return &ctrl->ctrl;
260962306a36Sopenharmony_ci
261062306a36Sopenharmony_ciout_uninit_ctrl:
261162306a36Sopenharmony_ci	nvme_uninit_ctrl(&ctrl->ctrl);
261262306a36Sopenharmony_ci	nvme_put_ctrl(&ctrl->ctrl);
261362306a36Sopenharmony_ci	if (ret > 0)
261462306a36Sopenharmony_ci		ret = -EIO;
261562306a36Sopenharmony_ci	return ERR_PTR(ret);
261662306a36Sopenharmony_ciout_kfree_queues:
261762306a36Sopenharmony_ci	kfree(ctrl->queues);
261862306a36Sopenharmony_ciout_free_ctrl:
261962306a36Sopenharmony_ci	kfree(ctrl);
262062306a36Sopenharmony_ci	return ERR_PTR(ret);
262162306a36Sopenharmony_ci}
262262306a36Sopenharmony_ci
262362306a36Sopenharmony_cistatic struct nvmf_transport_ops nvme_tcp_transport = {
262462306a36Sopenharmony_ci	.name		= "tcp",
262562306a36Sopenharmony_ci	.module		= THIS_MODULE,
262662306a36Sopenharmony_ci	.required_opts	= NVMF_OPT_TRADDR,
262762306a36Sopenharmony_ci	.allowed_opts	= NVMF_OPT_TRSVCID | NVMF_OPT_RECONNECT_DELAY |
262862306a36Sopenharmony_ci			  NVMF_OPT_HOST_TRADDR | NVMF_OPT_CTRL_LOSS_TMO |
262962306a36Sopenharmony_ci			  NVMF_OPT_HDR_DIGEST | NVMF_OPT_DATA_DIGEST |
263062306a36Sopenharmony_ci			  NVMF_OPT_NR_WRITE_QUEUES | NVMF_OPT_NR_POLL_QUEUES |
263162306a36Sopenharmony_ci			  NVMF_OPT_TOS | NVMF_OPT_HOST_IFACE,
263262306a36Sopenharmony_ci	.create_ctrl	= nvme_tcp_create_ctrl,
263362306a36Sopenharmony_ci};
263462306a36Sopenharmony_ci
263562306a36Sopenharmony_cistatic int __init nvme_tcp_init_module(void)
263662306a36Sopenharmony_ci{
263762306a36Sopenharmony_ci	BUILD_BUG_ON(sizeof(struct nvme_tcp_hdr) != 8);
263862306a36Sopenharmony_ci	BUILD_BUG_ON(sizeof(struct nvme_tcp_cmd_pdu) != 72);
263962306a36Sopenharmony_ci	BUILD_BUG_ON(sizeof(struct nvme_tcp_data_pdu) != 24);
264062306a36Sopenharmony_ci	BUILD_BUG_ON(sizeof(struct nvme_tcp_rsp_pdu) != 24);
264162306a36Sopenharmony_ci	BUILD_BUG_ON(sizeof(struct nvme_tcp_r2t_pdu) != 24);
264262306a36Sopenharmony_ci	BUILD_BUG_ON(sizeof(struct nvme_tcp_icreq_pdu) != 128);
264362306a36Sopenharmony_ci	BUILD_BUG_ON(sizeof(struct nvme_tcp_icresp_pdu) != 128);
264462306a36Sopenharmony_ci	BUILD_BUG_ON(sizeof(struct nvme_tcp_term_pdu) != 24);
264562306a36Sopenharmony_ci
264662306a36Sopenharmony_ci	nvme_tcp_wq = alloc_workqueue("nvme_tcp_wq",
264762306a36Sopenharmony_ci			WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
264862306a36Sopenharmony_ci	if (!nvme_tcp_wq)
264962306a36Sopenharmony_ci		return -ENOMEM;
265062306a36Sopenharmony_ci
265162306a36Sopenharmony_ci	nvmf_register_transport(&nvme_tcp_transport);
265262306a36Sopenharmony_ci	return 0;
265362306a36Sopenharmony_ci}
265462306a36Sopenharmony_ci
265562306a36Sopenharmony_cistatic void __exit nvme_tcp_cleanup_module(void)
265662306a36Sopenharmony_ci{
265762306a36Sopenharmony_ci	struct nvme_tcp_ctrl *ctrl;
265862306a36Sopenharmony_ci
265962306a36Sopenharmony_ci	nvmf_unregister_transport(&nvme_tcp_transport);
266062306a36Sopenharmony_ci
266162306a36Sopenharmony_ci	mutex_lock(&nvme_tcp_ctrl_mutex);
266262306a36Sopenharmony_ci	list_for_each_entry(ctrl, &nvme_tcp_ctrl_list, list)
266362306a36Sopenharmony_ci		nvme_delete_ctrl(&ctrl->ctrl);
266462306a36Sopenharmony_ci	mutex_unlock(&nvme_tcp_ctrl_mutex);
266562306a36Sopenharmony_ci	flush_workqueue(nvme_delete_wq);
266662306a36Sopenharmony_ci
266762306a36Sopenharmony_ci	destroy_workqueue(nvme_tcp_wq);
266862306a36Sopenharmony_ci}
266962306a36Sopenharmony_ci
/* Hook the init and exit routines defined in this file into the module loader. */
module_init(nvme_tcp_init_module);
module_exit(nvme_tcp_cleanup_module);

MODULE_LICENSE("GPL v2");
2674