162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci *   Copyright (C) 2017, Microsoft Corporation.
462306a36Sopenharmony_ci *
562306a36Sopenharmony_ci *   Author(s): Long Li <longli@microsoft.com>
662306a36Sopenharmony_ci */
762306a36Sopenharmony_ci#include <linux/module.h>
862306a36Sopenharmony_ci#include <linux/highmem.h>
962306a36Sopenharmony_ci#include "smbdirect.h"
1062306a36Sopenharmony_ci#include "cifs_debug.h"
1162306a36Sopenharmony_ci#include "cifsproto.h"
1262306a36Sopenharmony_ci#include "smb2proto.h"
1362306a36Sopenharmony_ci
/* Forward declarations of file-local helpers */

/* Receive buffers and the empty-packet queue */
static struct smbd_response *get_empty_queue_buffer(struct smbd_connection *info);
static struct smbd_response *get_receive_buffer(struct smbd_connection *info);
static void put_receive_buffer(struct smbd_connection *info,
			       struct smbd_response *response);
static int allocate_receive_buffers(struct smbd_connection *info, int num_buf);
static void destroy_receive_buffers(struct smbd_connection *info);
static void put_empty_packet(struct smbd_connection *info,
			     struct smbd_response *response);

/* Reassembly queue */
static void enqueue_reassembly(struct smbd_connection *info,
			       struct smbd_response *response,
			       int data_length);
static struct smbd_response *_get_first_reassembly(struct smbd_connection *info);

/* Posting receives and empty sends */
static int smbd_post_recv(struct smbd_connection *info,
			  struct smbd_response *response);
static int smbd_post_send_empty(struct smbd_connection *info);

/* Memory registration list */
static void destroy_mr_list(struct smbd_connection *info);
static int allocate_mr_list(struct smbd_connection *info);
4062306a36Sopenharmony_ci
4162306a36Sopenharmony_cistruct smb_extract_to_rdma {
4262306a36Sopenharmony_ci	struct ib_sge		*sge;
4362306a36Sopenharmony_ci	unsigned int		nr_sge;
4462306a36Sopenharmony_ci	unsigned int		max_sge;
4562306a36Sopenharmony_ci	struct ib_device	*device;
4662306a36Sopenharmony_ci	u32			local_dma_lkey;
4762306a36Sopenharmony_ci	enum dma_data_direction	direction;
4862306a36Sopenharmony_ci};
4962306a36Sopenharmony_cistatic ssize_t smb_extract_iter_to_rdma(struct iov_iter *iter, size_t len,
5062306a36Sopenharmony_ci					struct smb_extract_to_rdma *rdma);
5162306a36Sopenharmony_ci
5262306a36Sopenharmony_ci/* SMBD version number */
5362306a36Sopenharmony_ci#define SMBD_V1	0x0100
5462306a36Sopenharmony_ci
5562306a36Sopenharmony_ci/* Port numbers for SMBD transport */
5662306a36Sopenharmony_ci#define SMB_PORT	445
5762306a36Sopenharmony_ci#define SMBD_PORT	5445
5862306a36Sopenharmony_ci
5962306a36Sopenharmony_ci/* Address lookup and resolve timeout in ms */
6062306a36Sopenharmony_ci#define RDMA_RESOLVE_TIMEOUT	5000
6162306a36Sopenharmony_ci
6262306a36Sopenharmony_ci/* SMBD negotiation timeout in seconds */
6362306a36Sopenharmony_ci#define SMBD_NEGOTIATE_TIMEOUT	120
6462306a36Sopenharmony_ci
/* SMBD minimum receive size and minimum fragmented size, as defined in [MS-SMBD] */
6662306a36Sopenharmony_ci#define SMBD_MIN_RECEIVE_SIZE		128
6762306a36Sopenharmony_ci#define SMBD_MIN_FRAGMENTED_SIZE	131072
6862306a36Sopenharmony_ci
6962306a36Sopenharmony_ci/*
7062306a36Sopenharmony_ci * Default maximum number of RDMA read/write outstanding on this connection
7162306a36Sopenharmony_ci * This value is possibly decreased during QP creation on hardware limit
7262306a36Sopenharmony_ci */
7362306a36Sopenharmony_ci#define SMBD_CM_RESPONDER_RESOURCES	32
7462306a36Sopenharmony_ci
7562306a36Sopenharmony_ci/* Maximum number of retries on data transfer operations */
7662306a36Sopenharmony_ci#define SMBD_CM_RETRY			6
7762306a36Sopenharmony_ci/* No need to retry on Receiver Not Ready since SMBD manages credits */
7862306a36Sopenharmony_ci#define SMBD_CM_RNR_RETRY		0
7962306a36Sopenharmony_ci
8062306a36Sopenharmony_ci/*
8162306a36Sopenharmony_ci * User configurable initial values per SMBD transport connection
8262306a36Sopenharmony_ci * as defined in [MS-SMBD] 3.1.1.1
8362306a36Sopenharmony_ci * Those may change after a SMBD negotiation
8462306a36Sopenharmony_ci */
8562306a36Sopenharmony_ci/* The local peer's maximum number of credits to grant to the peer */
8662306a36Sopenharmony_ciint smbd_receive_credit_max = 255;
8762306a36Sopenharmony_ci
8862306a36Sopenharmony_ci/* The remote peer's credit request of local peer */
8962306a36Sopenharmony_ciint smbd_send_credit_target = 255;
9062306a36Sopenharmony_ci
9162306a36Sopenharmony_ci/* The maximum single message size can be sent to remote peer */
9262306a36Sopenharmony_ciint smbd_max_send_size = 1364;
9362306a36Sopenharmony_ci
9462306a36Sopenharmony_ci/*  The maximum fragmented upper-layer payload receive size supported */
9562306a36Sopenharmony_ciint smbd_max_fragmented_recv_size = 1024 * 1024;
9662306a36Sopenharmony_ci
9762306a36Sopenharmony_ci/*  The maximum single-message size which can be received */
9862306a36Sopenharmony_ciint smbd_max_receive_size = 1364;
9962306a36Sopenharmony_ci
10062306a36Sopenharmony_ci/* The timeout to initiate send of a keepalive message on idle */
10162306a36Sopenharmony_ciint smbd_keep_alive_interval = 120;
10262306a36Sopenharmony_ci
10362306a36Sopenharmony_ci/*
10462306a36Sopenharmony_ci * User configurable initial values for RDMA transport
10562306a36Sopenharmony_ci * The actual values used may be lower and are limited to hardware capabilities
10662306a36Sopenharmony_ci */
10762306a36Sopenharmony_ci/* Default maximum number of pages in a single RDMA write/read */
10862306a36Sopenharmony_ciint smbd_max_frmr_depth = 2048;
10962306a36Sopenharmony_ci
/* If the payload is smaller than this many bytes, use RDMA send/recv instead of read/write */
11162306a36Sopenharmony_ciint rdma_readwrite_threshold = 4096;
11262306a36Sopenharmony_ci
/* Transport logging functions
 * Log messages are grouped into classes. Class bits can be OR'ed together
 * to select what is logged via the module parameter smbd_logging_class,
 * e.g. cifs.smbd_logging_class=0xa0 will log all log_rdma_recv() and
 * log_rdma_event() messages
 */
11962306a36Sopenharmony_ci#define LOG_OUTGOING			0x1
12062306a36Sopenharmony_ci#define LOG_INCOMING			0x2
12162306a36Sopenharmony_ci#define LOG_READ			0x4
12262306a36Sopenharmony_ci#define LOG_WRITE			0x8
12362306a36Sopenharmony_ci#define LOG_RDMA_SEND			0x10
12462306a36Sopenharmony_ci#define LOG_RDMA_RECV			0x20
12562306a36Sopenharmony_ci#define LOG_KEEP_ALIVE			0x40
12662306a36Sopenharmony_ci#define LOG_RDMA_EVENT			0x80
12762306a36Sopenharmony_ci#define LOG_RDMA_MR			0x100
12862306a36Sopenharmony_cistatic unsigned int smbd_logging_class;
12962306a36Sopenharmony_cimodule_param(smbd_logging_class, uint, 0644);
13062306a36Sopenharmony_ciMODULE_PARM_DESC(smbd_logging_class,
13162306a36Sopenharmony_ci	"Logging class for SMBD transport 0x0 to 0x100");
13262306a36Sopenharmony_ci
13362306a36Sopenharmony_ci#define ERR		0x0
13462306a36Sopenharmony_ci#define INFO		0x1
13562306a36Sopenharmony_cistatic unsigned int smbd_logging_level = ERR;
13662306a36Sopenharmony_cimodule_param(smbd_logging_level, uint, 0644);
13762306a36Sopenharmony_ciMODULE_PARM_DESC(smbd_logging_level,
13862306a36Sopenharmony_ci	"Logging level for SMBD transport, 0 (default): error, 1: info");
13962306a36Sopenharmony_ci
/*
 * Emit a transport log line through cifs_dbg(VFS, ...), prefixed with the
 * calling function name and line number, when either
 *   - level is at or below smbd_logging_level, or
 *   - any bit of class is set in smbd_logging_class.
 *
 * The level and class arguments are parenthesized so that compound
 * expressions (e.g. LOG_READ | LOG_WRITE) are evaluated as a unit rather
 * than being split by operator precedence.
 */
#define log_rdma(level, class, fmt, args...)				\
do {									\
	if ((level) <= smbd_logging_level ||				\
	    ((class) & smbd_logging_class))				\
		cifs_dbg(VFS, "%s:%d " fmt, __func__, __LINE__, ##args);\
} while (0)
14562306a36Sopenharmony_ci
14662306a36Sopenharmony_ci#define log_outgoing(level, fmt, args...) \
14762306a36Sopenharmony_ci		log_rdma(level, LOG_OUTGOING, fmt, ##args)
14862306a36Sopenharmony_ci#define log_incoming(level, fmt, args...) \
14962306a36Sopenharmony_ci		log_rdma(level, LOG_INCOMING, fmt, ##args)
15062306a36Sopenharmony_ci#define log_read(level, fmt, args...)	log_rdma(level, LOG_READ, fmt, ##args)
15162306a36Sopenharmony_ci#define log_write(level, fmt, args...)	log_rdma(level, LOG_WRITE, fmt, ##args)
15262306a36Sopenharmony_ci#define log_rdma_send(level, fmt, args...) \
15362306a36Sopenharmony_ci		log_rdma(level, LOG_RDMA_SEND, fmt, ##args)
15462306a36Sopenharmony_ci#define log_rdma_recv(level, fmt, args...) \
15562306a36Sopenharmony_ci		log_rdma(level, LOG_RDMA_RECV, fmt, ##args)
15662306a36Sopenharmony_ci#define log_keep_alive(level, fmt, args...) \
15762306a36Sopenharmony_ci		log_rdma(level, LOG_KEEP_ALIVE, fmt, ##args)
15862306a36Sopenharmony_ci#define log_rdma_event(level, fmt, args...) \
15962306a36Sopenharmony_ci		log_rdma(level, LOG_RDMA_EVENT, fmt, ##args)
16062306a36Sopenharmony_ci#define log_rdma_mr(level, fmt, args...) \
16162306a36Sopenharmony_ci		log_rdma(level, LOG_RDMA_MR, fmt, ##args)
16262306a36Sopenharmony_ci
16362306a36Sopenharmony_cistatic void smbd_disconnect_rdma_work(struct work_struct *work)
16462306a36Sopenharmony_ci{
16562306a36Sopenharmony_ci	struct smbd_connection *info =
16662306a36Sopenharmony_ci		container_of(work, struct smbd_connection, disconnect_work);
16762306a36Sopenharmony_ci
16862306a36Sopenharmony_ci	if (info->transport_status == SMBD_CONNECTED) {
16962306a36Sopenharmony_ci		info->transport_status = SMBD_DISCONNECTING;
17062306a36Sopenharmony_ci		rdma_disconnect(info->id);
17162306a36Sopenharmony_ci	}
17262306a36Sopenharmony_ci}
17362306a36Sopenharmony_ci
17462306a36Sopenharmony_cistatic void smbd_disconnect_rdma_connection(struct smbd_connection *info)
17562306a36Sopenharmony_ci{
17662306a36Sopenharmony_ci	queue_work(info->workqueue, &info->disconnect_work);
17762306a36Sopenharmony_ci}
17862306a36Sopenharmony_ci
17962306a36Sopenharmony_ci/* Upcall from RDMA CM */
18062306a36Sopenharmony_cistatic int smbd_conn_upcall(
18162306a36Sopenharmony_ci		struct rdma_cm_id *id, struct rdma_cm_event *event)
18262306a36Sopenharmony_ci{
18362306a36Sopenharmony_ci	struct smbd_connection *info = id->context;
18462306a36Sopenharmony_ci
18562306a36Sopenharmony_ci	log_rdma_event(INFO, "event=%d status=%d\n",
18662306a36Sopenharmony_ci		event->event, event->status);
18762306a36Sopenharmony_ci
18862306a36Sopenharmony_ci	switch (event->event) {
18962306a36Sopenharmony_ci	case RDMA_CM_EVENT_ADDR_RESOLVED:
19062306a36Sopenharmony_ci	case RDMA_CM_EVENT_ROUTE_RESOLVED:
19162306a36Sopenharmony_ci		info->ri_rc = 0;
19262306a36Sopenharmony_ci		complete(&info->ri_done);
19362306a36Sopenharmony_ci		break;
19462306a36Sopenharmony_ci
19562306a36Sopenharmony_ci	case RDMA_CM_EVENT_ADDR_ERROR:
19662306a36Sopenharmony_ci		info->ri_rc = -EHOSTUNREACH;
19762306a36Sopenharmony_ci		complete(&info->ri_done);
19862306a36Sopenharmony_ci		break;
19962306a36Sopenharmony_ci
20062306a36Sopenharmony_ci	case RDMA_CM_EVENT_ROUTE_ERROR:
20162306a36Sopenharmony_ci		info->ri_rc = -ENETUNREACH;
20262306a36Sopenharmony_ci		complete(&info->ri_done);
20362306a36Sopenharmony_ci		break;
20462306a36Sopenharmony_ci
20562306a36Sopenharmony_ci	case RDMA_CM_EVENT_ESTABLISHED:
20662306a36Sopenharmony_ci		log_rdma_event(INFO, "connected event=%d\n", event->event);
20762306a36Sopenharmony_ci		info->transport_status = SMBD_CONNECTED;
20862306a36Sopenharmony_ci		wake_up_interruptible(&info->conn_wait);
20962306a36Sopenharmony_ci		break;
21062306a36Sopenharmony_ci
21162306a36Sopenharmony_ci	case RDMA_CM_EVENT_CONNECT_ERROR:
21262306a36Sopenharmony_ci	case RDMA_CM_EVENT_UNREACHABLE:
21362306a36Sopenharmony_ci	case RDMA_CM_EVENT_REJECTED:
21462306a36Sopenharmony_ci		log_rdma_event(INFO, "connecting failed event=%d\n", event->event);
21562306a36Sopenharmony_ci		info->transport_status = SMBD_DISCONNECTED;
21662306a36Sopenharmony_ci		wake_up_interruptible(&info->conn_wait);
21762306a36Sopenharmony_ci		break;
21862306a36Sopenharmony_ci
21962306a36Sopenharmony_ci	case RDMA_CM_EVENT_DEVICE_REMOVAL:
22062306a36Sopenharmony_ci	case RDMA_CM_EVENT_DISCONNECTED:
22162306a36Sopenharmony_ci		/* This happenes when we fail the negotiation */
22262306a36Sopenharmony_ci		if (info->transport_status == SMBD_NEGOTIATE_FAILED) {
22362306a36Sopenharmony_ci			info->transport_status = SMBD_DISCONNECTED;
22462306a36Sopenharmony_ci			wake_up(&info->conn_wait);
22562306a36Sopenharmony_ci			break;
22662306a36Sopenharmony_ci		}
22762306a36Sopenharmony_ci
22862306a36Sopenharmony_ci		info->transport_status = SMBD_DISCONNECTED;
22962306a36Sopenharmony_ci		wake_up_interruptible(&info->disconn_wait);
23062306a36Sopenharmony_ci		wake_up_interruptible(&info->wait_reassembly_queue);
23162306a36Sopenharmony_ci		wake_up_interruptible_all(&info->wait_send_queue);
23262306a36Sopenharmony_ci		break;
23362306a36Sopenharmony_ci
23462306a36Sopenharmony_ci	default:
23562306a36Sopenharmony_ci		break;
23662306a36Sopenharmony_ci	}
23762306a36Sopenharmony_ci
23862306a36Sopenharmony_ci	return 0;
23962306a36Sopenharmony_ci}
24062306a36Sopenharmony_ci
24162306a36Sopenharmony_ci/* Upcall from RDMA QP */
24262306a36Sopenharmony_cistatic void
24362306a36Sopenharmony_cismbd_qp_async_error_upcall(struct ib_event *event, void *context)
24462306a36Sopenharmony_ci{
24562306a36Sopenharmony_ci	struct smbd_connection *info = context;
24662306a36Sopenharmony_ci
24762306a36Sopenharmony_ci	log_rdma_event(ERR, "%s on device %s info %p\n",
24862306a36Sopenharmony_ci		ib_event_msg(event->event), event->device->name, info);
24962306a36Sopenharmony_ci
25062306a36Sopenharmony_ci	switch (event->event) {
25162306a36Sopenharmony_ci	case IB_EVENT_CQ_ERR:
25262306a36Sopenharmony_ci	case IB_EVENT_QP_FATAL:
25362306a36Sopenharmony_ci		smbd_disconnect_rdma_connection(info);
25462306a36Sopenharmony_ci		break;
25562306a36Sopenharmony_ci
25662306a36Sopenharmony_ci	default:
25762306a36Sopenharmony_ci		break;
25862306a36Sopenharmony_ci	}
25962306a36Sopenharmony_ci}
26062306a36Sopenharmony_ci
26162306a36Sopenharmony_cistatic inline void *smbd_request_payload(struct smbd_request *request)
26262306a36Sopenharmony_ci{
26362306a36Sopenharmony_ci	return (void *)request->packet;
26462306a36Sopenharmony_ci}
26562306a36Sopenharmony_ci
26662306a36Sopenharmony_cistatic inline void *smbd_response_payload(struct smbd_response *response)
26762306a36Sopenharmony_ci{
26862306a36Sopenharmony_ci	return (void *)response->packet;
26962306a36Sopenharmony_ci}
27062306a36Sopenharmony_ci
27162306a36Sopenharmony_ci/* Called when a RDMA send is done */
27262306a36Sopenharmony_cistatic void send_done(struct ib_cq *cq, struct ib_wc *wc)
27362306a36Sopenharmony_ci{
27462306a36Sopenharmony_ci	int i;
27562306a36Sopenharmony_ci	struct smbd_request *request =
27662306a36Sopenharmony_ci		container_of(wc->wr_cqe, struct smbd_request, cqe);
27762306a36Sopenharmony_ci
27862306a36Sopenharmony_ci	log_rdma_send(INFO, "smbd_request 0x%p completed wc->status=%d\n",
27962306a36Sopenharmony_ci		request, wc->status);
28062306a36Sopenharmony_ci
28162306a36Sopenharmony_ci	if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_SEND) {
28262306a36Sopenharmony_ci		log_rdma_send(ERR, "wc->status=%d wc->opcode=%d\n",
28362306a36Sopenharmony_ci			wc->status, wc->opcode);
28462306a36Sopenharmony_ci		smbd_disconnect_rdma_connection(request->info);
28562306a36Sopenharmony_ci	}
28662306a36Sopenharmony_ci
28762306a36Sopenharmony_ci	for (i = 0; i < request->num_sge; i++)
28862306a36Sopenharmony_ci		ib_dma_unmap_single(request->info->id->device,
28962306a36Sopenharmony_ci			request->sge[i].addr,
29062306a36Sopenharmony_ci			request->sge[i].length,
29162306a36Sopenharmony_ci			DMA_TO_DEVICE);
29262306a36Sopenharmony_ci
29362306a36Sopenharmony_ci	if (atomic_dec_and_test(&request->info->send_pending))
29462306a36Sopenharmony_ci		wake_up(&request->info->wait_send_pending);
29562306a36Sopenharmony_ci
29662306a36Sopenharmony_ci	wake_up(&request->info->wait_post_send);
29762306a36Sopenharmony_ci
29862306a36Sopenharmony_ci	mempool_free(request, request->info->request_mempool);
29962306a36Sopenharmony_ci}
30062306a36Sopenharmony_ci
30162306a36Sopenharmony_cistatic void dump_smbd_negotiate_resp(struct smbd_negotiate_resp *resp)
30262306a36Sopenharmony_ci{
30362306a36Sopenharmony_ci	log_rdma_event(INFO, "resp message min_version %u max_version %u negotiated_version %u credits_requested %u credits_granted %u status %u max_readwrite_size %u preferred_send_size %u max_receive_size %u max_fragmented_size %u\n",
30462306a36Sopenharmony_ci		       resp->min_version, resp->max_version,
30562306a36Sopenharmony_ci		       resp->negotiated_version, resp->credits_requested,
30662306a36Sopenharmony_ci		       resp->credits_granted, resp->status,
30762306a36Sopenharmony_ci		       resp->max_readwrite_size, resp->preferred_send_size,
30862306a36Sopenharmony_ci		       resp->max_receive_size, resp->max_fragmented_size);
30962306a36Sopenharmony_ci}
31062306a36Sopenharmony_ci
31162306a36Sopenharmony_ci/*
31262306a36Sopenharmony_ci * Process a negotiation response message, according to [MS-SMBD]3.1.5.7
31362306a36Sopenharmony_ci * response, packet_length: the negotiation response message
31462306a36Sopenharmony_ci * return value: true if negotiation is a success, false if failed
31562306a36Sopenharmony_ci */
31662306a36Sopenharmony_cistatic bool process_negotiation_response(
31762306a36Sopenharmony_ci		struct smbd_response *response, int packet_length)
31862306a36Sopenharmony_ci{
31962306a36Sopenharmony_ci	struct smbd_connection *info = response->info;
32062306a36Sopenharmony_ci	struct smbd_negotiate_resp *packet = smbd_response_payload(response);
32162306a36Sopenharmony_ci
32262306a36Sopenharmony_ci	if (packet_length < sizeof(struct smbd_negotiate_resp)) {
32362306a36Sopenharmony_ci		log_rdma_event(ERR,
32462306a36Sopenharmony_ci			"error: packet_length=%d\n", packet_length);
32562306a36Sopenharmony_ci		return false;
32662306a36Sopenharmony_ci	}
32762306a36Sopenharmony_ci
32862306a36Sopenharmony_ci	if (le16_to_cpu(packet->negotiated_version) != SMBD_V1) {
32962306a36Sopenharmony_ci		log_rdma_event(ERR, "error: negotiated_version=%x\n",
33062306a36Sopenharmony_ci			le16_to_cpu(packet->negotiated_version));
33162306a36Sopenharmony_ci		return false;
33262306a36Sopenharmony_ci	}
33362306a36Sopenharmony_ci	info->protocol = le16_to_cpu(packet->negotiated_version);
33462306a36Sopenharmony_ci
33562306a36Sopenharmony_ci	if (packet->credits_requested == 0) {
33662306a36Sopenharmony_ci		log_rdma_event(ERR, "error: credits_requested==0\n");
33762306a36Sopenharmony_ci		return false;
33862306a36Sopenharmony_ci	}
33962306a36Sopenharmony_ci	info->receive_credit_target = le16_to_cpu(packet->credits_requested);
34062306a36Sopenharmony_ci
34162306a36Sopenharmony_ci	if (packet->credits_granted == 0) {
34262306a36Sopenharmony_ci		log_rdma_event(ERR, "error: credits_granted==0\n");
34362306a36Sopenharmony_ci		return false;
34462306a36Sopenharmony_ci	}
34562306a36Sopenharmony_ci	atomic_set(&info->send_credits, le16_to_cpu(packet->credits_granted));
34662306a36Sopenharmony_ci
34762306a36Sopenharmony_ci	atomic_set(&info->receive_credits, 0);
34862306a36Sopenharmony_ci
34962306a36Sopenharmony_ci	if (le32_to_cpu(packet->preferred_send_size) > info->max_receive_size) {
35062306a36Sopenharmony_ci		log_rdma_event(ERR, "error: preferred_send_size=%d\n",
35162306a36Sopenharmony_ci			le32_to_cpu(packet->preferred_send_size));
35262306a36Sopenharmony_ci		return false;
35362306a36Sopenharmony_ci	}
35462306a36Sopenharmony_ci	info->max_receive_size = le32_to_cpu(packet->preferred_send_size);
35562306a36Sopenharmony_ci
35662306a36Sopenharmony_ci	if (le32_to_cpu(packet->max_receive_size) < SMBD_MIN_RECEIVE_SIZE) {
35762306a36Sopenharmony_ci		log_rdma_event(ERR, "error: max_receive_size=%d\n",
35862306a36Sopenharmony_ci			le32_to_cpu(packet->max_receive_size));
35962306a36Sopenharmony_ci		return false;
36062306a36Sopenharmony_ci	}
36162306a36Sopenharmony_ci	info->max_send_size = min_t(int, info->max_send_size,
36262306a36Sopenharmony_ci					le32_to_cpu(packet->max_receive_size));
36362306a36Sopenharmony_ci
36462306a36Sopenharmony_ci	if (le32_to_cpu(packet->max_fragmented_size) <
36562306a36Sopenharmony_ci			SMBD_MIN_FRAGMENTED_SIZE) {
36662306a36Sopenharmony_ci		log_rdma_event(ERR, "error: max_fragmented_size=%d\n",
36762306a36Sopenharmony_ci			le32_to_cpu(packet->max_fragmented_size));
36862306a36Sopenharmony_ci		return false;
36962306a36Sopenharmony_ci	}
37062306a36Sopenharmony_ci	info->max_fragmented_send_size =
37162306a36Sopenharmony_ci		le32_to_cpu(packet->max_fragmented_size);
37262306a36Sopenharmony_ci	info->rdma_readwrite_threshold =
37362306a36Sopenharmony_ci		rdma_readwrite_threshold > info->max_fragmented_send_size ?
37462306a36Sopenharmony_ci		info->max_fragmented_send_size :
37562306a36Sopenharmony_ci		rdma_readwrite_threshold;
37662306a36Sopenharmony_ci
37762306a36Sopenharmony_ci
37862306a36Sopenharmony_ci	info->max_readwrite_size = min_t(u32,
37962306a36Sopenharmony_ci			le32_to_cpu(packet->max_readwrite_size),
38062306a36Sopenharmony_ci			info->max_frmr_depth * PAGE_SIZE);
38162306a36Sopenharmony_ci	info->max_frmr_depth = info->max_readwrite_size / PAGE_SIZE;
38262306a36Sopenharmony_ci
38362306a36Sopenharmony_ci	return true;
38462306a36Sopenharmony_ci}
38562306a36Sopenharmony_ci
38662306a36Sopenharmony_cistatic void smbd_post_send_credits(struct work_struct *work)
38762306a36Sopenharmony_ci{
38862306a36Sopenharmony_ci	int ret = 0;
38962306a36Sopenharmony_ci	int use_receive_queue = 1;
39062306a36Sopenharmony_ci	int rc;
39162306a36Sopenharmony_ci	struct smbd_response *response;
39262306a36Sopenharmony_ci	struct smbd_connection *info =
39362306a36Sopenharmony_ci		container_of(work, struct smbd_connection,
39462306a36Sopenharmony_ci			post_send_credits_work);
39562306a36Sopenharmony_ci
39662306a36Sopenharmony_ci	if (info->transport_status != SMBD_CONNECTED) {
39762306a36Sopenharmony_ci		wake_up(&info->wait_receive_queues);
39862306a36Sopenharmony_ci		return;
39962306a36Sopenharmony_ci	}
40062306a36Sopenharmony_ci
40162306a36Sopenharmony_ci	if (info->receive_credit_target >
40262306a36Sopenharmony_ci		atomic_read(&info->receive_credits)) {
40362306a36Sopenharmony_ci		while (true) {
40462306a36Sopenharmony_ci			if (use_receive_queue)
40562306a36Sopenharmony_ci				response = get_receive_buffer(info);
40662306a36Sopenharmony_ci			else
40762306a36Sopenharmony_ci				response = get_empty_queue_buffer(info);
40862306a36Sopenharmony_ci			if (!response) {
40962306a36Sopenharmony_ci				/* now switch to emtpy packet queue */
41062306a36Sopenharmony_ci				if (use_receive_queue) {
41162306a36Sopenharmony_ci					use_receive_queue = 0;
41262306a36Sopenharmony_ci					continue;
41362306a36Sopenharmony_ci				} else
41462306a36Sopenharmony_ci					break;
41562306a36Sopenharmony_ci			}
41662306a36Sopenharmony_ci
41762306a36Sopenharmony_ci			response->type = SMBD_TRANSFER_DATA;
41862306a36Sopenharmony_ci			response->first_segment = false;
41962306a36Sopenharmony_ci			rc = smbd_post_recv(info, response);
42062306a36Sopenharmony_ci			if (rc) {
42162306a36Sopenharmony_ci				log_rdma_recv(ERR,
42262306a36Sopenharmony_ci					"post_recv failed rc=%d\n", rc);
42362306a36Sopenharmony_ci				put_receive_buffer(info, response);
42462306a36Sopenharmony_ci				break;
42562306a36Sopenharmony_ci			}
42662306a36Sopenharmony_ci
42762306a36Sopenharmony_ci			ret++;
42862306a36Sopenharmony_ci		}
42962306a36Sopenharmony_ci	}
43062306a36Sopenharmony_ci
43162306a36Sopenharmony_ci	spin_lock(&info->lock_new_credits_offered);
43262306a36Sopenharmony_ci	info->new_credits_offered += ret;
43362306a36Sopenharmony_ci	spin_unlock(&info->lock_new_credits_offered);
43462306a36Sopenharmony_ci
43562306a36Sopenharmony_ci	/* Promptly send an immediate packet as defined in [MS-SMBD] 3.1.1.1 */
43662306a36Sopenharmony_ci	info->send_immediate = true;
43762306a36Sopenharmony_ci	if (atomic_read(&info->receive_credits) <
43862306a36Sopenharmony_ci		info->receive_credit_target - 1) {
43962306a36Sopenharmony_ci		if (info->keep_alive_requested == KEEP_ALIVE_PENDING ||
44062306a36Sopenharmony_ci		    info->send_immediate) {
44162306a36Sopenharmony_ci			log_keep_alive(INFO, "send an empty message\n");
44262306a36Sopenharmony_ci			smbd_post_send_empty(info);
44362306a36Sopenharmony_ci		}
44462306a36Sopenharmony_ci	}
44562306a36Sopenharmony_ci}
44662306a36Sopenharmony_ci
44762306a36Sopenharmony_ci/* Called from softirq, when recv is done */
44862306a36Sopenharmony_cistatic void recv_done(struct ib_cq *cq, struct ib_wc *wc)
44962306a36Sopenharmony_ci{
45062306a36Sopenharmony_ci	struct smbd_data_transfer *data_transfer;
45162306a36Sopenharmony_ci	struct smbd_response *response =
45262306a36Sopenharmony_ci		container_of(wc->wr_cqe, struct smbd_response, cqe);
45362306a36Sopenharmony_ci	struct smbd_connection *info = response->info;
45462306a36Sopenharmony_ci	int data_length = 0;
45562306a36Sopenharmony_ci
45662306a36Sopenharmony_ci	log_rdma_recv(INFO, "response=0x%p type=%d wc status=%d wc opcode %d byte_len=%d pkey_index=%u\n",
45762306a36Sopenharmony_ci		      response, response->type, wc->status, wc->opcode,
45862306a36Sopenharmony_ci		      wc->byte_len, wc->pkey_index);
45962306a36Sopenharmony_ci
46062306a36Sopenharmony_ci	if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_RECV) {
46162306a36Sopenharmony_ci		log_rdma_recv(INFO, "wc->status=%d opcode=%d\n",
46262306a36Sopenharmony_ci			wc->status, wc->opcode);
46362306a36Sopenharmony_ci		smbd_disconnect_rdma_connection(info);
46462306a36Sopenharmony_ci		goto error;
46562306a36Sopenharmony_ci	}
46662306a36Sopenharmony_ci
46762306a36Sopenharmony_ci	ib_dma_sync_single_for_cpu(
46862306a36Sopenharmony_ci		wc->qp->device,
46962306a36Sopenharmony_ci		response->sge.addr,
47062306a36Sopenharmony_ci		response->sge.length,
47162306a36Sopenharmony_ci		DMA_FROM_DEVICE);
47262306a36Sopenharmony_ci
47362306a36Sopenharmony_ci	switch (response->type) {
47462306a36Sopenharmony_ci	/* SMBD negotiation response */
47562306a36Sopenharmony_ci	case SMBD_NEGOTIATE_RESP:
47662306a36Sopenharmony_ci		dump_smbd_negotiate_resp(smbd_response_payload(response));
47762306a36Sopenharmony_ci		info->full_packet_received = true;
47862306a36Sopenharmony_ci		info->negotiate_done =
47962306a36Sopenharmony_ci			process_negotiation_response(response, wc->byte_len);
48062306a36Sopenharmony_ci		complete(&info->negotiate_completion);
48162306a36Sopenharmony_ci		break;
48262306a36Sopenharmony_ci
48362306a36Sopenharmony_ci	/* SMBD data transfer packet */
48462306a36Sopenharmony_ci	case SMBD_TRANSFER_DATA:
48562306a36Sopenharmony_ci		data_transfer = smbd_response_payload(response);
48662306a36Sopenharmony_ci		data_length = le32_to_cpu(data_transfer->data_length);
48762306a36Sopenharmony_ci
48862306a36Sopenharmony_ci		/*
48962306a36Sopenharmony_ci		 * If this is a packet with data playload place the data in
49062306a36Sopenharmony_ci		 * reassembly queue and wake up the reading thread
49162306a36Sopenharmony_ci		 */
49262306a36Sopenharmony_ci		if (data_length) {
49362306a36Sopenharmony_ci			if (info->full_packet_received)
49462306a36Sopenharmony_ci				response->first_segment = true;
49562306a36Sopenharmony_ci
49662306a36Sopenharmony_ci			if (le32_to_cpu(data_transfer->remaining_data_length))
49762306a36Sopenharmony_ci				info->full_packet_received = false;
49862306a36Sopenharmony_ci			else
49962306a36Sopenharmony_ci				info->full_packet_received = true;
50062306a36Sopenharmony_ci
50162306a36Sopenharmony_ci			enqueue_reassembly(
50262306a36Sopenharmony_ci				info,
50362306a36Sopenharmony_ci				response,
50462306a36Sopenharmony_ci				data_length);
50562306a36Sopenharmony_ci		} else
50662306a36Sopenharmony_ci			put_empty_packet(info, response);
50762306a36Sopenharmony_ci
50862306a36Sopenharmony_ci		if (data_length)
50962306a36Sopenharmony_ci			wake_up_interruptible(&info->wait_reassembly_queue);
51062306a36Sopenharmony_ci
51162306a36Sopenharmony_ci		atomic_dec(&info->receive_credits);
51262306a36Sopenharmony_ci		info->receive_credit_target =
51362306a36Sopenharmony_ci			le16_to_cpu(data_transfer->credits_requested);
51462306a36Sopenharmony_ci		if (le16_to_cpu(data_transfer->credits_granted)) {
51562306a36Sopenharmony_ci			atomic_add(le16_to_cpu(data_transfer->credits_granted),
51662306a36Sopenharmony_ci				&info->send_credits);
51762306a36Sopenharmony_ci			/*
51862306a36Sopenharmony_ci			 * We have new send credits granted from remote peer
51962306a36Sopenharmony_ci			 * If any sender is waiting for credits, unblock it
52062306a36Sopenharmony_ci			 */
52162306a36Sopenharmony_ci			wake_up_interruptible(&info->wait_send_queue);
52262306a36Sopenharmony_ci		}
52362306a36Sopenharmony_ci
52462306a36Sopenharmony_ci		log_incoming(INFO, "data flags %d data_offset %d data_length %d remaining_data_length %d\n",
52562306a36Sopenharmony_ci			     le16_to_cpu(data_transfer->flags),
52662306a36Sopenharmony_ci			     le32_to_cpu(data_transfer->data_offset),
52762306a36Sopenharmony_ci			     le32_to_cpu(data_transfer->data_length),
52862306a36Sopenharmony_ci			     le32_to_cpu(data_transfer->remaining_data_length));
52962306a36Sopenharmony_ci
53062306a36Sopenharmony_ci		/* Send a KEEP_ALIVE response right away if requested */
53162306a36Sopenharmony_ci		info->keep_alive_requested = KEEP_ALIVE_NONE;
53262306a36Sopenharmony_ci		if (le16_to_cpu(data_transfer->flags) &
53362306a36Sopenharmony_ci				SMB_DIRECT_RESPONSE_REQUESTED) {
53462306a36Sopenharmony_ci			info->keep_alive_requested = KEEP_ALIVE_PENDING;
53562306a36Sopenharmony_ci		}
53662306a36Sopenharmony_ci
53762306a36Sopenharmony_ci		return;
53862306a36Sopenharmony_ci
53962306a36Sopenharmony_ci	default:
54062306a36Sopenharmony_ci		log_rdma_recv(ERR,
54162306a36Sopenharmony_ci			"unexpected response type=%d\n", response->type);
54262306a36Sopenharmony_ci	}
54362306a36Sopenharmony_ci
54462306a36Sopenharmony_cierror:
54562306a36Sopenharmony_ci	put_receive_buffer(info, response);
54662306a36Sopenharmony_ci}
54762306a36Sopenharmony_ci
54862306a36Sopenharmony_cistatic struct rdma_cm_id *smbd_create_id(
54962306a36Sopenharmony_ci		struct smbd_connection *info,
55062306a36Sopenharmony_ci		struct sockaddr *dstaddr, int port)
55162306a36Sopenharmony_ci{
55262306a36Sopenharmony_ci	struct rdma_cm_id *id;
55362306a36Sopenharmony_ci	int rc;
55462306a36Sopenharmony_ci	__be16 *sport;
55562306a36Sopenharmony_ci
55662306a36Sopenharmony_ci	id = rdma_create_id(&init_net, smbd_conn_upcall, info,
55762306a36Sopenharmony_ci		RDMA_PS_TCP, IB_QPT_RC);
55862306a36Sopenharmony_ci	if (IS_ERR(id)) {
55962306a36Sopenharmony_ci		rc = PTR_ERR(id);
56062306a36Sopenharmony_ci		log_rdma_event(ERR, "rdma_create_id() failed %i\n", rc);
56162306a36Sopenharmony_ci		return id;
56262306a36Sopenharmony_ci	}
56362306a36Sopenharmony_ci
56462306a36Sopenharmony_ci	if (dstaddr->sa_family == AF_INET6)
56562306a36Sopenharmony_ci		sport = &((struct sockaddr_in6 *)dstaddr)->sin6_port;
56662306a36Sopenharmony_ci	else
56762306a36Sopenharmony_ci		sport = &((struct sockaddr_in *)dstaddr)->sin_port;
56862306a36Sopenharmony_ci
56962306a36Sopenharmony_ci	*sport = htons(port);
57062306a36Sopenharmony_ci
57162306a36Sopenharmony_ci	init_completion(&info->ri_done);
57262306a36Sopenharmony_ci	info->ri_rc = -ETIMEDOUT;
57362306a36Sopenharmony_ci
57462306a36Sopenharmony_ci	rc = rdma_resolve_addr(id, NULL, (struct sockaddr *)dstaddr,
57562306a36Sopenharmony_ci		RDMA_RESOLVE_TIMEOUT);
57662306a36Sopenharmony_ci	if (rc) {
57762306a36Sopenharmony_ci		log_rdma_event(ERR, "rdma_resolve_addr() failed %i\n", rc);
57862306a36Sopenharmony_ci		goto out;
57962306a36Sopenharmony_ci	}
58062306a36Sopenharmony_ci	rc = wait_for_completion_interruptible_timeout(
58162306a36Sopenharmony_ci		&info->ri_done, msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT));
58262306a36Sopenharmony_ci	/* e.g. if interrupted returns -ERESTARTSYS */
58362306a36Sopenharmony_ci	if (rc < 0) {
58462306a36Sopenharmony_ci		log_rdma_event(ERR, "rdma_resolve_addr timeout rc: %i\n", rc);
58562306a36Sopenharmony_ci		goto out;
58662306a36Sopenharmony_ci	}
58762306a36Sopenharmony_ci	rc = info->ri_rc;
58862306a36Sopenharmony_ci	if (rc) {
58962306a36Sopenharmony_ci		log_rdma_event(ERR, "rdma_resolve_addr() completed %i\n", rc);
59062306a36Sopenharmony_ci		goto out;
59162306a36Sopenharmony_ci	}
59262306a36Sopenharmony_ci
59362306a36Sopenharmony_ci	info->ri_rc = -ETIMEDOUT;
59462306a36Sopenharmony_ci	rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
59562306a36Sopenharmony_ci	if (rc) {
59662306a36Sopenharmony_ci		log_rdma_event(ERR, "rdma_resolve_route() failed %i\n", rc);
59762306a36Sopenharmony_ci		goto out;
59862306a36Sopenharmony_ci	}
59962306a36Sopenharmony_ci	rc = wait_for_completion_interruptible_timeout(
60062306a36Sopenharmony_ci		&info->ri_done, msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT));
60162306a36Sopenharmony_ci	/* e.g. if interrupted returns -ERESTARTSYS */
60262306a36Sopenharmony_ci	if (rc < 0)  {
60362306a36Sopenharmony_ci		log_rdma_event(ERR, "rdma_resolve_addr timeout rc: %i\n", rc);
60462306a36Sopenharmony_ci		goto out;
60562306a36Sopenharmony_ci	}
60662306a36Sopenharmony_ci	rc = info->ri_rc;
60762306a36Sopenharmony_ci	if (rc) {
60862306a36Sopenharmony_ci		log_rdma_event(ERR, "rdma_resolve_route() completed %i\n", rc);
60962306a36Sopenharmony_ci		goto out;
61062306a36Sopenharmony_ci	}
61162306a36Sopenharmony_ci
61262306a36Sopenharmony_ci	return id;
61362306a36Sopenharmony_ci
61462306a36Sopenharmony_ciout:
61562306a36Sopenharmony_ci	rdma_destroy_id(id);
61662306a36Sopenharmony_ci	return ERR_PTR(rc);
61762306a36Sopenharmony_ci}
61862306a36Sopenharmony_ci
61962306a36Sopenharmony_ci/*
62062306a36Sopenharmony_ci * Test if FRWR (Fast Registration Work Requests) is supported on the device
62162306a36Sopenharmony_ci * This implementation requries FRWR on RDMA read/write
62262306a36Sopenharmony_ci * return value: true if it is supported
62362306a36Sopenharmony_ci */
62462306a36Sopenharmony_cistatic bool frwr_is_supported(struct ib_device_attr *attrs)
62562306a36Sopenharmony_ci{
62662306a36Sopenharmony_ci	if (!(attrs->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS))
62762306a36Sopenharmony_ci		return false;
62862306a36Sopenharmony_ci	if (attrs->max_fast_reg_page_list_len == 0)
62962306a36Sopenharmony_ci		return false;
63062306a36Sopenharmony_ci	return true;
63162306a36Sopenharmony_ci}
63262306a36Sopenharmony_ci
63362306a36Sopenharmony_cistatic int smbd_ia_open(
63462306a36Sopenharmony_ci		struct smbd_connection *info,
63562306a36Sopenharmony_ci		struct sockaddr *dstaddr, int port)
63662306a36Sopenharmony_ci{
63762306a36Sopenharmony_ci	int rc;
63862306a36Sopenharmony_ci
63962306a36Sopenharmony_ci	info->id = smbd_create_id(info, dstaddr, port);
64062306a36Sopenharmony_ci	if (IS_ERR(info->id)) {
64162306a36Sopenharmony_ci		rc = PTR_ERR(info->id);
64262306a36Sopenharmony_ci		goto out1;
64362306a36Sopenharmony_ci	}
64462306a36Sopenharmony_ci
64562306a36Sopenharmony_ci	if (!frwr_is_supported(&info->id->device->attrs)) {
64662306a36Sopenharmony_ci		log_rdma_event(ERR, "Fast Registration Work Requests (FRWR) is not supported\n");
64762306a36Sopenharmony_ci		log_rdma_event(ERR, "Device capability flags = %llx max_fast_reg_page_list_len = %u\n",
64862306a36Sopenharmony_ci			       info->id->device->attrs.device_cap_flags,
64962306a36Sopenharmony_ci			       info->id->device->attrs.max_fast_reg_page_list_len);
65062306a36Sopenharmony_ci		rc = -EPROTONOSUPPORT;
65162306a36Sopenharmony_ci		goto out2;
65262306a36Sopenharmony_ci	}
65362306a36Sopenharmony_ci	info->max_frmr_depth = min_t(int,
65462306a36Sopenharmony_ci		smbd_max_frmr_depth,
65562306a36Sopenharmony_ci		info->id->device->attrs.max_fast_reg_page_list_len);
65662306a36Sopenharmony_ci	info->mr_type = IB_MR_TYPE_MEM_REG;
65762306a36Sopenharmony_ci	if (info->id->device->attrs.kernel_cap_flags & IBK_SG_GAPS_REG)
65862306a36Sopenharmony_ci		info->mr_type = IB_MR_TYPE_SG_GAPS;
65962306a36Sopenharmony_ci
66062306a36Sopenharmony_ci	info->pd = ib_alloc_pd(info->id->device, 0);
66162306a36Sopenharmony_ci	if (IS_ERR(info->pd)) {
66262306a36Sopenharmony_ci		rc = PTR_ERR(info->pd);
66362306a36Sopenharmony_ci		log_rdma_event(ERR, "ib_alloc_pd() returned %d\n", rc);
66462306a36Sopenharmony_ci		goto out2;
66562306a36Sopenharmony_ci	}
66662306a36Sopenharmony_ci
66762306a36Sopenharmony_ci	return 0;
66862306a36Sopenharmony_ci
66962306a36Sopenharmony_ciout2:
67062306a36Sopenharmony_ci	rdma_destroy_id(info->id);
67162306a36Sopenharmony_ci	info->id = NULL;
67262306a36Sopenharmony_ci
67362306a36Sopenharmony_ciout1:
67462306a36Sopenharmony_ci	return rc;
67562306a36Sopenharmony_ci}
67662306a36Sopenharmony_ci
67762306a36Sopenharmony_ci/*
67862306a36Sopenharmony_ci * Send a negotiation request message to the peer
67962306a36Sopenharmony_ci * The negotiation procedure is in [MS-SMBD] 3.1.5.2 and 3.1.5.3
68062306a36Sopenharmony_ci * After negotiation, the transport is connected and ready for
68162306a36Sopenharmony_ci * carrying upper layer SMB payload
68262306a36Sopenharmony_ci */
68362306a36Sopenharmony_cistatic int smbd_post_send_negotiate_req(struct smbd_connection *info)
68462306a36Sopenharmony_ci{
68562306a36Sopenharmony_ci	struct ib_send_wr send_wr;
68662306a36Sopenharmony_ci	int rc = -ENOMEM;
68762306a36Sopenharmony_ci	struct smbd_request *request;
68862306a36Sopenharmony_ci	struct smbd_negotiate_req *packet;
68962306a36Sopenharmony_ci
69062306a36Sopenharmony_ci	request = mempool_alloc(info->request_mempool, GFP_KERNEL);
69162306a36Sopenharmony_ci	if (!request)
69262306a36Sopenharmony_ci		return rc;
69362306a36Sopenharmony_ci
69462306a36Sopenharmony_ci	request->info = info;
69562306a36Sopenharmony_ci
69662306a36Sopenharmony_ci	packet = smbd_request_payload(request);
69762306a36Sopenharmony_ci	packet->min_version = cpu_to_le16(SMBD_V1);
69862306a36Sopenharmony_ci	packet->max_version = cpu_to_le16(SMBD_V1);
69962306a36Sopenharmony_ci	packet->reserved = 0;
70062306a36Sopenharmony_ci	packet->credits_requested = cpu_to_le16(info->send_credit_target);
70162306a36Sopenharmony_ci	packet->preferred_send_size = cpu_to_le32(info->max_send_size);
70262306a36Sopenharmony_ci	packet->max_receive_size = cpu_to_le32(info->max_receive_size);
70362306a36Sopenharmony_ci	packet->max_fragmented_size =
70462306a36Sopenharmony_ci		cpu_to_le32(info->max_fragmented_recv_size);
70562306a36Sopenharmony_ci
70662306a36Sopenharmony_ci	request->num_sge = 1;
70762306a36Sopenharmony_ci	request->sge[0].addr = ib_dma_map_single(
70862306a36Sopenharmony_ci				info->id->device, (void *)packet,
70962306a36Sopenharmony_ci				sizeof(*packet), DMA_TO_DEVICE);
71062306a36Sopenharmony_ci	if (ib_dma_mapping_error(info->id->device, request->sge[0].addr)) {
71162306a36Sopenharmony_ci		rc = -EIO;
71262306a36Sopenharmony_ci		goto dma_mapping_failed;
71362306a36Sopenharmony_ci	}
71462306a36Sopenharmony_ci
71562306a36Sopenharmony_ci	request->sge[0].length = sizeof(*packet);
71662306a36Sopenharmony_ci	request->sge[0].lkey = info->pd->local_dma_lkey;
71762306a36Sopenharmony_ci
71862306a36Sopenharmony_ci	ib_dma_sync_single_for_device(
71962306a36Sopenharmony_ci		info->id->device, request->sge[0].addr,
72062306a36Sopenharmony_ci		request->sge[0].length, DMA_TO_DEVICE);
72162306a36Sopenharmony_ci
72262306a36Sopenharmony_ci	request->cqe.done = send_done;
72362306a36Sopenharmony_ci
72462306a36Sopenharmony_ci	send_wr.next = NULL;
72562306a36Sopenharmony_ci	send_wr.wr_cqe = &request->cqe;
72662306a36Sopenharmony_ci	send_wr.sg_list = request->sge;
72762306a36Sopenharmony_ci	send_wr.num_sge = request->num_sge;
72862306a36Sopenharmony_ci	send_wr.opcode = IB_WR_SEND;
72962306a36Sopenharmony_ci	send_wr.send_flags = IB_SEND_SIGNALED;
73062306a36Sopenharmony_ci
73162306a36Sopenharmony_ci	log_rdma_send(INFO, "sge addr=0x%llx length=%u lkey=0x%x\n",
73262306a36Sopenharmony_ci		request->sge[0].addr,
73362306a36Sopenharmony_ci		request->sge[0].length, request->sge[0].lkey);
73462306a36Sopenharmony_ci
73562306a36Sopenharmony_ci	atomic_inc(&info->send_pending);
73662306a36Sopenharmony_ci	rc = ib_post_send(info->id->qp, &send_wr, NULL);
73762306a36Sopenharmony_ci	if (!rc)
73862306a36Sopenharmony_ci		return 0;
73962306a36Sopenharmony_ci
74062306a36Sopenharmony_ci	/* if we reach here, post send failed */
74162306a36Sopenharmony_ci	log_rdma_send(ERR, "ib_post_send failed rc=%d\n", rc);
74262306a36Sopenharmony_ci	atomic_dec(&info->send_pending);
74362306a36Sopenharmony_ci	ib_dma_unmap_single(info->id->device, request->sge[0].addr,
74462306a36Sopenharmony_ci		request->sge[0].length, DMA_TO_DEVICE);
74562306a36Sopenharmony_ci
74662306a36Sopenharmony_ci	smbd_disconnect_rdma_connection(info);
74762306a36Sopenharmony_ci
74862306a36Sopenharmony_cidma_mapping_failed:
74962306a36Sopenharmony_ci	mempool_free(request, info->request_mempool);
75062306a36Sopenharmony_ci	return rc;
75162306a36Sopenharmony_ci}
75262306a36Sopenharmony_ci
75362306a36Sopenharmony_ci/*
75462306a36Sopenharmony_ci * Extend the credits to remote peer
75562306a36Sopenharmony_ci * This implements [MS-SMBD] 3.1.5.9
75662306a36Sopenharmony_ci * The idea is that we should extend credits to remote peer as quickly as
75762306a36Sopenharmony_ci * it's allowed, to maintain data flow. We allocate as much receive
75862306a36Sopenharmony_ci * buffer as possible, and extend the receive credits to remote peer
75962306a36Sopenharmony_ci * return value: the new credtis being granted.
76062306a36Sopenharmony_ci */
76162306a36Sopenharmony_cistatic int manage_credits_prior_sending(struct smbd_connection *info)
76262306a36Sopenharmony_ci{
76362306a36Sopenharmony_ci	int new_credits;
76462306a36Sopenharmony_ci
76562306a36Sopenharmony_ci	spin_lock(&info->lock_new_credits_offered);
76662306a36Sopenharmony_ci	new_credits = info->new_credits_offered;
76762306a36Sopenharmony_ci	info->new_credits_offered = 0;
76862306a36Sopenharmony_ci	spin_unlock(&info->lock_new_credits_offered);
76962306a36Sopenharmony_ci
77062306a36Sopenharmony_ci	return new_credits;
77162306a36Sopenharmony_ci}
77262306a36Sopenharmony_ci
77362306a36Sopenharmony_ci/*
77462306a36Sopenharmony_ci * Check if we need to send a KEEP_ALIVE message
77562306a36Sopenharmony_ci * The idle connection timer triggers a KEEP_ALIVE message when expires
77662306a36Sopenharmony_ci * SMB_DIRECT_RESPONSE_REQUESTED is set in the message flag to have peer send
77762306a36Sopenharmony_ci * back a response.
77862306a36Sopenharmony_ci * return value:
77962306a36Sopenharmony_ci * 1 if SMB_DIRECT_RESPONSE_REQUESTED needs to be set
78062306a36Sopenharmony_ci * 0: otherwise
78162306a36Sopenharmony_ci */
78262306a36Sopenharmony_cistatic int manage_keep_alive_before_sending(struct smbd_connection *info)
78362306a36Sopenharmony_ci{
78462306a36Sopenharmony_ci	if (info->keep_alive_requested == KEEP_ALIVE_PENDING) {
78562306a36Sopenharmony_ci		info->keep_alive_requested = KEEP_ALIVE_SENT;
78662306a36Sopenharmony_ci		return 1;
78762306a36Sopenharmony_ci	}
78862306a36Sopenharmony_ci	return 0;
78962306a36Sopenharmony_ci}
79062306a36Sopenharmony_ci
79162306a36Sopenharmony_ci/* Post the send request */
79262306a36Sopenharmony_cistatic int smbd_post_send(struct smbd_connection *info,
79362306a36Sopenharmony_ci		struct smbd_request *request)
79462306a36Sopenharmony_ci{
79562306a36Sopenharmony_ci	struct ib_send_wr send_wr;
79662306a36Sopenharmony_ci	int rc, i;
79762306a36Sopenharmony_ci
79862306a36Sopenharmony_ci	for (i = 0; i < request->num_sge; i++) {
79962306a36Sopenharmony_ci		log_rdma_send(INFO,
80062306a36Sopenharmony_ci			"rdma_request sge[%d] addr=0x%llx length=%u\n",
80162306a36Sopenharmony_ci			i, request->sge[i].addr, request->sge[i].length);
80262306a36Sopenharmony_ci		ib_dma_sync_single_for_device(
80362306a36Sopenharmony_ci			info->id->device,
80462306a36Sopenharmony_ci			request->sge[i].addr,
80562306a36Sopenharmony_ci			request->sge[i].length,
80662306a36Sopenharmony_ci			DMA_TO_DEVICE);
80762306a36Sopenharmony_ci	}
80862306a36Sopenharmony_ci
80962306a36Sopenharmony_ci	request->cqe.done = send_done;
81062306a36Sopenharmony_ci
81162306a36Sopenharmony_ci	send_wr.next = NULL;
81262306a36Sopenharmony_ci	send_wr.wr_cqe = &request->cqe;
81362306a36Sopenharmony_ci	send_wr.sg_list = request->sge;
81462306a36Sopenharmony_ci	send_wr.num_sge = request->num_sge;
81562306a36Sopenharmony_ci	send_wr.opcode = IB_WR_SEND;
81662306a36Sopenharmony_ci	send_wr.send_flags = IB_SEND_SIGNALED;
81762306a36Sopenharmony_ci
81862306a36Sopenharmony_ci	rc = ib_post_send(info->id->qp, &send_wr, NULL);
81962306a36Sopenharmony_ci	if (rc) {
82062306a36Sopenharmony_ci		log_rdma_send(ERR, "ib_post_send failed rc=%d\n", rc);
82162306a36Sopenharmony_ci		smbd_disconnect_rdma_connection(info);
82262306a36Sopenharmony_ci		rc = -EAGAIN;
82362306a36Sopenharmony_ci	} else
82462306a36Sopenharmony_ci		/* Reset timer for idle connection after packet is sent */
82562306a36Sopenharmony_ci		mod_delayed_work(info->workqueue, &info->idle_timer_work,
82662306a36Sopenharmony_ci			info->keep_alive_interval*HZ);
82762306a36Sopenharmony_ci
82862306a36Sopenharmony_ci	return rc;
82962306a36Sopenharmony_ci}
83062306a36Sopenharmony_ci
83162306a36Sopenharmony_cistatic int smbd_post_send_iter(struct smbd_connection *info,
83262306a36Sopenharmony_ci			       struct iov_iter *iter,
83362306a36Sopenharmony_ci			       int *_remaining_data_length)
83462306a36Sopenharmony_ci{
83562306a36Sopenharmony_ci	int i, rc;
83662306a36Sopenharmony_ci	int header_length;
83762306a36Sopenharmony_ci	int data_length;
83862306a36Sopenharmony_ci	struct smbd_request *request;
83962306a36Sopenharmony_ci	struct smbd_data_transfer *packet;
84062306a36Sopenharmony_ci	int new_credits = 0;
84162306a36Sopenharmony_ci
84262306a36Sopenharmony_ciwait_credit:
84362306a36Sopenharmony_ci	/* Wait for send credits. A SMBD packet needs one credit */
84462306a36Sopenharmony_ci	rc = wait_event_interruptible(info->wait_send_queue,
84562306a36Sopenharmony_ci		atomic_read(&info->send_credits) > 0 ||
84662306a36Sopenharmony_ci		info->transport_status != SMBD_CONNECTED);
84762306a36Sopenharmony_ci	if (rc)
84862306a36Sopenharmony_ci		goto err_wait_credit;
84962306a36Sopenharmony_ci
85062306a36Sopenharmony_ci	if (info->transport_status != SMBD_CONNECTED) {
85162306a36Sopenharmony_ci		log_outgoing(ERR, "disconnected not sending on wait_credit\n");
85262306a36Sopenharmony_ci		rc = -EAGAIN;
85362306a36Sopenharmony_ci		goto err_wait_credit;
85462306a36Sopenharmony_ci	}
85562306a36Sopenharmony_ci	if (unlikely(atomic_dec_return(&info->send_credits) < 0)) {
85662306a36Sopenharmony_ci		atomic_inc(&info->send_credits);
85762306a36Sopenharmony_ci		goto wait_credit;
85862306a36Sopenharmony_ci	}
85962306a36Sopenharmony_ci
86062306a36Sopenharmony_ciwait_send_queue:
86162306a36Sopenharmony_ci	wait_event(info->wait_post_send,
86262306a36Sopenharmony_ci		atomic_read(&info->send_pending) < info->send_credit_target ||
86362306a36Sopenharmony_ci		info->transport_status != SMBD_CONNECTED);
86462306a36Sopenharmony_ci
86562306a36Sopenharmony_ci	if (info->transport_status != SMBD_CONNECTED) {
86662306a36Sopenharmony_ci		log_outgoing(ERR, "disconnected not sending on wait_send_queue\n");
86762306a36Sopenharmony_ci		rc = -EAGAIN;
86862306a36Sopenharmony_ci		goto err_wait_send_queue;
86962306a36Sopenharmony_ci	}
87062306a36Sopenharmony_ci
87162306a36Sopenharmony_ci	if (unlikely(atomic_inc_return(&info->send_pending) >
87262306a36Sopenharmony_ci				info->send_credit_target)) {
87362306a36Sopenharmony_ci		atomic_dec(&info->send_pending);
87462306a36Sopenharmony_ci		goto wait_send_queue;
87562306a36Sopenharmony_ci	}
87662306a36Sopenharmony_ci
87762306a36Sopenharmony_ci	request = mempool_alloc(info->request_mempool, GFP_KERNEL);
87862306a36Sopenharmony_ci	if (!request) {
87962306a36Sopenharmony_ci		rc = -ENOMEM;
88062306a36Sopenharmony_ci		goto err_alloc;
88162306a36Sopenharmony_ci	}
88262306a36Sopenharmony_ci
88362306a36Sopenharmony_ci	request->info = info;
88462306a36Sopenharmony_ci	memset(request->sge, 0, sizeof(request->sge));
88562306a36Sopenharmony_ci
88662306a36Sopenharmony_ci	/* Fill in the data payload to find out how much data we can add */
88762306a36Sopenharmony_ci	if (iter) {
88862306a36Sopenharmony_ci		struct smb_extract_to_rdma extract = {
88962306a36Sopenharmony_ci			.nr_sge		= 1,
89062306a36Sopenharmony_ci			.max_sge	= SMBDIRECT_MAX_SEND_SGE,
89162306a36Sopenharmony_ci			.sge		= request->sge,
89262306a36Sopenharmony_ci			.device		= info->id->device,
89362306a36Sopenharmony_ci			.local_dma_lkey	= info->pd->local_dma_lkey,
89462306a36Sopenharmony_ci			.direction	= DMA_TO_DEVICE,
89562306a36Sopenharmony_ci		};
89662306a36Sopenharmony_ci
89762306a36Sopenharmony_ci		rc = smb_extract_iter_to_rdma(iter, *_remaining_data_length,
89862306a36Sopenharmony_ci					      &extract);
89962306a36Sopenharmony_ci		if (rc < 0)
90062306a36Sopenharmony_ci			goto err_dma;
90162306a36Sopenharmony_ci		data_length = rc;
90262306a36Sopenharmony_ci		request->num_sge = extract.nr_sge;
90362306a36Sopenharmony_ci		*_remaining_data_length -= data_length;
90462306a36Sopenharmony_ci	} else {
90562306a36Sopenharmony_ci		data_length = 0;
90662306a36Sopenharmony_ci		request->num_sge = 1;
90762306a36Sopenharmony_ci	}
90862306a36Sopenharmony_ci
90962306a36Sopenharmony_ci	/* Fill in the packet header */
91062306a36Sopenharmony_ci	packet = smbd_request_payload(request);
91162306a36Sopenharmony_ci	packet->credits_requested = cpu_to_le16(info->send_credit_target);
91262306a36Sopenharmony_ci
91362306a36Sopenharmony_ci	new_credits = manage_credits_prior_sending(info);
91462306a36Sopenharmony_ci	atomic_add(new_credits, &info->receive_credits);
91562306a36Sopenharmony_ci	packet->credits_granted = cpu_to_le16(new_credits);
91662306a36Sopenharmony_ci
91762306a36Sopenharmony_ci	info->send_immediate = false;
91862306a36Sopenharmony_ci
91962306a36Sopenharmony_ci	packet->flags = 0;
92062306a36Sopenharmony_ci	if (manage_keep_alive_before_sending(info))
92162306a36Sopenharmony_ci		packet->flags |= cpu_to_le16(SMB_DIRECT_RESPONSE_REQUESTED);
92262306a36Sopenharmony_ci
92362306a36Sopenharmony_ci	packet->reserved = 0;
92462306a36Sopenharmony_ci	if (!data_length)
92562306a36Sopenharmony_ci		packet->data_offset = 0;
92662306a36Sopenharmony_ci	else
92762306a36Sopenharmony_ci		packet->data_offset = cpu_to_le32(24);
92862306a36Sopenharmony_ci	packet->data_length = cpu_to_le32(data_length);
92962306a36Sopenharmony_ci	packet->remaining_data_length = cpu_to_le32(*_remaining_data_length);
93062306a36Sopenharmony_ci	packet->padding = 0;
93162306a36Sopenharmony_ci
93262306a36Sopenharmony_ci	log_outgoing(INFO, "credits_requested=%d credits_granted=%d data_offset=%d data_length=%d remaining_data_length=%d\n",
93362306a36Sopenharmony_ci		     le16_to_cpu(packet->credits_requested),
93462306a36Sopenharmony_ci		     le16_to_cpu(packet->credits_granted),
93562306a36Sopenharmony_ci		     le32_to_cpu(packet->data_offset),
93662306a36Sopenharmony_ci		     le32_to_cpu(packet->data_length),
93762306a36Sopenharmony_ci		     le32_to_cpu(packet->remaining_data_length));
93862306a36Sopenharmony_ci
93962306a36Sopenharmony_ci	/* Map the packet to DMA */
94062306a36Sopenharmony_ci	header_length = sizeof(struct smbd_data_transfer);
94162306a36Sopenharmony_ci	/* If this is a packet without payload, don't send padding */
94262306a36Sopenharmony_ci	if (!data_length)
94362306a36Sopenharmony_ci		header_length = offsetof(struct smbd_data_transfer, padding);
94462306a36Sopenharmony_ci
94562306a36Sopenharmony_ci	request->sge[0].addr = ib_dma_map_single(info->id->device,
94662306a36Sopenharmony_ci						 (void *)packet,
94762306a36Sopenharmony_ci						 header_length,
94862306a36Sopenharmony_ci						 DMA_TO_DEVICE);
94962306a36Sopenharmony_ci	if (ib_dma_mapping_error(info->id->device, request->sge[0].addr)) {
95062306a36Sopenharmony_ci		rc = -EIO;
95162306a36Sopenharmony_ci		request->sge[0].addr = 0;
95262306a36Sopenharmony_ci		goto err_dma;
95362306a36Sopenharmony_ci	}
95462306a36Sopenharmony_ci
95562306a36Sopenharmony_ci	request->sge[0].length = header_length;
95662306a36Sopenharmony_ci	request->sge[0].lkey = info->pd->local_dma_lkey;
95762306a36Sopenharmony_ci
95862306a36Sopenharmony_ci	rc = smbd_post_send(info, request);
95962306a36Sopenharmony_ci	if (!rc)
96062306a36Sopenharmony_ci		return 0;
96162306a36Sopenharmony_ci
96262306a36Sopenharmony_cierr_dma:
96362306a36Sopenharmony_ci	for (i = 0; i < request->num_sge; i++)
96462306a36Sopenharmony_ci		if (request->sge[i].addr)
96562306a36Sopenharmony_ci			ib_dma_unmap_single(info->id->device,
96662306a36Sopenharmony_ci					    request->sge[i].addr,
96762306a36Sopenharmony_ci					    request->sge[i].length,
96862306a36Sopenharmony_ci					    DMA_TO_DEVICE);
96962306a36Sopenharmony_ci	mempool_free(request, info->request_mempool);
97062306a36Sopenharmony_ci
97162306a36Sopenharmony_ci	/* roll back receive credits and credits to be offered */
97262306a36Sopenharmony_ci	spin_lock(&info->lock_new_credits_offered);
97362306a36Sopenharmony_ci	info->new_credits_offered += new_credits;
97462306a36Sopenharmony_ci	spin_unlock(&info->lock_new_credits_offered);
97562306a36Sopenharmony_ci	atomic_sub(new_credits, &info->receive_credits);
97662306a36Sopenharmony_ci
97762306a36Sopenharmony_cierr_alloc:
97862306a36Sopenharmony_ci	if (atomic_dec_and_test(&info->send_pending))
97962306a36Sopenharmony_ci		wake_up(&info->wait_send_pending);
98062306a36Sopenharmony_ci
98162306a36Sopenharmony_cierr_wait_send_queue:
98262306a36Sopenharmony_ci	/* roll back send credits and pending */
98362306a36Sopenharmony_ci	atomic_inc(&info->send_credits);
98462306a36Sopenharmony_ci
98562306a36Sopenharmony_cierr_wait_credit:
98662306a36Sopenharmony_ci	return rc;
98762306a36Sopenharmony_ci}
98862306a36Sopenharmony_ci
98962306a36Sopenharmony_ci/*
99062306a36Sopenharmony_ci * Send an empty message
99162306a36Sopenharmony_ci * Empty message is used to extend credits to peer to for keep live
99262306a36Sopenharmony_ci * while there is no upper layer payload to send at the time
99362306a36Sopenharmony_ci */
99462306a36Sopenharmony_cistatic int smbd_post_send_empty(struct smbd_connection *info)
99562306a36Sopenharmony_ci{
99662306a36Sopenharmony_ci	int remaining_data_length = 0;
99762306a36Sopenharmony_ci
99862306a36Sopenharmony_ci	info->count_send_empty++;
99962306a36Sopenharmony_ci	return smbd_post_send_iter(info, NULL, &remaining_data_length);
100062306a36Sopenharmony_ci}
100162306a36Sopenharmony_ci
100262306a36Sopenharmony_ci/*
100362306a36Sopenharmony_ci * Post a receive request to the transport
100462306a36Sopenharmony_ci * The remote peer can only send data when a receive request is posted
100562306a36Sopenharmony_ci * The interaction is controlled by send/receive credit system
100662306a36Sopenharmony_ci */
100762306a36Sopenharmony_cistatic int smbd_post_recv(
100862306a36Sopenharmony_ci		struct smbd_connection *info, struct smbd_response *response)
100962306a36Sopenharmony_ci{
101062306a36Sopenharmony_ci	struct ib_recv_wr recv_wr;
101162306a36Sopenharmony_ci	int rc = -EIO;
101262306a36Sopenharmony_ci
101362306a36Sopenharmony_ci	response->sge.addr = ib_dma_map_single(
101462306a36Sopenharmony_ci				info->id->device, response->packet,
101562306a36Sopenharmony_ci				info->max_receive_size, DMA_FROM_DEVICE);
101662306a36Sopenharmony_ci	if (ib_dma_mapping_error(info->id->device, response->sge.addr))
101762306a36Sopenharmony_ci		return rc;
101862306a36Sopenharmony_ci
101962306a36Sopenharmony_ci	response->sge.length = info->max_receive_size;
102062306a36Sopenharmony_ci	response->sge.lkey = info->pd->local_dma_lkey;
102162306a36Sopenharmony_ci
102262306a36Sopenharmony_ci	response->cqe.done = recv_done;
102362306a36Sopenharmony_ci
102462306a36Sopenharmony_ci	recv_wr.wr_cqe = &response->cqe;
102562306a36Sopenharmony_ci	recv_wr.next = NULL;
102662306a36Sopenharmony_ci	recv_wr.sg_list = &response->sge;
102762306a36Sopenharmony_ci	recv_wr.num_sge = 1;
102862306a36Sopenharmony_ci
102962306a36Sopenharmony_ci	rc = ib_post_recv(info->id->qp, &recv_wr, NULL);
103062306a36Sopenharmony_ci	if (rc) {
103162306a36Sopenharmony_ci		ib_dma_unmap_single(info->id->device, response->sge.addr,
103262306a36Sopenharmony_ci				    response->sge.length, DMA_FROM_DEVICE);
103362306a36Sopenharmony_ci		smbd_disconnect_rdma_connection(info);
103462306a36Sopenharmony_ci		log_rdma_recv(ERR, "ib_post_recv failed rc=%d\n", rc);
103562306a36Sopenharmony_ci	}
103662306a36Sopenharmony_ci
103762306a36Sopenharmony_ci	return rc;
103862306a36Sopenharmony_ci}
103962306a36Sopenharmony_ci
104062306a36Sopenharmony_ci/* Perform SMBD negotiate according to [MS-SMBD] 3.1.5.2 */
104162306a36Sopenharmony_cistatic int smbd_negotiate(struct smbd_connection *info)
104262306a36Sopenharmony_ci{
104362306a36Sopenharmony_ci	int rc;
104462306a36Sopenharmony_ci	struct smbd_response *response = get_receive_buffer(info);
104562306a36Sopenharmony_ci
104662306a36Sopenharmony_ci	response->type = SMBD_NEGOTIATE_RESP;
104762306a36Sopenharmony_ci	rc = smbd_post_recv(info, response);
104862306a36Sopenharmony_ci	log_rdma_event(INFO, "smbd_post_recv rc=%d iov.addr=0x%llx iov.length=%u iov.lkey=0x%x\n",
104962306a36Sopenharmony_ci		       rc, response->sge.addr,
105062306a36Sopenharmony_ci		       response->sge.length, response->sge.lkey);
105162306a36Sopenharmony_ci	if (rc)
105262306a36Sopenharmony_ci		return rc;
105362306a36Sopenharmony_ci
105462306a36Sopenharmony_ci	init_completion(&info->negotiate_completion);
105562306a36Sopenharmony_ci	info->negotiate_done = false;
105662306a36Sopenharmony_ci	rc = smbd_post_send_negotiate_req(info);
105762306a36Sopenharmony_ci	if (rc)
105862306a36Sopenharmony_ci		return rc;
105962306a36Sopenharmony_ci
106062306a36Sopenharmony_ci	rc = wait_for_completion_interruptible_timeout(
106162306a36Sopenharmony_ci		&info->negotiate_completion, SMBD_NEGOTIATE_TIMEOUT * HZ);
106262306a36Sopenharmony_ci	log_rdma_event(INFO, "wait_for_completion_timeout rc=%d\n", rc);
106362306a36Sopenharmony_ci
106462306a36Sopenharmony_ci	if (info->negotiate_done)
106562306a36Sopenharmony_ci		return 0;
106662306a36Sopenharmony_ci
106762306a36Sopenharmony_ci	if (rc == 0)
106862306a36Sopenharmony_ci		rc = -ETIMEDOUT;
106962306a36Sopenharmony_ci	else if (rc == -ERESTARTSYS)
107062306a36Sopenharmony_ci		rc = -EINTR;
107162306a36Sopenharmony_ci	else
107262306a36Sopenharmony_ci		rc = -ENOTCONN;
107362306a36Sopenharmony_ci
107462306a36Sopenharmony_ci	return rc;
107562306a36Sopenharmony_ci}
107662306a36Sopenharmony_ci
107762306a36Sopenharmony_cistatic void put_empty_packet(
107862306a36Sopenharmony_ci		struct smbd_connection *info, struct smbd_response *response)
107962306a36Sopenharmony_ci{
108062306a36Sopenharmony_ci	spin_lock(&info->empty_packet_queue_lock);
108162306a36Sopenharmony_ci	list_add_tail(&response->list, &info->empty_packet_queue);
108262306a36Sopenharmony_ci	info->count_empty_packet_queue++;
108362306a36Sopenharmony_ci	spin_unlock(&info->empty_packet_queue_lock);
108462306a36Sopenharmony_ci
108562306a36Sopenharmony_ci	queue_work(info->workqueue, &info->post_send_credits_work);
108662306a36Sopenharmony_ci}
108762306a36Sopenharmony_ci
108862306a36Sopenharmony_ci/*
108962306a36Sopenharmony_ci * Implement Connection.FragmentReassemblyBuffer defined in [MS-SMBD] 3.1.1.1
109062306a36Sopenharmony_ci * This is a queue for reassembling upper layer payload and present to upper
109162306a36Sopenharmony_ci * layer. All the inncoming payload go to the reassembly queue, regardless of
109262306a36Sopenharmony_ci * if reassembly is required. The uuper layer code reads from the queue for all
109362306a36Sopenharmony_ci * incoming payloads.
109462306a36Sopenharmony_ci * Put a received packet to the reassembly queue
109562306a36Sopenharmony_ci * response: the packet received
109662306a36Sopenharmony_ci * data_length: the size of payload in this packet
109762306a36Sopenharmony_ci */
109862306a36Sopenharmony_cistatic void enqueue_reassembly(
109962306a36Sopenharmony_ci	struct smbd_connection *info,
110062306a36Sopenharmony_ci	struct smbd_response *response,
110162306a36Sopenharmony_ci	int data_length)
110262306a36Sopenharmony_ci{
110362306a36Sopenharmony_ci	spin_lock(&info->reassembly_queue_lock);
110462306a36Sopenharmony_ci	list_add_tail(&response->list, &info->reassembly_queue);
110562306a36Sopenharmony_ci	info->reassembly_queue_length++;
110662306a36Sopenharmony_ci	/*
110762306a36Sopenharmony_ci	 * Make sure reassembly_data_length is updated after list and
110862306a36Sopenharmony_ci	 * reassembly_queue_length are updated. On the dequeue side
110962306a36Sopenharmony_ci	 * reassembly_data_length is checked without a lock to determine
111062306a36Sopenharmony_ci	 * if reassembly_queue_length and list is up to date
111162306a36Sopenharmony_ci	 */
111262306a36Sopenharmony_ci	virt_wmb();
111362306a36Sopenharmony_ci	info->reassembly_data_length += data_length;
111462306a36Sopenharmony_ci	spin_unlock(&info->reassembly_queue_lock);
111562306a36Sopenharmony_ci	info->count_reassembly_queue++;
111662306a36Sopenharmony_ci	info->count_enqueue_reassembly_queue++;
111762306a36Sopenharmony_ci}
111862306a36Sopenharmony_ci
111962306a36Sopenharmony_ci/*
112062306a36Sopenharmony_ci * Get the first entry at the front of reassembly queue
112162306a36Sopenharmony_ci * Caller is responsible for locking
112262306a36Sopenharmony_ci * return value: the first entry if any, NULL if queue is empty
112362306a36Sopenharmony_ci */
112462306a36Sopenharmony_cistatic struct smbd_response *_get_first_reassembly(struct smbd_connection *info)
112562306a36Sopenharmony_ci{
112662306a36Sopenharmony_ci	struct smbd_response *ret = NULL;
112762306a36Sopenharmony_ci
112862306a36Sopenharmony_ci	if (!list_empty(&info->reassembly_queue)) {
112962306a36Sopenharmony_ci		ret = list_first_entry(
113062306a36Sopenharmony_ci			&info->reassembly_queue,
113162306a36Sopenharmony_ci			struct smbd_response, list);
113262306a36Sopenharmony_ci	}
113362306a36Sopenharmony_ci	return ret;
113462306a36Sopenharmony_ci}
113562306a36Sopenharmony_ci
113662306a36Sopenharmony_cistatic struct smbd_response *get_empty_queue_buffer(
113762306a36Sopenharmony_ci		struct smbd_connection *info)
113862306a36Sopenharmony_ci{
113962306a36Sopenharmony_ci	struct smbd_response *ret = NULL;
114062306a36Sopenharmony_ci	unsigned long flags;
114162306a36Sopenharmony_ci
114262306a36Sopenharmony_ci	spin_lock_irqsave(&info->empty_packet_queue_lock, flags);
114362306a36Sopenharmony_ci	if (!list_empty(&info->empty_packet_queue)) {
114462306a36Sopenharmony_ci		ret = list_first_entry(
114562306a36Sopenharmony_ci			&info->empty_packet_queue,
114662306a36Sopenharmony_ci			struct smbd_response, list);
114762306a36Sopenharmony_ci		list_del(&ret->list);
114862306a36Sopenharmony_ci		info->count_empty_packet_queue--;
114962306a36Sopenharmony_ci	}
115062306a36Sopenharmony_ci	spin_unlock_irqrestore(&info->empty_packet_queue_lock, flags);
115162306a36Sopenharmony_ci
115262306a36Sopenharmony_ci	return ret;
115362306a36Sopenharmony_ci}
115462306a36Sopenharmony_ci
115562306a36Sopenharmony_ci/*
115662306a36Sopenharmony_ci * Get a receive buffer
115762306a36Sopenharmony_ci * For each remote send, we need to post a receive. The receive buffers are
115862306a36Sopenharmony_ci * pre-allocated in advance.
115962306a36Sopenharmony_ci * return value: the receive buffer, NULL if none is available
116062306a36Sopenharmony_ci */
116162306a36Sopenharmony_cistatic struct smbd_response *get_receive_buffer(struct smbd_connection *info)
116262306a36Sopenharmony_ci{
116362306a36Sopenharmony_ci	struct smbd_response *ret = NULL;
116462306a36Sopenharmony_ci	unsigned long flags;
116562306a36Sopenharmony_ci
116662306a36Sopenharmony_ci	spin_lock_irqsave(&info->receive_queue_lock, flags);
116762306a36Sopenharmony_ci	if (!list_empty(&info->receive_queue)) {
116862306a36Sopenharmony_ci		ret = list_first_entry(
116962306a36Sopenharmony_ci			&info->receive_queue,
117062306a36Sopenharmony_ci			struct smbd_response, list);
117162306a36Sopenharmony_ci		list_del(&ret->list);
117262306a36Sopenharmony_ci		info->count_receive_queue--;
117362306a36Sopenharmony_ci		info->count_get_receive_buffer++;
117462306a36Sopenharmony_ci	}
117562306a36Sopenharmony_ci	spin_unlock_irqrestore(&info->receive_queue_lock, flags);
117662306a36Sopenharmony_ci
117762306a36Sopenharmony_ci	return ret;
117862306a36Sopenharmony_ci}
117962306a36Sopenharmony_ci
118062306a36Sopenharmony_ci/*
118162306a36Sopenharmony_ci * Return a receive buffer
118262306a36Sopenharmony_ci * Upon returning of a receive buffer, we can post new receive and extend
118362306a36Sopenharmony_ci * more receive credits to remote peer. This is done immediately after a
118462306a36Sopenharmony_ci * receive buffer is returned.
118562306a36Sopenharmony_ci */
118662306a36Sopenharmony_cistatic void put_receive_buffer(
118762306a36Sopenharmony_ci	struct smbd_connection *info, struct smbd_response *response)
118862306a36Sopenharmony_ci{
118962306a36Sopenharmony_ci	unsigned long flags;
119062306a36Sopenharmony_ci
119162306a36Sopenharmony_ci	ib_dma_unmap_single(info->id->device, response->sge.addr,
119262306a36Sopenharmony_ci		response->sge.length, DMA_FROM_DEVICE);
119362306a36Sopenharmony_ci
119462306a36Sopenharmony_ci	spin_lock_irqsave(&info->receive_queue_lock, flags);
119562306a36Sopenharmony_ci	list_add_tail(&response->list, &info->receive_queue);
119662306a36Sopenharmony_ci	info->count_receive_queue++;
119762306a36Sopenharmony_ci	info->count_put_receive_buffer++;
119862306a36Sopenharmony_ci	spin_unlock_irqrestore(&info->receive_queue_lock, flags);
119962306a36Sopenharmony_ci
120062306a36Sopenharmony_ci	queue_work(info->workqueue, &info->post_send_credits_work);
120162306a36Sopenharmony_ci}
120262306a36Sopenharmony_ci
120362306a36Sopenharmony_ci/* Preallocate all receive buffer on transport establishment */
120462306a36Sopenharmony_cistatic int allocate_receive_buffers(struct smbd_connection *info, int num_buf)
120562306a36Sopenharmony_ci{
120662306a36Sopenharmony_ci	int i;
120762306a36Sopenharmony_ci	struct smbd_response *response;
120862306a36Sopenharmony_ci
120962306a36Sopenharmony_ci	INIT_LIST_HEAD(&info->reassembly_queue);
121062306a36Sopenharmony_ci	spin_lock_init(&info->reassembly_queue_lock);
121162306a36Sopenharmony_ci	info->reassembly_data_length = 0;
121262306a36Sopenharmony_ci	info->reassembly_queue_length = 0;
121362306a36Sopenharmony_ci
121462306a36Sopenharmony_ci	INIT_LIST_HEAD(&info->receive_queue);
121562306a36Sopenharmony_ci	spin_lock_init(&info->receive_queue_lock);
121662306a36Sopenharmony_ci	info->count_receive_queue = 0;
121762306a36Sopenharmony_ci
121862306a36Sopenharmony_ci	INIT_LIST_HEAD(&info->empty_packet_queue);
121962306a36Sopenharmony_ci	spin_lock_init(&info->empty_packet_queue_lock);
122062306a36Sopenharmony_ci	info->count_empty_packet_queue = 0;
122162306a36Sopenharmony_ci
122262306a36Sopenharmony_ci	init_waitqueue_head(&info->wait_receive_queues);
122362306a36Sopenharmony_ci
122462306a36Sopenharmony_ci	for (i = 0; i < num_buf; i++) {
122562306a36Sopenharmony_ci		response = mempool_alloc(info->response_mempool, GFP_KERNEL);
122662306a36Sopenharmony_ci		if (!response)
122762306a36Sopenharmony_ci			goto allocate_failed;
122862306a36Sopenharmony_ci
122962306a36Sopenharmony_ci		response->info = info;
123062306a36Sopenharmony_ci		list_add_tail(&response->list, &info->receive_queue);
123162306a36Sopenharmony_ci		info->count_receive_queue++;
123262306a36Sopenharmony_ci	}
123362306a36Sopenharmony_ci
123462306a36Sopenharmony_ci	return 0;
123562306a36Sopenharmony_ci
123662306a36Sopenharmony_ciallocate_failed:
123762306a36Sopenharmony_ci	while (!list_empty(&info->receive_queue)) {
123862306a36Sopenharmony_ci		response = list_first_entry(
123962306a36Sopenharmony_ci				&info->receive_queue,
124062306a36Sopenharmony_ci				struct smbd_response, list);
124162306a36Sopenharmony_ci		list_del(&response->list);
124262306a36Sopenharmony_ci		info->count_receive_queue--;
124362306a36Sopenharmony_ci
124462306a36Sopenharmony_ci		mempool_free(response, info->response_mempool);
124562306a36Sopenharmony_ci	}
124662306a36Sopenharmony_ci	return -ENOMEM;
124762306a36Sopenharmony_ci}
124862306a36Sopenharmony_ci
124962306a36Sopenharmony_cistatic void destroy_receive_buffers(struct smbd_connection *info)
125062306a36Sopenharmony_ci{
125162306a36Sopenharmony_ci	struct smbd_response *response;
125262306a36Sopenharmony_ci
125362306a36Sopenharmony_ci	while ((response = get_receive_buffer(info)))
125462306a36Sopenharmony_ci		mempool_free(response, info->response_mempool);
125562306a36Sopenharmony_ci
125662306a36Sopenharmony_ci	while ((response = get_empty_queue_buffer(info)))
125762306a36Sopenharmony_ci		mempool_free(response, info->response_mempool);
125862306a36Sopenharmony_ci}
125962306a36Sopenharmony_ci
126062306a36Sopenharmony_ci/* Implement idle connection timer [MS-SMBD] 3.1.6.2 */
126162306a36Sopenharmony_cistatic void idle_connection_timer(struct work_struct *work)
126262306a36Sopenharmony_ci{
126362306a36Sopenharmony_ci	struct smbd_connection *info = container_of(
126462306a36Sopenharmony_ci					work, struct smbd_connection,
126562306a36Sopenharmony_ci					idle_timer_work.work);
126662306a36Sopenharmony_ci
126762306a36Sopenharmony_ci	if (info->keep_alive_requested != KEEP_ALIVE_NONE) {
126862306a36Sopenharmony_ci		log_keep_alive(ERR,
126962306a36Sopenharmony_ci			"error status info->keep_alive_requested=%d\n",
127062306a36Sopenharmony_ci			info->keep_alive_requested);
127162306a36Sopenharmony_ci		smbd_disconnect_rdma_connection(info);
127262306a36Sopenharmony_ci		return;
127362306a36Sopenharmony_ci	}
127462306a36Sopenharmony_ci
127562306a36Sopenharmony_ci	log_keep_alive(INFO, "about to send an empty idle message\n");
127662306a36Sopenharmony_ci	smbd_post_send_empty(info);
127762306a36Sopenharmony_ci
127862306a36Sopenharmony_ci	/* Setup the next idle timeout work */
127962306a36Sopenharmony_ci	queue_delayed_work(info->workqueue, &info->idle_timer_work,
128062306a36Sopenharmony_ci			info->keep_alive_interval*HZ);
128162306a36Sopenharmony_ci}
128262306a36Sopenharmony_ci
128362306a36Sopenharmony_ci/*
128462306a36Sopenharmony_ci * Destroy the transport and related RDMA and memory resources
128562306a36Sopenharmony_ci * Need to go through all the pending counters and make sure on one is using
128662306a36Sopenharmony_ci * the transport while it is destroyed
128762306a36Sopenharmony_ci */
128862306a36Sopenharmony_civoid smbd_destroy(struct TCP_Server_Info *server)
128962306a36Sopenharmony_ci{
129062306a36Sopenharmony_ci	struct smbd_connection *info = server->smbd_conn;
129162306a36Sopenharmony_ci	struct smbd_response *response;
129262306a36Sopenharmony_ci	unsigned long flags;
129362306a36Sopenharmony_ci
129462306a36Sopenharmony_ci	if (!info) {
129562306a36Sopenharmony_ci		log_rdma_event(INFO, "rdma session already destroyed\n");
129662306a36Sopenharmony_ci		return;
129762306a36Sopenharmony_ci	}
129862306a36Sopenharmony_ci
129962306a36Sopenharmony_ci	log_rdma_event(INFO, "destroying rdma session\n");
130062306a36Sopenharmony_ci	if (info->transport_status != SMBD_DISCONNECTED) {
130162306a36Sopenharmony_ci		rdma_disconnect(server->smbd_conn->id);
130262306a36Sopenharmony_ci		log_rdma_event(INFO, "wait for transport being disconnected\n");
130362306a36Sopenharmony_ci		wait_event_interruptible(
130462306a36Sopenharmony_ci			info->disconn_wait,
130562306a36Sopenharmony_ci			info->transport_status == SMBD_DISCONNECTED);
130662306a36Sopenharmony_ci	}
130762306a36Sopenharmony_ci
130862306a36Sopenharmony_ci	log_rdma_event(INFO, "destroying qp\n");
130962306a36Sopenharmony_ci	ib_drain_qp(info->id->qp);
131062306a36Sopenharmony_ci	rdma_destroy_qp(info->id);
131162306a36Sopenharmony_ci
131262306a36Sopenharmony_ci	log_rdma_event(INFO, "cancelling idle timer\n");
131362306a36Sopenharmony_ci	cancel_delayed_work_sync(&info->idle_timer_work);
131462306a36Sopenharmony_ci
131562306a36Sopenharmony_ci	log_rdma_event(INFO, "wait for all send posted to IB to finish\n");
131662306a36Sopenharmony_ci	wait_event(info->wait_send_pending,
131762306a36Sopenharmony_ci		atomic_read(&info->send_pending) == 0);
131862306a36Sopenharmony_ci
131962306a36Sopenharmony_ci	/* It's not possible for upper layer to get to reassembly */
132062306a36Sopenharmony_ci	log_rdma_event(INFO, "drain the reassembly queue\n");
132162306a36Sopenharmony_ci	do {
132262306a36Sopenharmony_ci		spin_lock_irqsave(&info->reassembly_queue_lock, flags);
132362306a36Sopenharmony_ci		response = _get_first_reassembly(info);
132462306a36Sopenharmony_ci		if (response) {
132562306a36Sopenharmony_ci			list_del(&response->list);
132662306a36Sopenharmony_ci			spin_unlock_irqrestore(
132762306a36Sopenharmony_ci				&info->reassembly_queue_lock, flags);
132862306a36Sopenharmony_ci			put_receive_buffer(info, response);
132962306a36Sopenharmony_ci		} else
133062306a36Sopenharmony_ci			spin_unlock_irqrestore(
133162306a36Sopenharmony_ci				&info->reassembly_queue_lock, flags);
133262306a36Sopenharmony_ci	} while (response);
133362306a36Sopenharmony_ci	info->reassembly_data_length = 0;
133462306a36Sopenharmony_ci
133562306a36Sopenharmony_ci	log_rdma_event(INFO, "free receive buffers\n");
133662306a36Sopenharmony_ci	wait_event(info->wait_receive_queues,
133762306a36Sopenharmony_ci		info->count_receive_queue + info->count_empty_packet_queue
133862306a36Sopenharmony_ci			== info->receive_credit_max);
133962306a36Sopenharmony_ci	destroy_receive_buffers(info);
134062306a36Sopenharmony_ci
134162306a36Sopenharmony_ci	/*
134262306a36Sopenharmony_ci	 * For performance reasons, memory registration and deregistration
134362306a36Sopenharmony_ci	 * are not locked by srv_mutex. It is possible some processes are
134462306a36Sopenharmony_ci	 * blocked on transport srv_mutex while holding memory registration.
134562306a36Sopenharmony_ci	 * Release the transport srv_mutex to allow them to hit the failure
134662306a36Sopenharmony_ci	 * path when sending data, and then release memory registartions.
134762306a36Sopenharmony_ci	 */
134862306a36Sopenharmony_ci	log_rdma_event(INFO, "freeing mr list\n");
134962306a36Sopenharmony_ci	wake_up_interruptible_all(&info->wait_mr);
135062306a36Sopenharmony_ci	while (atomic_read(&info->mr_used_count)) {
135162306a36Sopenharmony_ci		cifs_server_unlock(server);
135262306a36Sopenharmony_ci		msleep(1000);
135362306a36Sopenharmony_ci		cifs_server_lock(server);
135462306a36Sopenharmony_ci	}
135562306a36Sopenharmony_ci	destroy_mr_list(info);
135662306a36Sopenharmony_ci
135762306a36Sopenharmony_ci	ib_free_cq(info->send_cq);
135862306a36Sopenharmony_ci	ib_free_cq(info->recv_cq);
135962306a36Sopenharmony_ci	ib_dealloc_pd(info->pd);
136062306a36Sopenharmony_ci	rdma_destroy_id(info->id);
136162306a36Sopenharmony_ci
136262306a36Sopenharmony_ci	/* free mempools */
136362306a36Sopenharmony_ci	mempool_destroy(info->request_mempool);
136462306a36Sopenharmony_ci	kmem_cache_destroy(info->request_cache);
136562306a36Sopenharmony_ci
136662306a36Sopenharmony_ci	mempool_destroy(info->response_mempool);
136762306a36Sopenharmony_ci	kmem_cache_destroy(info->response_cache);
136862306a36Sopenharmony_ci
136962306a36Sopenharmony_ci	info->transport_status = SMBD_DESTROYED;
137062306a36Sopenharmony_ci
137162306a36Sopenharmony_ci	destroy_workqueue(info->workqueue);
137262306a36Sopenharmony_ci	log_rdma_event(INFO,  "rdma session destroyed\n");
137362306a36Sopenharmony_ci	kfree(info);
137462306a36Sopenharmony_ci	server->smbd_conn = NULL;
137562306a36Sopenharmony_ci}
137662306a36Sopenharmony_ci
137762306a36Sopenharmony_ci/*
137862306a36Sopenharmony_ci * Reconnect this SMBD connection, called from upper layer
137962306a36Sopenharmony_ci * return value: 0 on success, or actual error code
138062306a36Sopenharmony_ci */
138162306a36Sopenharmony_ciint smbd_reconnect(struct TCP_Server_Info *server)
138262306a36Sopenharmony_ci{
138362306a36Sopenharmony_ci	log_rdma_event(INFO, "reconnecting rdma session\n");
138462306a36Sopenharmony_ci
138562306a36Sopenharmony_ci	if (!server->smbd_conn) {
138662306a36Sopenharmony_ci		log_rdma_event(INFO, "rdma session already destroyed\n");
138762306a36Sopenharmony_ci		goto create_conn;
138862306a36Sopenharmony_ci	}
138962306a36Sopenharmony_ci
139062306a36Sopenharmony_ci	/*
139162306a36Sopenharmony_ci	 * This is possible if transport is disconnected and we haven't received
139262306a36Sopenharmony_ci	 * notification from RDMA, but upper layer has detected timeout
139362306a36Sopenharmony_ci	 */
139462306a36Sopenharmony_ci	if (server->smbd_conn->transport_status == SMBD_CONNECTED) {
139562306a36Sopenharmony_ci		log_rdma_event(INFO, "disconnecting transport\n");
139662306a36Sopenharmony_ci		smbd_destroy(server);
139762306a36Sopenharmony_ci	}
139862306a36Sopenharmony_ci
139962306a36Sopenharmony_cicreate_conn:
140062306a36Sopenharmony_ci	log_rdma_event(INFO, "creating rdma session\n");
140162306a36Sopenharmony_ci	server->smbd_conn = smbd_get_connection(
140262306a36Sopenharmony_ci		server, (struct sockaddr *) &server->dstaddr);
140362306a36Sopenharmony_ci
140462306a36Sopenharmony_ci	if (server->smbd_conn) {
140562306a36Sopenharmony_ci		cifs_dbg(VFS, "RDMA transport re-established\n");
140662306a36Sopenharmony_ci		trace_smb3_smbd_connect_done(server->hostname, server->conn_id, &server->dstaddr);
140762306a36Sopenharmony_ci		return 0;
140862306a36Sopenharmony_ci	}
140962306a36Sopenharmony_ci	trace_smb3_smbd_connect_err(server->hostname, server->conn_id, &server->dstaddr);
141062306a36Sopenharmony_ci	return -ENOENT;
141162306a36Sopenharmony_ci}
141262306a36Sopenharmony_ci
141362306a36Sopenharmony_cistatic void destroy_caches_and_workqueue(struct smbd_connection *info)
141462306a36Sopenharmony_ci{
141562306a36Sopenharmony_ci	destroy_receive_buffers(info);
141662306a36Sopenharmony_ci	destroy_workqueue(info->workqueue);
141762306a36Sopenharmony_ci	mempool_destroy(info->response_mempool);
141862306a36Sopenharmony_ci	kmem_cache_destroy(info->response_cache);
141962306a36Sopenharmony_ci	mempool_destroy(info->request_mempool);
142062306a36Sopenharmony_ci	kmem_cache_destroy(info->request_cache);
142162306a36Sopenharmony_ci}
142262306a36Sopenharmony_ci
142362306a36Sopenharmony_ci#define MAX_NAME_LEN	80
142462306a36Sopenharmony_cistatic int allocate_caches_and_workqueue(struct smbd_connection *info)
142562306a36Sopenharmony_ci{
142662306a36Sopenharmony_ci	char name[MAX_NAME_LEN];
142762306a36Sopenharmony_ci	int rc;
142862306a36Sopenharmony_ci
142962306a36Sopenharmony_ci	scnprintf(name, MAX_NAME_LEN, "smbd_request_%p", info);
143062306a36Sopenharmony_ci	info->request_cache =
143162306a36Sopenharmony_ci		kmem_cache_create(
143262306a36Sopenharmony_ci			name,
143362306a36Sopenharmony_ci			sizeof(struct smbd_request) +
143462306a36Sopenharmony_ci				sizeof(struct smbd_data_transfer),
143562306a36Sopenharmony_ci			0, SLAB_HWCACHE_ALIGN, NULL);
143662306a36Sopenharmony_ci	if (!info->request_cache)
143762306a36Sopenharmony_ci		return -ENOMEM;
143862306a36Sopenharmony_ci
143962306a36Sopenharmony_ci	info->request_mempool =
144062306a36Sopenharmony_ci		mempool_create(info->send_credit_target, mempool_alloc_slab,
144162306a36Sopenharmony_ci			mempool_free_slab, info->request_cache);
144262306a36Sopenharmony_ci	if (!info->request_mempool)
144362306a36Sopenharmony_ci		goto out1;
144462306a36Sopenharmony_ci
144562306a36Sopenharmony_ci	scnprintf(name, MAX_NAME_LEN, "smbd_response_%p", info);
144662306a36Sopenharmony_ci	info->response_cache =
144762306a36Sopenharmony_ci		kmem_cache_create(
144862306a36Sopenharmony_ci			name,
144962306a36Sopenharmony_ci			sizeof(struct smbd_response) +
145062306a36Sopenharmony_ci				info->max_receive_size,
145162306a36Sopenharmony_ci			0, SLAB_HWCACHE_ALIGN, NULL);
145262306a36Sopenharmony_ci	if (!info->response_cache)
145362306a36Sopenharmony_ci		goto out2;
145462306a36Sopenharmony_ci
145562306a36Sopenharmony_ci	info->response_mempool =
145662306a36Sopenharmony_ci		mempool_create(info->receive_credit_max, mempool_alloc_slab,
145762306a36Sopenharmony_ci		       mempool_free_slab, info->response_cache);
145862306a36Sopenharmony_ci	if (!info->response_mempool)
145962306a36Sopenharmony_ci		goto out3;
146062306a36Sopenharmony_ci
146162306a36Sopenharmony_ci	scnprintf(name, MAX_NAME_LEN, "smbd_%p", info);
146262306a36Sopenharmony_ci	info->workqueue = create_workqueue(name);
146362306a36Sopenharmony_ci	if (!info->workqueue)
146462306a36Sopenharmony_ci		goto out4;
146562306a36Sopenharmony_ci
146662306a36Sopenharmony_ci	rc = allocate_receive_buffers(info, info->receive_credit_max);
146762306a36Sopenharmony_ci	if (rc) {
146862306a36Sopenharmony_ci		log_rdma_event(ERR, "failed to allocate receive buffers\n");
146962306a36Sopenharmony_ci		goto out5;
147062306a36Sopenharmony_ci	}
147162306a36Sopenharmony_ci
147262306a36Sopenharmony_ci	return 0;
147362306a36Sopenharmony_ci
147462306a36Sopenharmony_ciout5:
147562306a36Sopenharmony_ci	destroy_workqueue(info->workqueue);
147662306a36Sopenharmony_ciout4:
147762306a36Sopenharmony_ci	mempool_destroy(info->response_mempool);
147862306a36Sopenharmony_ciout3:
147962306a36Sopenharmony_ci	kmem_cache_destroy(info->response_cache);
148062306a36Sopenharmony_ciout2:
148162306a36Sopenharmony_ci	mempool_destroy(info->request_mempool);
148262306a36Sopenharmony_ciout1:
148362306a36Sopenharmony_ci	kmem_cache_destroy(info->request_cache);
148462306a36Sopenharmony_ci	return -ENOMEM;
148562306a36Sopenharmony_ci}
148662306a36Sopenharmony_ci
148762306a36Sopenharmony_ci/* Create a SMBD connection, called by upper layer */
148862306a36Sopenharmony_cistatic struct smbd_connection *_smbd_get_connection(
148962306a36Sopenharmony_ci	struct TCP_Server_Info *server, struct sockaddr *dstaddr, int port)
149062306a36Sopenharmony_ci{
149162306a36Sopenharmony_ci	int rc;
149262306a36Sopenharmony_ci	struct smbd_connection *info;
149362306a36Sopenharmony_ci	struct rdma_conn_param conn_param;
149462306a36Sopenharmony_ci	struct ib_qp_init_attr qp_attr;
149562306a36Sopenharmony_ci	struct sockaddr_in *addr_in = (struct sockaddr_in *) dstaddr;
149662306a36Sopenharmony_ci	struct ib_port_immutable port_immutable;
149762306a36Sopenharmony_ci	u32 ird_ord_hdr[2];
149862306a36Sopenharmony_ci
149962306a36Sopenharmony_ci	info = kzalloc(sizeof(struct smbd_connection), GFP_KERNEL);
150062306a36Sopenharmony_ci	if (!info)
150162306a36Sopenharmony_ci		return NULL;
150262306a36Sopenharmony_ci
150362306a36Sopenharmony_ci	info->transport_status = SMBD_CONNECTING;
150462306a36Sopenharmony_ci	rc = smbd_ia_open(info, dstaddr, port);
150562306a36Sopenharmony_ci	if (rc) {
150662306a36Sopenharmony_ci		log_rdma_event(INFO, "smbd_ia_open rc=%d\n", rc);
150762306a36Sopenharmony_ci		goto create_id_failed;
150862306a36Sopenharmony_ci	}
150962306a36Sopenharmony_ci
151062306a36Sopenharmony_ci	if (smbd_send_credit_target > info->id->device->attrs.max_cqe ||
151162306a36Sopenharmony_ci	    smbd_send_credit_target > info->id->device->attrs.max_qp_wr) {
151262306a36Sopenharmony_ci		log_rdma_event(ERR, "consider lowering send_credit_target = %d. Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n",
151362306a36Sopenharmony_ci			       smbd_send_credit_target,
151462306a36Sopenharmony_ci			       info->id->device->attrs.max_cqe,
151562306a36Sopenharmony_ci			       info->id->device->attrs.max_qp_wr);
151662306a36Sopenharmony_ci		goto config_failed;
151762306a36Sopenharmony_ci	}
151862306a36Sopenharmony_ci
151962306a36Sopenharmony_ci	if (smbd_receive_credit_max > info->id->device->attrs.max_cqe ||
152062306a36Sopenharmony_ci	    smbd_receive_credit_max > info->id->device->attrs.max_qp_wr) {
152162306a36Sopenharmony_ci		log_rdma_event(ERR, "consider lowering receive_credit_max = %d. Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n",
152262306a36Sopenharmony_ci			       smbd_receive_credit_max,
152362306a36Sopenharmony_ci			       info->id->device->attrs.max_cqe,
152462306a36Sopenharmony_ci			       info->id->device->attrs.max_qp_wr);
152562306a36Sopenharmony_ci		goto config_failed;
152662306a36Sopenharmony_ci	}
152762306a36Sopenharmony_ci
152862306a36Sopenharmony_ci	info->receive_credit_max = smbd_receive_credit_max;
152962306a36Sopenharmony_ci	info->send_credit_target = smbd_send_credit_target;
153062306a36Sopenharmony_ci	info->max_send_size = smbd_max_send_size;
153162306a36Sopenharmony_ci	info->max_fragmented_recv_size = smbd_max_fragmented_recv_size;
153262306a36Sopenharmony_ci	info->max_receive_size = smbd_max_receive_size;
153362306a36Sopenharmony_ci	info->keep_alive_interval = smbd_keep_alive_interval;
153462306a36Sopenharmony_ci
153562306a36Sopenharmony_ci	if (info->id->device->attrs.max_send_sge < SMBDIRECT_MAX_SEND_SGE ||
153662306a36Sopenharmony_ci	    info->id->device->attrs.max_recv_sge < SMBDIRECT_MAX_RECV_SGE) {
153762306a36Sopenharmony_ci		log_rdma_event(ERR,
153862306a36Sopenharmony_ci			"device %.*s max_send_sge/max_recv_sge = %d/%d too small\n",
153962306a36Sopenharmony_ci			IB_DEVICE_NAME_MAX,
154062306a36Sopenharmony_ci			info->id->device->name,
154162306a36Sopenharmony_ci			info->id->device->attrs.max_send_sge,
154262306a36Sopenharmony_ci			info->id->device->attrs.max_recv_sge);
154362306a36Sopenharmony_ci		goto config_failed;
154462306a36Sopenharmony_ci	}
154562306a36Sopenharmony_ci
154662306a36Sopenharmony_ci	info->send_cq = NULL;
154762306a36Sopenharmony_ci	info->recv_cq = NULL;
154862306a36Sopenharmony_ci	info->send_cq =
154962306a36Sopenharmony_ci		ib_alloc_cq_any(info->id->device, info,
155062306a36Sopenharmony_ci				info->send_credit_target, IB_POLL_SOFTIRQ);
155162306a36Sopenharmony_ci	if (IS_ERR(info->send_cq)) {
155262306a36Sopenharmony_ci		info->send_cq = NULL;
155362306a36Sopenharmony_ci		goto alloc_cq_failed;
155462306a36Sopenharmony_ci	}
155562306a36Sopenharmony_ci
155662306a36Sopenharmony_ci	info->recv_cq =
155762306a36Sopenharmony_ci		ib_alloc_cq_any(info->id->device, info,
155862306a36Sopenharmony_ci				info->receive_credit_max, IB_POLL_SOFTIRQ);
155962306a36Sopenharmony_ci	if (IS_ERR(info->recv_cq)) {
156062306a36Sopenharmony_ci		info->recv_cq = NULL;
156162306a36Sopenharmony_ci		goto alloc_cq_failed;
156262306a36Sopenharmony_ci	}
156362306a36Sopenharmony_ci
156462306a36Sopenharmony_ci	memset(&qp_attr, 0, sizeof(qp_attr));
156562306a36Sopenharmony_ci	qp_attr.event_handler = smbd_qp_async_error_upcall;
156662306a36Sopenharmony_ci	qp_attr.qp_context = info;
156762306a36Sopenharmony_ci	qp_attr.cap.max_send_wr = info->send_credit_target;
156862306a36Sopenharmony_ci	qp_attr.cap.max_recv_wr = info->receive_credit_max;
156962306a36Sopenharmony_ci	qp_attr.cap.max_send_sge = SMBDIRECT_MAX_SEND_SGE;
157062306a36Sopenharmony_ci	qp_attr.cap.max_recv_sge = SMBDIRECT_MAX_RECV_SGE;
157162306a36Sopenharmony_ci	qp_attr.cap.max_inline_data = 0;
157262306a36Sopenharmony_ci	qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
157362306a36Sopenharmony_ci	qp_attr.qp_type = IB_QPT_RC;
157462306a36Sopenharmony_ci	qp_attr.send_cq = info->send_cq;
157562306a36Sopenharmony_ci	qp_attr.recv_cq = info->recv_cq;
157662306a36Sopenharmony_ci	qp_attr.port_num = ~0;
157762306a36Sopenharmony_ci
157862306a36Sopenharmony_ci	rc = rdma_create_qp(info->id, info->pd, &qp_attr);
157962306a36Sopenharmony_ci	if (rc) {
158062306a36Sopenharmony_ci		log_rdma_event(ERR, "rdma_create_qp failed %i\n", rc);
158162306a36Sopenharmony_ci		goto create_qp_failed;
158262306a36Sopenharmony_ci	}
158362306a36Sopenharmony_ci
158462306a36Sopenharmony_ci	memset(&conn_param, 0, sizeof(conn_param));
158562306a36Sopenharmony_ci	conn_param.initiator_depth = 0;
158662306a36Sopenharmony_ci
158762306a36Sopenharmony_ci	conn_param.responder_resources =
158862306a36Sopenharmony_ci		info->id->device->attrs.max_qp_rd_atom
158962306a36Sopenharmony_ci			< SMBD_CM_RESPONDER_RESOURCES ?
159062306a36Sopenharmony_ci		info->id->device->attrs.max_qp_rd_atom :
159162306a36Sopenharmony_ci		SMBD_CM_RESPONDER_RESOURCES;
159262306a36Sopenharmony_ci	info->responder_resources = conn_param.responder_resources;
159362306a36Sopenharmony_ci	log_rdma_mr(INFO, "responder_resources=%d\n",
159462306a36Sopenharmony_ci		info->responder_resources);
159562306a36Sopenharmony_ci
159662306a36Sopenharmony_ci	/* Need to send IRD/ORD in private data for iWARP */
159762306a36Sopenharmony_ci	info->id->device->ops.get_port_immutable(
159862306a36Sopenharmony_ci		info->id->device, info->id->port_num, &port_immutable);
159962306a36Sopenharmony_ci	if (port_immutable.core_cap_flags & RDMA_CORE_PORT_IWARP) {
160062306a36Sopenharmony_ci		ird_ord_hdr[0] = info->responder_resources;
160162306a36Sopenharmony_ci		ird_ord_hdr[1] = 1;
160262306a36Sopenharmony_ci		conn_param.private_data = ird_ord_hdr;
160362306a36Sopenharmony_ci		conn_param.private_data_len = sizeof(ird_ord_hdr);
160462306a36Sopenharmony_ci	} else {
160562306a36Sopenharmony_ci		conn_param.private_data = NULL;
160662306a36Sopenharmony_ci		conn_param.private_data_len = 0;
160762306a36Sopenharmony_ci	}
160862306a36Sopenharmony_ci
160962306a36Sopenharmony_ci	conn_param.retry_count = SMBD_CM_RETRY;
161062306a36Sopenharmony_ci	conn_param.rnr_retry_count = SMBD_CM_RNR_RETRY;
161162306a36Sopenharmony_ci	conn_param.flow_control = 0;
161262306a36Sopenharmony_ci
161362306a36Sopenharmony_ci	log_rdma_event(INFO, "connecting to IP %pI4 port %d\n",
161462306a36Sopenharmony_ci		&addr_in->sin_addr, port);
161562306a36Sopenharmony_ci
161662306a36Sopenharmony_ci	init_waitqueue_head(&info->conn_wait);
161762306a36Sopenharmony_ci	init_waitqueue_head(&info->disconn_wait);
161862306a36Sopenharmony_ci	init_waitqueue_head(&info->wait_reassembly_queue);
161962306a36Sopenharmony_ci	rc = rdma_connect(info->id, &conn_param);
162062306a36Sopenharmony_ci	if (rc) {
162162306a36Sopenharmony_ci		log_rdma_event(ERR, "rdma_connect() failed with %i\n", rc);
162262306a36Sopenharmony_ci		goto rdma_connect_failed;
162362306a36Sopenharmony_ci	}
162462306a36Sopenharmony_ci
162562306a36Sopenharmony_ci	wait_event_interruptible(
162662306a36Sopenharmony_ci		info->conn_wait, info->transport_status != SMBD_CONNECTING);
162762306a36Sopenharmony_ci
162862306a36Sopenharmony_ci	if (info->transport_status != SMBD_CONNECTED) {
162962306a36Sopenharmony_ci		log_rdma_event(ERR, "rdma_connect failed port=%d\n", port);
163062306a36Sopenharmony_ci		goto rdma_connect_failed;
163162306a36Sopenharmony_ci	}
163262306a36Sopenharmony_ci
163362306a36Sopenharmony_ci	log_rdma_event(INFO, "rdma_connect connected\n");
163462306a36Sopenharmony_ci
163562306a36Sopenharmony_ci	rc = allocate_caches_and_workqueue(info);
163662306a36Sopenharmony_ci	if (rc) {
163762306a36Sopenharmony_ci		log_rdma_event(ERR, "cache allocation failed\n");
163862306a36Sopenharmony_ci		goto allocate_cache_failed;
163962306a36Sopenharmony_ci	}
164062306a36Sopenharmony_ci
164162306a36Sopenharmony_ci	init_waitqueue_head(&info->wait_send_queue);
164262306a36Sopenharmony_ci	INIT_DELAYED_WORK(&info->idle_timer_work, idle_connection_timer);
164362306a36Sopenharmony_ci	queue_delayed_work(info->workqueue, &info->idle_timer_work,
164462306a36Sopenharmony_ci		info->keep_alive_interval*HZ);
164562306a36Sopenharmony_ci
164662306a36Sopenharmony_ci	init_waitqueue_head(&info->wait_send_pending);
164762306a36Sopenharmony_ci	atomic_set(&info->send_pending, 0);
164862306a36Sopenharmony_ci
164962306a36Sopenharmony_ci	init_waitqueue_head(&info->wait_post_send);
165062306a36Sopenharmony_ci
165162306a36Sopenharmony_ci	INIT_WORK(&info->disconnect_work, smbd_disconnect_rdma_work);
165262306a36Sopenharmony_ci	INIT_WORK(&info->post_send_credits_work, smbd_post_send_credits);
165362306a36Sopenharmony_ci	info->new_credits_offered = 0;
165462306a36Sopenharmony_ci	spin_lock_init(&info->lock_new_credits_offered);
165562306a36Sopenharmony_ci
165662306a36Sopenharmony_ci	rc = smbd_negotiate(info);
165762306a36Sopenharmony_ci	if (rc) {
165862306a36Sopenharmony_ci		log_rdma_event(ERR, "smbd_negotiate rc=%d\n", rc);
165962306a36Sopenharmony_ci		goto negotiation_failed;
166062306a36Sopenharmony_ci	}
166162306a36Sopenharmony_ci
166262306a36Sopenharmony_ci	rc = allocate_mr_list(info);
166362306a36Sopenharmony_ci	if (rc) {
166462306a36Sopenharmony_ci		log_rdma_mr(ERR, "memory registration allocation failed\n");
166562306a36Sopenharmony_ci		goto allocate_mr_failed;
166662306a36Sopenharmony_ci	}
166762306a36Sopenharmony_ci
166862306a36Sopenharmony_ci	return info;
166962306a36Sopenharmony_ci
167062306a36Sopenharmony_ciallocate_mr_failed:
167162306a36Sopenharmony_ci	/* At this point, need to a full transport shutdown */
167262306a36Sopenharmony_ci	server->smbd_conn = info;
167362306a36Sopenharmony_ci	smbd_destroy(server);
167462306a36Sopenharmony_ci	return NULL;
167562306a36Sopenharmony_ci
167662306a36Sopenharmony_cinegotiation_failed:
167762306a36Sopenharmony_ci	cancel_delayed_work_sync(&info->idle_timer_work);
167862306a36Sopenharmony_ci	destroy_caches_and_workqueue(info);
167962306a36Sopenharmony_ci	info->transport_status = SMBD_NEGOTIATE_FAILED;
168062306a36Sopenharmony_ci	init_waitqueue_head(&info->conn_wait);
168162306a36Sopenharmony_ci	rdma_disconnect(info->id);
168262306a36Sopenharmony_ci	wait_event(info->conn_wait,
168362306a36Sopenharmony_ci		info->transport_status == SMBD_DISCONNECTED);
168462306a36Sopenharmony_ci
168562306a36Sopenharmony_ciallocate_cache_failed:
168662306a36Sopenharmony_cirdma_connect_failed:
168762306a36Sopenharmony_ci	rdma_destroy_qp(info->id);
168862306a36Sopenharmony_ci
168962306a36Sopenharmony_cicreate_qp_failed:
169062306a36Sopenharmony_cialloc_cq_failed:
169162306a36Sopenharmony_ci	if (info->send_cq)
169262306a36Sopenharmony_ci		ib_free_cq(info->send_cq);
169362306a36Sopenharmony_ci	if (info->recv_cq)
169462306a36Sopenharmony_ci		ib_free_cq(info->recv_cq);
169562306a36Sopenharmony_ci
169662306a36Sopenharmony_ciconfig_failed:
169762306a36Sopenharmony_ci	ib_dealloc_pd(info->pd);
169862306a36Sopenharmony_ci	rdma_destroy_id(info->id);
169962306a36Sopenharmony_ci
170062306a36Sopenharmony_cicreate_id_failed:
170162306a36Sopenharmony_ci	kfree(info);
170262306a36Sopenharmony_ci	return NULL;
170362306a36Sopenharmony_ci}
170462306a36Sopenharmony_ci
170562306a36Sopenharmony_cistruct smbd_connection *smbd_get_connection(
170662306a36Sopenharmony_ci	struct TCP_Server_Info *server, struct sockaddr *dstaddr)
170762306a36Sopenharmony_ci{
170862306a36Sopenharmony_ci	struct smbd_connection *ret;
170962306a36Sopenharmony_ci	int port = SMBD_PORT;
171062306a36Sopenharmony_ci
171162306a36Sopenharmony_citry_again:
171262306a36Sopenharmony_ci	ret = _smbd_get_connection(server, dstaddr, port);
171362306a36Sopenharmony_ci
171462306a36Sopenharmony_ci	/* Try SMB_PORT if SMBD_PORT doesn't work */
171562306a36Sopenharmony_ci	if (!ret && port == SMBD_PORT) {
171662306a36Sopenharmony_ci		port = SMB_PORT;
171762306a36Sopenharmony_ci		goto try_again;
171862306a36Sopenharmony_ci	}
171962306a36Sopenharmony_ci	return ret;
172062306a36Sopenharmony_ci}
172162306a36Sopenharmony_ci
172262306a36Sopenharmony_ci/*
172362306a36Sopenharmony_ci * Receive data from receive reassembly queue
172462306a36Sopenharmony_ci * All the incoming data packets are placed in reassembly queue
172562306a36Sopenharmony_ci * buf: the buffer to read data into
172662306a36Sopenharmony_ci * size: the length of data to read
172762306a36Sopenharmony_ci * return value: actual data read
 * Note: this implementation copies the data from reassembly queue to receive
172962306a36Sopenharmony_ci * buffers used by upper layer. This is not the optimal code path. A better way
173062306a36Sopenharmony_ci * to do it is to not have upper layer allocate its receive buffers but rather
173162306a36Sopenharmony_ci * borrow the buffer from reassembly queue, and return it after data is
173262306a36Sopenharmony_ci * consumed. But this will require more changes to upper layer code, and also
173362306a36Sopenharmony_ci * need to consider packet boundaries while they still being reassembled.
173462306a36Sopenharmony_ci */
173562306a36Sopenharmony_cistatic int smbd_recv_buf(struct smbd_connection *info, char *buf,
173662306a36Sopenharmony_ci		unsigned int size)
173762306a36Sopenharmony_ci{
173862306a36Sopenharmony_ci	struct smbd_response *response;
173962306a36Sopenharmony_ci	struct smbd_data_transfer *data_transfer;
174062306a36Sopenharmony_ci	int to_copy, to_read, data_read, offset;
174162306a36Sopenharmony_ci	u32 data_length, remaining_data_length, data_offset;
174262306a36Sopenharmony_ci	int rc;
174362306a36Sopenharmony_ci
174462306a36Sopenharmony_ciagain:
174562306a36Sopenharmony_ci	/*
174662306a36Sopenharmony_ci	 * No need to hold the reassembly queue lock all the time as we are
174762306a36Sopenharmony_ci	 * the only one reading from the front of the queue. The transport
174862306a36Sopenharmony_ci	 * may add more entries to the back of the queue at the same time
174962306a36Sopenharmony_ci	 */
175062306a36Sopenharmony_ci	log_read(INFO, "size=%d info->reassembly_data_length=%d\n", size,
175162306a36Sopenharmony_ci		info->reassembly_data_length);
175262306a36Sopenharmony_ci	if (info->reassembly_data_length >= size) {
175362306a36Sopenharmony_ci		int queue_length;
175462306a36Sopenharmony_ci		int queue_removed = 0;
175562306a36Sopenharmony_ci
175662306a36Sopenharmony_ci		/*
175762306a36Sopenharmony_ci		 * Need to make sure reassembly_data_length is read before
175862306a36Sopenharmony_ci		 * reading reassembly_queue_length and calling
175962306a36Sopenharmony_ci		 * _get_first_reassembly. This call is lock free
176062306a36Sopenharmony_ci		 * as we never read at the end of the queue which are being
176162306a36Sopenharmony_ci		 * updated in SOFTIRQ as more data is received
176262306a36Sopenharmony_ci		 */
176362306a36Sopenharmony_ci		virt_rmb();
176462306a36Sopenharmony_ci		queue_length = info->reassembly_queue_length;
176562306a36Sopenharmony_ci		data_read = 0;
176662306a36Sopenharmony_ci		to_read = size;
176762306a36Sopenharmony_ci		offset = info->first_entry_offset;
176862306a36Sopenharmony_ci		while (data_read < size) {
176962306a36Sopenharmony_ci			response = _get_first_reassembly(info);
177062306a36Sopenharmony_ci			data_transfer = smbd_response_payload(response);
177162306a36Sopenharmony_ci			data_length = le32_to_cpu(data_transfer->data_length);
177262306a36Sopenharmony_ci			remaining_data_length =
177362306a36Sopenharmony_ci				le32_to_cpu(
177462306a36Sopenharmony_ci					data_transfer->remaining_data_length);
177562306a36Sopenharmony_ci			data_offset = le32_to_cpu(data_transfer->data_offset);
177662306a36Sopenharmony_ci
177762306a36Sopenharmony_ci			/*
177862306a36Sopenharmony_ci			 * The upper layer expects RFC1002 length at the
177962306a36Sopenharmony_ci			 * beginning of the payload. Return it to indicate
178062306a36Sopenharmony_ci			 * the total length of the packet. This minimize the
178162306a36Sopenharmony_ci			 * change to upper layer packet processing logic. This
178262306a36Sopenharmony_ci			 * will be eventually remove when an intermediate
178362306a36Sopenharmony_ci			 * transport layer is added
178462306a36Sopenharmony_ci			 */
178562306a36Sopenharmony_ci			if (response->first_segment && size == 4) {
178662306a36Sopenharmony_ci				unsigned int rfc1002_len =
178762306a36Sopenharmony_ci					data_length + remaining_data_length;
178862306a36Sopenharmony_ci				*((__be32 *)buf) = cpu_to_be32(rfc1002_len);
178962306a36Sopenharmony_ci				data_read = 4;
179062306a36Sopenharmony_ci				response->first_segment = false;
179162306a36Sopenharmony_ci				log_read(INFO, "returning rfc1002 length %d\n",
179262306a36Sopenharmony_ci					rfc1002_len);
179362306a36Sopenharmony_ci				goto read_rfc1002_done;
179462306a36Sopenharmony_ci			}
179562306a36Sopenharmony_ci
179662306a36Sopenharmony_ci			to_copy = min_t(int, data_length - offset, to_read);
179762306a36Sopenharmony_ci			memcpy(
179862306a36Sopenharmony_ci				buf + data_read,
179962306a36Sopenharmony_ci				(char *)data_transfer + data_offset + offset,
180062306a36Sopenharmony_ci				to_copy);
180162306a36Sopenharmony_ci
180262306a36Sopenharmony_ci			/* move on to the next buffer? */
180362306a36Sopenharmony_ci			if (to_copy == data_length - offset) {
180462306a36Sopenharmony_ci				queue_length--;
180562306a36Sopenharmony_ci				/*
180662306a36Sopenharmony_ci				 * No need to lock if we are not at the
180762306a36Sopenharmony_ci				 * end of the queue
180862306a36Sopenharmony_ci				 */
180962306a36Sopenharmony_ci				if (queue_length)
181062306a36Sopenharmony_ci					list_del(&response->list);
181162306a36Sopenharmony_ci				else {
181262306a36Sopenharmony_ci					spin_lock_irq(
181362306a36Sopenharmony_ci						&info->reassembly_queue_lock);
181462306a36Sopenharmony_ci					list_del(&response->list);
181562306a36Sopenharmony_ci					spin_unlock_irq(
181662306a36Sopenharmony_ci						&info->reassembly_queue_lock);
181762306a36Sopenharmony_ci				}
181862306a36Sopenharmony_ci				queue_removed++;
181962306a36Sopenharmony_ci				info->count_reassembly_queue--;
182062306a36Sopenharmony_ci				info->count_dequeue_reassembly_queue++;
182162306a36Sopenharmony_ci				put_receive_buffer(info, response);
182262306a36Sopenharmony_ci				offset = 0;
182362306a36Sopenharmony_ci				log_read(INFO, "put_receive_buffer offset=0\n");
182462306a36Sopenharmony_ci			} else
182562306a36Sopenharmony_ci				offset += to_copy;
182662306a36Sopenharmony_ci
182762306a36Sopenharmony_ci			to_read -= to_copy;
182862306a36Sopenharmony_ci			data_read += to_copy;
182962306a36Sopenharmony_ci
183062306a36Sopenharmony_ci			log_read(INFO, "_get_first_reassembly memcpy %d bytes data_transfer_length-offset=%d after that to_read=%d data_read=%d offset=%d\n",
183162306a36Sopenharmony_ci				 to_copy, data_length - offset,
183262306a36Sopenharmony_ci				 to_read, data_read, offset);
183362306a36Sopenharmony_ci		}
183462306a36Sopenharmony_ci
183562306a36Sopenharmony_ci		spin_lock_irq(&info->reassembly_queue_lock);
183662306a36Sopenharmony_ci		info->reassembly_data_length -= data_read;
183762306a36Sopenharmony_ci		info->reassembly_queue_length -= queue_removed;
183862306a36Sopenharmony_ci		spin_unlock_irq(&info->reassembly_queue_lock);
183962306a36Sopenharmony_ci
184062306a36Sopenharmony_ci		info->first_entry_offset = offset;
184162306a36Sopenharmony_ci		log_read(INFO, "returning to thread data_read=%d reassembly_data_length=%d first_entry_offset=%d\n",
184262306a36Sopenharmony_ci			 data_read, info->reassembly_data_length,
184362306a36Sopenharmony_ci			 info->first_entry_offset);
184462306a36Sopenharmony_ciread_rfc1002_done:
184562306a36Sopenharmony_ci		return data_read;
184662306a36Sopenharmony_ci	}
184762306a36Sopenharmony_ci
184862306a36Sopenharmony_ci	log_read(INFO, "wait_event on more data\n");
184962306a36Sopenharmony_ci	rc = wait_event_interruptible(
185062306a36Sopenharmony_ci		info->wait_reassembly_queue,
185162306a36Sopenharmony_ci		info->reassembly_data_length >= size ||
185262306a36Sopenharmony_ci			info->transport_status != SMBD_CONNECTED);
185362306a36Sopenharmony_ci	/* Don't return any data if interrupted */
185462306a36Sopenharmony_ci	if (rc)
185562306a36Sopenharmony_ci		return rc;
185662306a36Sopenharmony_ci
185762306a36Sopenharmony_ci	if (info->transport_status != SMBD_CONNECTED) {
185862306a36Sopenharmony_ci		log_read(ERR, "disconnected\n");
185962306a36Sopenharmony_ci		return -ECONNABORTED;
186062306a36Sopenharmony_ci	}
186162306a36Sopenharmony_ci
186262306a36Sopenharmony_ci	goto again;
186362306a36Sopenharmony_ci}
186462306a36Sopenharmony_ci
186562306a36Sopenharmony_ci/*
186662306a36Sopenharmony_ci * Receive a page from receive reassembly queue
186762306a36Sopenharmony_ci * page: the page to read data into
186862306a36Sopenharmony_ci * to_read: the length of data to read
186962306a36Sopenharmony_ci * return value: actual data read
187062306a36Sopenharmony_ci */
187162306a36Sopenharmony_cistatic int smbd_recv_page(struct smbd_connection *info,
187262306a36Sopenharmony_ci		struct page *page, unsigned int page_offset,
187362306a36Sopenharmony_ci		unsigned int to_read)
187462306a36Sopenharmony_ci{
187562306a36Sopenharmony_ci	int ret;
187662306a36Sopenharmony_ci	char *to_address;
187762306a36Sopenharmony_ci	void *page_address;
187862306a36Sopenharmony_ci
187962306a36Sopenharmony_ci	/* make sure we have the page ready for read */
188062306a36Sopenharmony_ci	ret = wait_event_interruptible(
188162306a36Sopenharmony_ci		info->wait_reassembly_queue,
188262306a36Sopenharmony_ci		info->reassembly_data_length >= to_read ||
188362306a36Sopenharmony_ci			info->transport_status != SMBD_CONNECTED);
188462306a36Sopenharmony_ci	if (ret)
188562306a36Sopenharmony_ci		return ret;
188662306a36Sopenharmony_ci
188762306a36Sopenharmony_ci	/* now we can read from reassembly queue and not sleep */
188862306a36Sopenharmony_ci	page_address = kmap_atomic(page);
188962306a36Sopenharmony_ci	to_address = (char *) page_address + page_offset;
189062306a36Sopenharmony_ci
189162306a36Sopenharmony_ci	log_read(INFO, "reading from page=%p address=%p to_read=%d\n",
189262306a36Sopenharmony_ci		page, to_address, to_read);
189362306a36Sopenharmony_ci
189462306a36Sopenharmony_ci	ret = smbd_recv_buf(info, to_address, to_read);
189562306a36Sopenharmony_ci	kunmap_atomic(page_address);
189662306a36Sopenharmony_ci
189762306a36Sopenharmony_ci	return ret;
189862306a36Sopenharmony_ci}
189962306a36Sopenharmony_ci
190062306a36Sopenharmony_ci/*
190162306a36Sopenharmony_ci * Receive data from transport
190262306a36Sopenharmony_ci * msg: a msghdr point to the buffer, can be ITER_KVEC or ITER_BVEC
190362306a36Sopenharmony_ci * return: total bytes read, or 0. SMB Direct will not do partial read.
190462306a36Sopenharmony_ci */
190562306a36Sopenharmony_ciint smbd_recv(struct smbd_connection *info, struct msghdr *msg)
190662306a36Sopenharmony_ci{
190762306a36Sopenharmony_ci	char *buf;
190862306a36Sopenharmony_ci	struct page *page;
190962306a36Sopenharmony_ci	unsigned int to_read, page_offset;
191062306a36Sopenharmony_ci	int rc;
191162306a36Sopenharmony_ci
191262306a36Sopenharmony_ci	if (iov_iter_rw(&msg->msg_iter) == WRITE) {
191362306a36Sopenharmony_ci		/* It's a bug in upper layer to get there */
191462306a36Sopenharmony_ci		cifs_dbg(VFS, "Invalid msg iter dir %u\n",
191562306a36Sopenharmony_ci			 iov_iter_rw(&msg->msg_iter));
191662306a36Sopenharmony_ci		rc = -EINVAL;
191762306a36Sopenharmony_ci		goto out;
191862306a36Sopenharmony_ci	}
191962306a36Sopenharmony_ci
192062306a36Sopenharmony_ci	switch (iov_iter_type(&msg->msg_iter)) {
192162306a36Sopenharmony_ci	case ITER_KVEC:
192262306a36Sopenharmony_ci		buf = msg->msg_iter.kvec->iov_base;
192362306a36Sopenharmony_ci		to_read = msg->msg_iter.kvec->iov_len;
192462306a36Sopenharmony_ci		rc = smbd_recv_buf(info, buf, to_read);
192562306a36Sopenharmony_ci		break;
192662306a36Sopenharmony_ci
192762306a36Sopenharmony_ci	case ITER_BVEC:
192862306a36Sopenharmony_ci		page = msg->msg_iter.bvec->bv_page;
192962306a36Sopenharmony_ci		page_offset = msg->msg_iter.bvec->bv_offset;
193062306a36Sopenharmony_ci		to_read = msg->msg_iter.bvec->bv_len;
193162306a36Sopenharmony_ci		rc = smbd_recv_page(info, page, page_offset, to_read);
193262306a36Sopenharmony_ci		break;
193362306a36Sopenharmony_ci
193462306a36Sopenharmony_ci	default:
193562306a36Sopenharmony_ci		/* It's a bug in upper layer to get there */
193662306a36Sopenharmony_ci		cifs_dbg(VFS, "Invalid msg type %d\n",
193762306a36Sopenharmony_ci			 iov_iter_type(&msg->msg_iter));
193862306a36Sopenharmony_ci		rc = -EINVAL;
193962306a36Sopenharmony_ci	}
194062306a36Sopenharmony_ci
194162306a36Sopenharmony_ciout:
194262306a36Sopenharmony_ci	/* SMBDirect will read it all or nothing */
194362306a36Sopenharmony_ci	if (rc > 0)
194462306a36Sopenharmony_ci		msg->msg_iter.count = 0;
194562306a36Sopenharmony_ci	return rc;
194662306a36Sopenharmony_ci}
194762306a36Sopenharmony_ci
194862306a36Sopenharmony_ci/*
194962306a36Sopenharmony_ci * Send data to transport
195062306a36Sopenharmony_ci * Each rqst is transported as a SMBDirect payload
195162306a36Sopenharmony_ci * rqst: the data to write
195262306a36Sopenharmony_ci * return value: 0 if successfully write, otherwise error code
195362306a36Sopenharmony_ci */
195462306a36Sopenharmony_ciint smbd_send(struct TCP_Server_Info *server,
195562306a36Sopenharmony_ci	int num_rqst, struct smb_rqst *rqst_array)
195662306a36Sopenharmony_ci{
195762306a36Sopenharmony_ci	struct smbd_connection *info = server->smbd_conn;
195862306a36Sopenharmony_ci	struct smb_rqst *rqst;
195962306a36Sopenharmony_ci	struct iov_iter iter;
196062306a36Sopenharmony_ci	unsigned int remaining_data_length, klen;
196162306a36Sopenharmony_ci	int rc, i, rqst_idx;
196262306a36Sopenharmony_ci
196362306a36Sopenharmony_ci	if (info->transport_status != SMBD_CONNECTED)
196462306a36Sopenharmony_ci		return -EAGAIN;
196562306a36Sopenharmony_ci
196662306a36Sopenharmony_ci	/*
196762306a36Sopenharmony_ci	 * Add in the page array if there is one. The caller needs to set
196862306a36Sopenharmony_ci	 * rq_tailsz to PAGE_SIZE when the buffer has multiple pages and
196962306a36Sopenharmony_ci	 * ends at page boundary
197062306a36Sopenharmony_ci	 */
197162306a36Sopenharmony_ci	remaining_data_length = 0;
197262306a36Sopenharmony_ci	for (i = 0; i < num_rqst; i++)
197362306a36Sopenharmony_ci		remaining_data_length += smb_rqst_len(server, &rqst_array[i]);
197462306a36Sopenharmony_ci
197562306a36Sopenharmony_ci	if (unlikely(remaining_data_length > info->max_fragmented_send_size)) {
197662306a36Sopenharmony_ci		/* assertion: payload never exceeds negotiated maximum */
197762306a36Sopenharmony_ci		log_write(ERR, "payload size %d > max size %d\n",
197862306a36Sopenharmony_ci			remaining_data_length, info->max_fragmented_send_size);
197962306a36Sopenharmony_ci		return -EINVAL;
198062306a36Sopenharmony_ci	}
198162306a36Sopenharmony_ci
198262306a36Sopenharmony_ci	log_write(INFO, "num_rqst=%d total length=%u\n",
198362306a36Sopenharmony_ci			num_rqst, remaining_data_length);
198462306a36Sopenharmony_ci
198562306a36Sopenharmony_ci	rqst_idx = 0;
198662306a36Sopenharmony_ci	do {
198762306a36Sopenharmony_ci		rqst = &rqst_array[rqst_idx];
198862306a36Sopenharmony_ci
198962306a36Sopenharmony_ci		cifs_dbg(FYI, "Sending smb (RDMA): idx=%d smb_len=%lu\n",
199062306a36Sopenharmony_ci			 rqst_idx, smb_rqst_len(server, rqst));
199162306a36Sopenharmony_ci		for (i = 0; i < rqst->rq_nvec; i++)
199262306a36Sopenharmony_ci			dump_smb(rqst->rq_iov[i].iov_base, rqst->rq_iov[i].iov_len);
199362306a36Sopenharmony_ci
199462306a36Sopenharmony_ci		log_write(INFO, "RDMA-WR[%u] nvec=%d len=%u iter=%zu rqlen=%lu\n",
199562306a36Sopenharmony_ci			  rqst_idx, rqst->rq_nvec, remaining_data_length,
199662306a36Sopenharmony_ci			  iov_iter_count(&rqst->rq_iter), smb_rqst_len(server, rqst));
199762306a36Sopenharmony_ci
199862306a36Sopenharmony_ci		/* Send the metadata pages. */
199962306a36Sopenharmony_ci		klen = 0;
200062306a36Sopenharmony_ci		for (i = 0; i < rqst->rq_nvec; i++)
200162306a36Sopenharmony_ci			klen += rqst->rq_iov[i].iov_len;
200262306a36Sopenharmony_ci		iov_iter_kvec(&iter, ITER_SOURCE, rqst->rq_iov, rqst->rq_nvec, klen);
200362306a36Sopenharmony_ci
200462306a36Sopenharmony_ci		rc = smbd_post_send_iter(info, &iter, &remaining_data_length);
200562306a36Sopenharmony_ci		if (rc < 0)
200662306a36Sopenharmony_ci			break;
200762306a36Sopenharmony_ci
200862306a36Sopenharmony_ci		if (iov_iter_count(&rqst->rq_iter) > 0) {
200962306a36Sopenharmony_ci			/* And then the data pages if there are any */
201062306a36Sopenharmony_ci			rc = smbd_post_send_iter(info, &rqst->rq_iter,
201162306a36Sopenharmony_ci						 &remaining_data_length);
201262306a36Sopenharmony_ci			if (rc < 0)
201362306a36Sopenharmony_ci				break;
201462306a36Sopenharmony_ci		}
201562306a36Sopenharmony_ci
201662306a36Sopenharmony_ci	} while (++rqst_idx < num_rqst);
201762306a36Sopenharmony_ci
201862306a36Sopenharmony_ci	/*
201962306a36Sopenharmony_ci	 * As an optimization, we don't wait for individual I/O to finish
202062306a36Sopenharmony_ci	 * before sending the next one.
202162306a36Sopenharmony_ci	 * Send them all and wait for pending send count to get to 0
202262306a36Sopenharmony_ci	 * that means all the I/Os have been out and we are good to return
202362306a36Sopenharmony_ci	 */
202462306a36Sopenharmony_ci
202562306a36Sopenharmony_ci	wait_event(info->wait_send_pending,
202662306a36Sopenharmony_ci		atomic_read(&info->send_pending) == 0);
202762306a36Sopenharmony_ci
202862306a36Sopenharmony_ci	return rc;
202962306a36Sopenharmony_ci}
203062306a36Sopenharmony_ci
203162306a36Sopenharmony_cistatic void register_mr_done(struct ib_cq *cq, struct ib_wc *wc)
203262306a36Sopenharmony_ci{
203362306a36Sopenharmony_ci	struct smbd_mr *mr;
203462306a36Sopenharmony_ci	struct ib_cqe *cqe;
203562306a36Sopenharmony_ci
203662306a36Sopenharmony_ci	if (wc->status) {
203762306a36Sopenharmony_ci		log_rdma_mr(ERR, "status=%d\n", wc->status);
203862306a36Sopenharmony_ci		cqe = wc->wr_cqe;
203962306a36Sopenharmony_ci		mr = container_of(cqe, struct smbd_mr, cqe);
204062306a36Sopenharmony_ci		smbd_disconnect_rdma_connection(mr->conn);
204162306a36Sopenharmony_ci	}
204262306a36Sopenharmony_ci}
204362306a36Sopenharmony_ci
204462306a36Sopenharmony_ci/*
204562306a36Sopenharmony_ci * The work queue function that recovers MRs
204662306a36Sopenharmony_ci * We need to call ib_dereg_mr() and ib_alloc_mr() before this MR can be used
204762306a36Sopenharmony_ci * again. Both calls are slow, so finish them in a workqueue. This will not
204862306a36Sopenharmony_ci * block I/O path.
204962306a36Sopenharmony_ci * There is one workqueue that recovers MRs, there is no need to lock as the
205062306a36Sopenharmony_ci * I/O requests calling smbd_register_mr will never update the links in the
205162306a36Sopenharmony_ci * mr_list.
205262306a36Sopenharmony_ci */
205362306a36Sopenharmony_cistatic void smbd_mr_recovery_work(struct work_struct *work)
205462306a36Sopenharmony_ci{
205562306a36Sopenharmony_ci	struct smbd_connection *info =
205662306a36Sopenharmony_ci		container_of(work, struct smbd_connection, mr_recovery_work);
205762306a36Sopenharmony_ci	struct smbd_mr *smbdirect_mr;
205862306a36Sopenharmony_ci	int rc;
205962306a36Sopenharmony_ci
206062306a36Sopenharmony_ci	list_for_each_entry(smbdirect_mr, &info->mr_list, list) {
206162306a36Sopenharmony_ci		if (smbdirect_mr->state == MR_ERROR) {
206262306a36Sopenharmony_ci
206362306a36Sopenharmony_ci			/* recover this MR entry */
206462306a36Sopenharmony_ci			rc = ib_dereg_mr(smbdirect_mr->mr);
206562306a36Sopenharmony_ci			if (rc) {
206662306a36Sopenharmony_ci				log_rdma_mr(ERR,
206762306a36Sopenharmony_ci					"ib_dereg_mr failed rc=%x\n",
206862306a36Sopenharmony_ci					rc);
206962306a36Sopenharmony_ci				smbd_disconnect_rdma_connection(info);
207062306a36Sopenharmony_ci				continue;
207162306a36Sopenharmony_ci			}
207262306a36Sopenharmony_ci
207362306a36Sopenharmony_ci			smbdirect_mr->mr = ib_alloc_mr(
207462306a36Sopenharmony_ci				info->pd, info->mr_type,
207562306a36Sopenharmony_ci				info->max_frmr_depth);
207662306a36Sopenharmony_ci			if (IS_ERR(smbdirect_mr->mr)) {
207762306a36Sopenharmony_ci				log_rdma_mr(ERR, "ib_alloc_mr failed mr_type=%x max_frmr_depth=%x\n",
207862306a36Sopenharmony_ci					    info->mr_type,
207962306a36Sopenharmony_ci					    info->max_frmr_depth);
208062306a36Sopenharmony_ci				smbd_disconnect_rdma_connection(info);
208162306a36Sopenharmony_ci				continue;
208262306a36Sopenharmony_ci			}
208362306a36Sopenharmony_ci		} else
208462306a36Sopenharmony_ci			/* This MR is being used, don't recover it */
208562306a36Sopenharmony_ci			continue;
208662306a36Sopenharmony_ci
208762306a36Sopenharmony_ci		smbdirect_mr->state = MR_READY;
208862306a36Sopenharmony_ci
208962306a36Sopenharmony_ci		/* smbdirect_mr->state is updated by this function
209062306a36Sopenharmony_ci		 * and is read and updated by I/O issuing CPUs trying
209162306a36Sopenharmony_ci		 * to get a MR, the call to atomic_inc_return
209262306a36Sopenharmony_ci		 * implicates a memory barrier and guarantees this
209362306a36Sopenharmony_ci		 * value is updated before waking up any calls to
209462306a36Sopenharmony_ci		 * get_mr() from the I/O issuing CPUs
209562306a36Sopenharmony_ci		 */
209662306a36Sopenharmony_ci		if (atomic_inc_return(&info->mr_ready_count) == 1)
209762306a36Sopenharmony_ci			wake_up_interruptible(&info->wait_mr);
209862306a36Sopenharmony_ci	}
209962306a36Sopenharmony_ci}
210062306a36Sopenharmony_ci
210162306a36Sopenharmony_cistatic void destroy_mr_list(struct smbd_connection *info)
210262306a36Sopenharmony_ci{
210362306a36Sopenharmony_ci	struct smbd_mr *mr, *tmp;
210462306a36Sopenharmony_ci
210562306a36Sopenharmony_ci	cancel_work_sync(&info->mr_recovery_work);
210662306a36Sopenharmony_ci	list_for_each_entry_safe(mr, tmp, &info->mr_list, list) {
210762306a36Sopenharmony_ci		if (mr->state == MR_INVALIDATED)
210862306a36Sopenharmony_ci			ib_dma_unmap_sg(info->id->device, mr->sgt.sgl,
210962306a36Sopenharmony_ci				mr->sgt.nents, mr->dir);
211062306a36Sopenharmony_ci		ib_dereg_mr(mr->mr);
211162306a36Sopenharmony_ci		kfree(mr->sgt.sgl);
211262306a36Sopenharmony_ci		kfree(mr);
211362306a36Sopenharmony_ci	}
211462306a36Sopenharmony_ci}
211562306a36Sopenharmony_ci
211662306a36Sopenharmony_ci/*
211762306a36Sopenharmony_ci * Allocate MRs used for RDMA read/write
211862306a36Sopenharmony_ci * The number of MRs will not exceed hardware capability in responder_resources
211962306a36Sopenharmony_ci * All MRs are kept in mr_list. The MR can be recovered after it's used
212062306a36Sopenharmony_ci * Recovery is done in smbd_mr_recovery_work. The content of list entry changes
212162306a36Sopenharmony_ci * as MRs are used and recovered for I/O, but the list links will not change
212262306a36Sopenharmony_ci */
212362306a36Sopenharmony_cistatic int allocate_mr_list(struct smbd_connection *info)
212462306a36Sopenharmony_ci{
212562306a36Sopenharmony_ci	int i;
212662306a36Sopenharmony_ci	struct smbd_mr *smbdirect_mr, *tmp;
212762306a36Sopenharmony_ci
212862306a36Sopenharmony_ci	INIT_LIST_HEAD(&info->mr_list);
212962306a36Sopenharmony_ci	init_waitqueue_head(&info->wait_mr);
213062306a36Sopenharmony_ci	spin_lock_init(&info->mr_list_lock);
213162306a36Sopenharmony_ci	atomic_set(&info->mr_ready_count, 0);
213262306a36Sopenharmony_ci	atomic_set(&info->mr_used_count, 0);
213362306a36Sopenharmony_ci	init_waitqueue_head(&info->wait_for_mr_cleanup);
213462306a36Sopenharmony_ci	INIT_WORK(&info->mr_recovery_work, smbd_mr_recovery_work);
213562306a36Sopenharmony_ci	/* Allocate more MRs (2x) than hardware responder_resources */
213662306a36Sopenharmony_ci	for (i = 0; i < info->responder_resources * 2; i++) {
213762306a36Sopenharmony_ci		smbdirect_mr = kzalloc(sizeof(*smbdirect_mr), GFP_KERNEL);
213862306a36Sopenharmony_ci		if (!smbdirect_mr)
213962306a36Sopenharmony_ci			goto out;
214062306a36Sopenharmony_ci		smbdirect_mr->mr = ib_alloc_mr(info->pd, info->mr_type,
214162306a36Sopenharmony_ci					info->max_frmr_depth);
214262306a36Sopenharmony_ci		if (IS_ERR(smbdirect_mr->mr)) {
214362306a36Sopenharmony_ci			log_rdma_mr(ERR, "ib_alloc_mr failed mr_type=%x max_frmr_depth=%x\n",
214462306a36Sopenharmony_ci				    info->mr_type, info->max_frmr_depth);
214562306a36Sopenharmony_ci			goto out;
214662306a36Sopenharmony_ci		}
214762306a36Sopenharmony_ci		smbdirect_mr->sgt.sgl = kcalloc(info->max_frmr_depth,
214862306a36Sopenharmony_ci						sizeof(struct scatterlist),
214962306a36Sopenharmony_ci						GFP_KERNEL);
215062306a36Sopenharmony_ci		if (!smbdirect_mr->sgt.sgl) {
215162306a36Sopenharmony_ci			log_rdma_mr(ERR, "failed to allocate sgl\n");
215262306a36Sopenharmony_ci			ib_dereg_mr(smbdirect_mr->mr);
215362306a36Sopenharmony_ci			goto out;
215462306a36Sopenharmony_ci		}
215562306a36Sopenharmony_ci		smbdirect_mr->state = MR_READY;
215662306a36Sopenharmony_ci		smbdirect_mr->conn = info;
215762306a36Sopenharmony_ci
215862306a36Sopenharmony_ci		list_add_tail(&smbdirect_mr->list, &info->mr_list);
215962306a36Sopenharmony_ci		atomic_inc(&info->mr_ready_count);
216062306a36Sopenharmony_ci	}
216162306a36Sopenharmony_ci	return 0;
216262306a36Sopenharmony_ci
216362306a36Sopenharmony_ciout:
216462306a36Sopenharmony_ci	kfree(smbdirect_mr);
216562306a36Sopenharmony_ci
216662306a36Sopenharmony_ci	list_for_each_entry_safe(smbdirect_mr, tmp, &info->mr_list, list) {
216762306a36Sopenharmony_ci		list_del(&smbdirect_mr->list);
216862306a36Sopenharmony_ci		ib_dereg_mr(smbdirect_mr->mr);
216962306a36Sopenharmony_ci		kfree(smbdirect_mr->sgt.sgl);
217062306a36Sopenharmony_ci		kfree(smbdirect_mr);
217162306a36Sopenharmony_ci	}
217262306a36Sopenharmony_ci	return -ENOMEM;
217362306a36Sopenharmony_ci}
217462306a36Sopenharmony_ci
217562306a36Sopenharmony_ci/*
217662306a36Sopenharmony_ci * Get a MR from mr_list. This function waits until there is at least one
217762306a36Sopenharmony_ci * MR available in the list. It may access the list while the
217862306a36Sopenharmony_ci * smbd_mr_recovery_work is recovering the MR list. This doesn't need a lock
217962306a36Sopenharmony_ci * as they never modify the same places. However, there may be several CPUs
218062306a36Sopenharmony_ci * issueing I/O trying to get MR at the same time, mr_list_lock is used to
218162306a36Sopenharmony_ci * protect this situation.
218262306a36Sopenharmony_ci */
218362306a36Sopenharmony_cistatic struct smbd_mr *get_mr(struct smbd_connection *info)
218462306a36Sopenharmony_ci{
218562306a36Sopenharmony_ci	struct smbd_mr *ret;
218662306a36Sopenharmony_ci	int rc;
218762306a36Sopenharmony_ciagain:
218862306a36Sopenharmony_ci	rc = wait_event_interruptible(info->wait_mr,
218962306a36Sopenharmony_ci		atomic_read(&info->mr_ready_count) ||
219062306a36Sopenharmony_ci		info->transport_status != SMBD_CONNECTED);
219162306a36Sopenharmony_ci	if (rc) {
219262306a36Sopenharmony_ci		log_rdma_mr(ERR, "wait_event_interruptible rc=%x\n", rc);
219362306a36Sopenharmony_ci		return NULL;
219462306a36Sopenharmony_ci	}
219562306a36Sopenharmony_ci
219662306a36Sopenharmony_ci	if (info->transport_status != SMBD_CONNECTED) {
219762306a36Sopenharmony_ci		log_rdma_mr(ERR, "info->transport_status=%x\n",
219862306a36Sopenharmony_ci			info->transport_status);
219962306a36Sopenharmony_ci		return NULL;
220062306a36Sopenharmony_ci	}
220162306a36Sopenharmony_ci
220262306a36Sopenharmony_ci	spin_lock(&info->mr_list_lock);
220362306a36Sopenharmony_ci	list_for_each_entry(ret, &info->mr_list, list) {
220462306a36Sopenharmony_ci		if (ret->state == MR_READY) {
220562306a36Sopenharmony_ci			ret->state = MR_REGISTERED;
220662306a36Sopenharmony_ci			spin_unlock(&info->mr_list_lock);
220762306a36Sopenharmony_ci			atomic_dec(&info->mr_ready_count);
220862306a36Sopenharmony_ci			atomic_inc(&info->mr_used_count);
220962306a36Sopenharmony_ci			return ret;
221062306a36Sopenharmony_ci		}
221162306a36Sopenharmony_ci	}
221262306a36Sopenharmony_ci
221362306a36Sopenharmony_ci	spin_unlock(&info->mr_list_lock);
221462306a36Sopenharmony_ci	/*
221562306a36Sopenharmony_ci	 * It is possible that we could fail to get MR because other processes may
221662306a36Sopenharmony_ci	 * try to acquire a MR at the same time. If this is the case, retry it.
221762306a36Sopenharmony_ci	 */
221862306a36Sopenharmony_ci	goto again;
221962306a36Sopenharmony_ci}
222062306a36Sopenharmony_ci
222162306a36Sopenharmony_ci/*
222262306a36Sopenharmony_ci * Transcribe the pages from an iterator into an MR scatterlist.
222362306a36Sopenharmony_ci */
222462306a36Sopenharmony_cistatic int smbd_iter_to_mr(struct smbd_connection *info,
222562306a36Sopenharmony_ci			   struct iov_iter *iter,
222662306a36Sopenharmony_ci			   struct sg_table *sgt,
222762306a36Sopenharmony_ci			   unsigned int max_sg)
222862306a36Sopenharmony_ci{
222962306a36Sopenharmony_ci	int ret;
223062306a36Sopenharmony_ci
223162306a36Sopenharmony_ci	memset(sgt->sgl, 0, max_sg * sizeof(struct scatterlist));
223262306a36Sopenharmony_ci
223362306a36Sopenharmony_ci	ret = extract_iter_to_sg(iter, iov_iter_count(iter), sgt, max_sg, 0);
223462306a36Sopenharmony_ci	WARN_ON(ret < 0);
223562306a36Sopenharmony_ci	if (sgt->nents > 0)
223662306a36Sopenharmony_ci		sg_mark_end(&sgt->sgl[sgt->nents - 1]);
223762306a36Sopenharmony_ci	return ret;
223862306a36Sopenharmony_ci}
223962306a36Sopenharmony_ci
224062306a36Sopenharmony_ci/*
224162306a36Sopenharmony_ci * Register memory for RDMA read/write
224262306a36Sopenharmony_ci * iter: the buffer to register memory with
224362306a36Sopenharmony_ci * writing: true if this is a RDMA write (SMB read), false for RDMA read
224462306a36Sopenharmony_ci * need_invalidate: true if this MR needs to be locally invalidated after I/O
224562306a36Sopenharmony_ci * return value: the MR registered, NULL if failed.
224662306a36Sopenharmony_ci */
224762306a36Sopenharmony_cistruct smbd_mr *smbd_register_mr(struct smbd_connection *info,
224862306a36Sopenharmony_ci				 struct iov_iter *iter,
224962306a36Sopenharmony_ci				 bool writing, bool need_invalidate)
225062306a36Sopenharmony_ci{
225162306a36Sopenharmony_ci	struct smbd_mr *smbdirect_mr;
225262306a36Sopenharmony_ci	int rc, num_pages;
225362306a36Sopenharmony_ci	enum dma_data_direction dir;
225462306a36Sopenharmony_ci	struct ib_reg_wr *reg_wr;
225562306a36Sopenharmony_ci
225662306a36Sopenharmony_ci	num_pages = iov_iter_npages(iter, info->max_frmr_depth + 1);
225762306a36Sopenharmony_ci	if (num_pages > info->max_frmr_depth) {
225862306a36Sopenharmony_ci		log_rdma_mr(ERR, "num_pages=%d max_frmr_depth=%d\n",
225962306a36Sopenharmony_ci			num_pages, info->max_frmr_depth);
226062306a36Sopenharmony_ci		WARN_ON_ONCE(1);
226162306a36Sopenharmony_ci		return NULL;
226262306a36Sopenharmony_ci	}
226362306a36Sopenharmony_ci
226462306a36Sopenharmony_ci	smbdirect_mr = get_mr(info);
226562306a36Sopenharmony_ci	if (!smbdirect_mr) {
226662306a36Sopenharmony_ci		log_rdma_mr(ERR, "get_mr returning NULL\n");
226762306a36Sopenharmony_ci		return NULL;
226862306a36Sopenharmony_ci	}
226962306a36Sopenharmony_ci
227062306a36Sopenharmony_ci	dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
227162306a36Sopenharmony_ci	smbdirect_mr->dir = dir;
227262306a36Sopenharmony_ci	smbdirect_mr->need_invalidate = need_invalidate;
227362306a36Sopenharmony_ci	smbdirect_mr->sgt.nents = 0;
227462306a36Sopenharmony_ci	smbdirect_mr->sgt.orig_nents = 0;
227562306a36Sopenharmony_ci
227662306a36Sopenharmony_ci	log_rdma_mr(INFO, "num_pages=0x%x count=0x%zx depth=%u\n",
227762306a36Sopenharmony_ci		    num_pages, iov_iter_count(iter), info->max_frmr_depth);
227862306a36Sopenharmony_ci	smbd_iter_to_mr(info, iter, &smbdirect_mr->sgt, info->max_frmr_depth);
227962306a36Sopenharmony_ci
228062306a36Sopenharmony_ci	rc = ib_dma_map_sg(info->id->device, smbdirect_mr->sgt.sgl,
228162306a36Sopenharmony_ci			   smbdirect_mr->sgt.nents, dir);
228262306a36Sopenharmony_ci	if (!rc) {
228362306a36Sopenharmony_ci		log_rdma_mr(ERR, "ib_dma_map_sg num_pages=%x dir=%x rc=%x\n",
228462306a36Sopenharmony_ci			num_pages, dir, rc);
228562306a36Sopenharmony_ci		goto dma_map_error;
228662306a36Sopenharmony_ci	}
228762306a36Sopenharmony_ci
228862306a36Sopenharmony_ci	rc = ib_map_mr_sg(smbdirect_mr->mr, smbdirect_mr->sgt.sgl,
228962306a36Sopenharmony_ci			  smbdirect_mr->sgt.nents, NULL, PAGE_SIZE);
229062306a36Sopenharmony_ci	if (rc != smbdirect_mr->sgt.nents) {
229162306a36Sopenharmony_ci		log_rdma_mr(ERR,
229262306a36Sopenharmony_ci			"ib_map_mr_sg failed rc = %d nents = %x\n",
229362306a36Sopenharmony_ci			rc, smbdirect_mr->sgt.nents);
229462306a36Sopenharmony_ci		goto map_mr_error;
229562306a36Sopenharmony_ci	}
229662306a36Sopenharmony_ci
229762306a36Sopenharmony_ci	ib_update_fast_reg_key(smbdirect_mr->mr,
229862306a36Sopenharmony_ci		ib_inc_rkey(smbdirect_mr->mr->rkey));
229962306a36Sopenharmony_ci	reg_wr = &smbdirect_mr->wr;
230062306a36Sopenharmony_ci	reg_wr->wr.opcode = IB_WR_REG_MR;
230162306a36Sopenharmony_ci	smbdirect_mr->cqe.done = register_mr_done;
230262306a36Sopenharmony_ci	reg_wr->wr.wr_cqe = &smbdirect_mr->cqe;
230362306a36Sopenharmony_ci	reg_wr->wr.num_sge = 0;
230462306a36Sopenharmony_ci	reg_wr->wr.send_flags = IB_SEND_SIGNALED;
230562306a36Sopenharmony_ci	reg_wr->mr = smbdirect_mr->mr;
230662306a36Sopenharmony_ci	reg_wr->key = smbdirect_mr->mr->rkey;
230762306a36Sopenharmony_ci	reg_wr->access = writing ?
230862306a36Sopenharmony_ci			IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
230962306a36Sopenharmony_ci			IB_ACCESS_REMOTE_READ;
231062306a36Sopenharmony_ci
	/*
	 * There is no need to wait for completion of ib_post_send
	 * on IB_WR_REG_MR. Hardware enforces a barrier and order of execution
	 * on the next ib_post_send when we actually send I/O to the remote peer
	 */
231662306a36Sopenharmony_ci	rc = ib_post_send(info->id->qp, &reg_wr->wr, NULL);
231762306a36Sopenharmony_ci	if (!rc)
231862306a36Sopenharmony_ci		return smbdirect_mr;
231962306a36Sopenharmony_ci
232062306a36Sopenharmony_ci	log_rdma_mr(ERR, "ib_post_send failed rc=%x reg_wr->key=%x\n",
232162306a36Sopenharmony_ci		rc, reg_wr->key);
232262306a36Sopenharmony_ci
232362306a36Sopenharmony_ci	/* If all failed, attempt to recover this MR by setting it MR_ERROR*/
232462306a36Sopenharmony_cimap_mr_error:
232562306a36Sopenharmony_ci	ib_dma_unmap_sg(info->id->device, smbdirect_mr->sgt.sgl,
232662306a36Sopenharmony_ci			smbdirect_mr->sgt.nents, smbdirect_mr->dir);
232762306a36Sopenharmony_ci
232862306a36Sopenharmony_cidma_map_error:
232962306a36Sopenharmony_ci	smbdirect_mr->state = MR_ERROR;
233062306a36Sopenharmony_ci	if (atomic_dec_and_test(&info->mr_used_count))
233162306a36Sopenharmony_ci		wake_up(&info->wait_for_mr_cleanup);
233262306a36Sopenharmony_ci
233362306a36Sopenharmony_ci	smbd_disconnect_rdma_connection(info);
233462306a36Sopenharmony_ci
233562306a36Sopenharmony_ci	return NULL;
233662306a36Sopenharmony_ci}
233762306a36Sopenharmony_ci
233862306a36Sopenharmony_cistatic void local_inv_done(struct ib_cq *cq, struct ib_wc *wc)
233962306a36Sopenharmony_ci{
234062306a36Sopenharmony_ci	struct smbd_mr *smbdirect_mr;
234162306a36Sopenharmony_ci	struct ib_cqe *cqe;
234262306a36Sopenharmony_ci
234362306a36Sopenharmony_ci	cqe = wc->wr_cqe;
234462306a36Sopenharmony_ci	smbdirect_mr = container_of(cqe, struct smbd_mr, cqe);
234562306a36Sopenharmony_ci	smbdirect_mr->state = MR_INVALIDATED;
234662306a36Sopenharmony_ci	if (wc->status != IB_WC_SUCCESS) {
234762306a36Sopenharmony_ci		log_rdma_mr(ERR, "invalidate failed status=%x\n", wc->status);
234862306a36Sopenharmony_ci		smbdirect_mr->state = MR_ERROR;
234962306a36Sopenharmony_ci	}
235062306a36Sopenharmony_ci	complete(&smbdirect_mr->invalidate_done);
235162306a36Sopenharmony_ci}
235262306a36Sopenharmony_ci
235362306a36Sopenharmony_ci/*
235462306a36Sopenharmony_ci * Deregister a MR after I/O is done
235562306a36Sopenharmony_ci * This function may wait if remote invalidation is not used
235662306a36Sopenharmony_ci * and we have to locally invalidate the buffer to prevent data is being
235762306a36Sopenharmony_ci * modified by remote peer after upper layer consumes it
235862306a36Sopenharmony_ci */
235962306a36Sopenharmony_ciint smbd_deregister_mr(struct smbd_mr *smbdirect_mr)
236062306a36Sopenharmony_ci{
236162306a36Sopenharmony_ci	struct ib_send_wr *wr;
236262306a36Sopenharmony_ci	struct smbd_connection *info = smbdirect_mr->conn;
236362306a36Sopenharmony_ci	int rc = 0;
236462306a36Sopenharmony_ci
236562306a36Sopenharmony_ci	if (smbdirect_mr->need_invalidate) {
236662306a36Sopenharmony_ci		/* Need to finish local invalidation before returning */
236762306a36Sopenharmony_ci		wr = &smbdirect_mr->inv_wr;
236862306a36Sopenharmony_ci		wr->opcode = IB_WR_LOCAL_INV;
236962306a36Sopenharmony_ci		smbdirect_mr->cqe.done = local_inv_done;
237062306a36Sopenharmony_ci		wr->wr_cqe = &smbdirect_mr->cqe;
237162306a36Sopenharmony_ci		wr->num_sge = 0;
237262306a36Sopenharmony_ci		wr->ex.invalidate_rkey = smbdirect_mr->mr->rkey;
237362306a36Sopenharmony_ci		wr->send_flags = IB_SEND_SIGNALED;
237462306a36Sopenharmony_ci
237562306a36Sopenharmony_ci		init_completion(&smbdirect_mr->invalidate_done);
237662306a36Sopenharmony_ci		rc = ib_post_send(info->id->qp, wr, NULL);
237762306a36Sopenharmony_ci		if (rc) {
237862306a36Sopenharmony_ci			log_rdma_mr(ERR, "ib_post_send failed rc=%x\n", rc);
237962306a36Sopenharmony_ci			smbd_disconnect_rdma_connection(info);
238062306a36Sopenharmony_ci			goto done;
238162306a36Sopenharmony_ci		}
238262306a36Sopenharmony_ci		wait_for_completion(&smbdirect_mr->invalidate_done);
238362306a36Sopenharmony_ci		smbdirect_mr->need_invalidate = false;
238462306a36Sopenharmony_ci	} else
238562306a36Sopenharmony_ci		/*
238662306a36Sopenharmony_ci		 * For remote invalidation, just set it to MR_INVALIDATED
238762306a36Sopenharmony_ci		 * and defer to mr_recovery_work to recover the MR for next use
238862306a36Sopenharmony_ci		 */
238962306a36Sopenharmony_ci		smbdirect_mr->state = MR_INVALIDATED;
239062306a36Sopenharmony_ci
239162306a36Sopenharmony_ci	if (smbdirect_mr->state == MR_INVALIDATED) {
239262306a36Sopenharmony_ci		ib_dma_unmap_sg(
239362306a36Sopenharmony_ci			info->id->device, smbdirect_mr->sgt.sgl,
239462306a36Sopenharmony_ci			smbdirect_mr->sgt.nents,
239562306a36Sopenharmony_ci			smbdirect_mr->dir);
239662306a36Sopenharmony_ci		smbdirect_mr->state = MR_READY;
239762306a36Sopenharmony_ci		if (atomic_inc_return(&info->mr_ready_count) == 1)
239862306a36Sopenharmony_ci			wake_up_interruptible(&info->wait_mr);
239962306a36Sopenharmony_ci	} else
240062306a36Sopenharmony_ci		/*
240162306a36Sopenharmony_ci		 * Schedule the work to do MR recovery for future I/Os MR
240262306a36Sopenharmony_ci		 * recovery is slow and don't want it to block current I/O
240362306a36Sopenharmony_ci		 */
240462306a36Sopenharmony_ci		queue_work(info->workqueue, &info->mr_recovery_work);
240562306a36Sopenharmony_ci
240662306a36Sopenharmony_cidone:
240762306a36Sopenharmony_ci	if (atomic_dec_and_test(&info->mr_used_count))
240862306a36Sopenharmony_ci		wake_up(&info->wait_for_mr_cleanup);
240962306a36Sopenharmony_ci
241062306a36Sopenharmony_ci	return rc;
241162306a36Sopenharmony_ci}
241262306a36Sopenharmony_ci
241362306a36Sopenharmony_cistatic bool smb_set_sge(struct smb_extract_to_rdma *rdma,
241462306a36Sopenharmony_ci			struct page *lowest_page, size_t off, size_t len)
241562306a36Sopenharmony_ci{
241662306a36Sopenharmony_ci	struct ib_sge *sge = &rdma->sge[rdma->nr_sge];
241762306a36Sopenharmony_ci	u64 addr;
241862306a36Sopenharmony_ci
241962306a36Sopenharmony_ci	addr = ib_dma_map_page(rdma->device, lowest_page,
242062306a36Sopenharmony_ci			       off, len, rdma->direction);
242162306a36Sopenharmony_ci	if (ib_dma_mapping_error(rdma->device, addr))
242262306a36Sopenharmony_ci		return false;
242362306a36Sopenharmony_ci
242462306a36Sopenharmony_ci	sge->addr   = addr;
242562306a36Sopenharmony_ci	sge->length = len;
242662306a36Sopenharmony_ci	sge->lkey   = rdma->local_dma_lkey;
242762306a36Sopenharmony_ci	rdma->nr_sge++;
242862306a36Sopenharmony_ci	return true;
242962306a36Sopenharmony_ci}
243062306a36Sopenharmony_ci
243162306a36Sopenharmony_ci/*
243262306a36Sopenharmony_ci * Extract page fragments from a BVEC-class iterator and add them to an RDMA
243362306a36Sopenharmony_ci * element list.  The pages are not pinned.
243462306a36Sopenharmony_ci */
243562306a36Sopenharmony_cistatic ssize_t smb_extract_bvec_to_rdma(struct iov_iter *iter,
243662306a36Sopenharmony_ci					struct smb_extract_to_rdma *rdma,
243762306a36Sopenharmony_ci					ssize_t maxsize)
243862306a36Sopenharmony_ci{
243962306a36Sopenharmony_ci	const struct bio_vec *bv = iter->bvec;
244062306a36Sopenharmony_ci	unsigned long start = iter->iov_offset;
244162306a36Sopenharmony_ci	unsigned int i;
244262306a36Sopenharmony_ci	ssize_t ret = 0;
244362306a36Sopenharmony_ci
244462306a36Sopenharmony_ci	for (i = 0; i < iter->nr_segs; i++) {
244562306a36Sopenharmony_ci		size_t off, len;
244662306a36Sopenharmony_ci
244762306a36Sopenharmony_ci		len = bv[i].bv_len;
244862306a36Sopenharmony_ci		if (start >= len) {
244962306a36Sopenharmony_ci			start -= len;
245062306a36Sopenharmony_ci			continue;
245162306a36Sopenharmony_ci		}
245262306a36Sopenharmony_ci
245362306a36Sopenharmony_ci		len = min_t(size_t, maxsize, len - start);
245462306a36Sopenharmony_ci		off = bv[i].bv_offset + start;
245562306a36Sopenharmony_ci
245662306a36Sopenharmony_ci		if (!smb_set_sge(rdma, bv[i].bv_page, off, len))
245762306a36Sopenharmony_ci			return -EIO;
245862306a36Sopenharmony_ci
245962306a36Sopenharmony_ci		ret += len;
246062306a36Sopenharmony_ci		maxsize -= len;
246162306a36Sopenharmony_ci		if (rdma->nr_sge >= rdma->max_sge || maxsize <= 0)
246262306a36Sopenharmony_ci			break;
246362306a36Sopenharmony_ci		start = 0;
246462306a36Sopenharmony_ci	}
246562306a36Sopenharmony_ci
246662306a36Sopenharmony_ci	return ret;
246762306a36Sopenharmony_ci}
246862306a36Sopenharmony_ci
246962306a36Sopenharmony_ci/*
247062306a36Sopenharmony_ci * Extract fragments from a KVEC-class iterator and add them to an RDMA list.
247162306a36Sopenharmony_ci * This can deal with vmalloc'd buffers as well as kmalloc'd or static buffers.
247262306a36Sopenharmony_ci * The pages are not pinned.
247362306a36Sopenharmony_ci */
247462306a36Sopenharmony_cistatic ssize_t smb_extract_kvec_to_rdma(struct iov_iter *iter,
247562306a36Sopenharmony_ci					struct smb_extract_to_rdma *rdma,
247662306a36Sopenharmony_ci					ssize_t maxsize)
247762306a36Sopenharmony_ci{
247862306a36Sopenharmony_ci	const struct kvec *kv = iter->kvec;
247962306a36Sopenharmony_ci	unsigned long start = iter->iov_offset;
248062306a36Sopenharmony_ci	unsigned int i;
248162306a36Sopenharmony_ci	ssize_t ret = 0;
248262306a36Sopenharmony_ci
248362306a36Sopenharmony_ci	for (i = 0; i < iter->nr_segs; i++) {
248462306a36Sopenharmony_ci		struct page *page;
248562306a36Sopenharmony_ci		unsigned long kaddr;
248662306a36Sopenharmony_ci		size_t off, len, seg;
248762306a36Sopenharmony_ci
248862306a36Sopenharmony_ci		len = kv[i].iov_len;
248962306a36Sopenharmony_ci		if (start >= len) {
249062306a36Sopenharmony_ci			start -= len;
249162306a36Sopenharmony_ci			continue;
249262306a36Sopenharmony_ci		}
249362306a36Sopenharmony_ci
249462306a36Sopenharmony_ci		kaddr = (unsigned long)kv[i].iov_base + start;
249562306a36Sopenharmony_ci		off = kaddr & ~PAGE_MASK;
249662306a36Sopenharmony_ci		len = min_t(size_t, maxsize, len - start);
249762306a36Sopenharmony_ci		kaddr &= PAGE_MASK;
249862306a36Sopenharmony_ci
249962306a36Sopenharmony_ci		maxsize -= len;
250062306a36Sopenharmony_ci		do {
250162306a36Sopenharmony_ci			seg = min_t(size_t, len, PAGE_SIZE - off);
250262306a36Sopenharmony_ci
250362306a36Sopenharmony_ci			if (is_vmalloc_or_module_addr((void *)kaddr))
250462306a36Sopenharmony_ci				page = vmalloc_to_page((void *)kaddr);
250562306a36Sopenharmony_ci			else
250662306a36Sopenharmony_ci				page = virt_to_page((void *)kaddr);
250762306a36Sopenharmony_ci
250862306a36Sopenharmony_ci			if (!smb_set_sge(rdma, page, off, seg))
250962306a36Sopenharmony_ci				return -EIO;
251062306a36Sopenharmony_ci
251162306a36Sopenharmony_ci			ret += seg;
251262306a36Sopenharmony_ci			len -= seg;
251362306a36Sopenharmony_ci			kaddr += PAGE_SIZE;
251462306a36Sopenharmony_ci			off = 0;
251562306a36Sopenharmony_ci		} while (len > 0 && rdma->nr_sge < rdma->max_sge);
251662306a36Sopenharmony_ci
251762306a36Sopenharmony_ci		if (rdma->nr_sge >= rdma->max_sge || maxsize <= 0)
251862306a36Sopenharmony_ci			break;
251962306a36Sopenharmony_ci		start = 0;
252062306a36Sopenharmony_ci	}
252162306a36Sopenharmony_ci
252262306a36Sopenharmony_ci	return ret;
252362306a36Sopenharmony_ci}
252462306a36Sopenharmony_ci
252562306a36Sopenharmony_ci/*
252662306a36Sopenharmony_ci * Extract folio fragments from an XARRAY-class iterator and add them to an
252762306a36Sopenharmony_ci * RDMA list.  The folios are not pinned.
252862306a36Sopenharmony_ci */
252962306a36Sopenharmony_cistatic ssize_t smb_extract_xarray_to_rdma(struct iov_iter *iter,
253062306a36Sopenharmony_ci					  struct smb_extract_to_rdma *rdma,
253162306a36Sopenharmony_ci					  ssize_t maxsize)
253262306a36Sopenharmony_ci{
253362306a36Sopenharmony_ci	struct xarray *xa = iter->xarray;
253462306a36Sopenharmony_ci	struct folio *folio;
253562306a36Sopenharmony_ci	loff_t start = iter->xarray_start + iter->iov_offset;
253662306a36Sopenharmony_ci	pgoff_t index = start / PAGE_SIZE;
253762306a36Sopenharmony_ci	ssize_t ret = 0;
253862306a36Sopenharmony_ci	size_t off, len;
253962306a36Sopenharmony_ci	XA_STATE(xas, xa, index);
254062306a36Sopenharmony_ci
254162306a36Sopenharmony_ci	rcu_read_lock();
254262306a36Sopenharmony_ci
254362306a36Sopenharmony_ci	xas_for_each(&xas, folio, ULONG_MAX) {
254462306a36Sopenharmony_ci		if (xas_retry(&xas, folio))
254562306a36Sopenharmony_ci			continue;
254662306a36Sopenharmony_ci		if (WARN_ON(xa_is_value(folio)))
254762306a36Sopenharmony_ci			break;
254862306a36Sopenharmony_ci		if (WARN_ON(folio_test_hugetlb(folio)))
254962306a36Sopenharmony_ci			break;
255062306a36Sopenharmony_ci
255162306a36Sopenharmony_ci		off = offset_in_folio(folio, start);
255262306a36Sopenharmony_ci		len = min_t(size_t, maxsize, folio_size(folio) - off);
255362306a36Sopenharmony_ci
255462306a36Sopenharmony_ci		if (!smb_set_sge(rdma, folio_page(folio, 0), off, len)) {
255562306a36Sopenharmony_ci			rcu_read_unlock();
255662306a36Sopenharmony_ci			return -EIO;
255762306a36Sopenharmony_ci		}
255862306a36Sopenharmony_ci
255962306a36Sopenharmony_ci		maxsize -= len;
256062306a36Sopenharmony_ci		ret += len;
256162306a36Sopenharmony_ci		if (rdma->nr_sge >= rdma->max_sge || maxsize <= 0)
256262306a36Sopenharmony_ci			break;
256362306a36Sopenharmony_ci	}
256462306a36Sopenharmony_ci
256562306a36Sopenharmony_ci	rcu_read_unlock();
256662306a36Sopenharmony_ci	return ret;
256762306a36Sopenharmony_ci}
256862306a36Sopenharmony_ci
256962306a36Sopenharmony_ci/*
257062306a36Sopenharmony_ci * Extract page fragments from up to the given amount of the source iterator
257162306a36Sopenharmony_ci * and build up an RDMA list that refers to all of those bits.  The RDMA list
257262306a36Sopenharmony_ci * is appended to, up to the maximum number of elements set in the parameter
257362306a36Sopenharmony_ci * block.
257462306a36Sopenharmony_ci *
257562306a36Sopenharmony_ci * The extracted page fragments are not pinned or ref'd in any way; if an
257662306a36Sopenharmony_ci * IOVEC/UBUF-type iterator is to be used, it should be converted to a
257762306a36Sopenharmony_ci * BVEC-type iterator and the pages pinned, ref'd or otherwise held in some
257862306a36Sopenharmony_ci * way.
257962306a36Sopenharmony_ci */
258062306a36Sopenharmony_cistatic ssize_t smb_extract_iter_to_rdma(struct iov_iter *iter, size_t len,
258162306a36Sopenharmony_ci					struct smb_extract_to_rdma *rdma)
258262306a36Sopenharmony_ci{
258362306a36Sopenharmony_ci	ssize_t ret;
258462306a36Sopenharmony_ci	int before = rdma->nr_sge;
258562306a36Sopenharmony_ci
258662306a36Sopenharmony_ci	switch (iov_iter_type(iter)) {
258762306a36Sopenharmony_ci	case ITER_BVEC:
258862306a36Sopenharmony_ci		ret = smb_extract_bvec_to_rdma(iter, rdma, len);
258962306a36Sopenharmony_ci		break;
259062306a36Sopenharmony_ci	case ITER_KVEC:
259162306a36Sopenharmony_ci		ret = smb_extract_kvec_to_rdma(iter, rdma, len);
259262306a36Sopenharmony_ci		break;
259362306a36Sopenharmony_ci	case ITER_XARRAY:
259462306a36Sopenharmony_ci		ret = smb_extract_xarray_to_rdma(iter, rdma, len);
259562306a36Sopenharmony_ci		break;
259662306a36Sopenharmony_ci	default:
259762306a36Sopenharmony_ci		WARN_ON_ONCE(1);
259862306a36Sopenharmony_ci		return -EIO;
259962306a36Sopenharmony_ci	}
260062306a36Sopenharmony_ci
260162306a36Sopenharmony_ci	if (ret > 0) {
260262306a36Sopenharmony_ci		iov_iter_advance(iter, ret);
260362306a36Sopenharmony_ci	} else if (ret < 0) {
260462306a36Sopenharmony_ci		while (rdma->nr_sge > before) {
260562306a36Sopenharmony_ci			struct ib_sge *sge = &rdma->sge[rdma->nr_sge--];
260662306a36Sopenharmony_ci
260762306a36Sopenharmony_ci			ib_dma_unmap_single(rdma->device, sge->addr, sge->length,
260862306a36Sopenharmony_ci					    rdma->direction);
260962306a36Sopenharmony_ci			sge->addr = 0;
261062306a36Sopenharmony_ci		}
261162306a36Sopenharmony_ci	}
261262306a36Sopenharmony_ci
261362306a36Sopenharmony_ci	return ret;
261462306a36Sopenharmony_ci}
2615