18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * linux/fs/9p/trans_rdma.c
48c2ecf20Sopenharmony_ci *
58c2ecf20Sopenharmony_ci * RDMA transport layer based on the trans_fd.c implementation.
68c2ecf20Sopenharmony_ci *
78c2ecf20Sopenharmony_ci *  Copyright (C) 2008 by Tom Tucker <tom@opengridcomputing.com>
88c2ecf20Sopenharmony_ci *  Copyright (C) 2006 by Russ Cox <rsc@swtch.com>
98c2ecf20Sopenharmony_ci *  Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net>
108c2ecf20Sopenharmony_ci *  Copyright (C) 2004-2008 by Eric Van Hensbergen <ericvh@gmail.com>
118c2ecf20Sopenharmony_ci *  Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com>
128c2ecf20Sopenharmony_ci */
138c2ecf20Sopenharmony_ci
148c2ecf20Sopenharmony_ci#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
158c2ecf20Sopenharmony_ci
168c2ecf20Sopenharmony_ci#include <linux/in.h>
178c2ecf20Sopenharmony_ci#include <linux/module.h>
188c2ecf20Sopenharmony_ci#include <linux/net.h>
198c2ecf20Sopenharmony_ci#include <linux/ipv6.h>
208c2ecf20Sopenharmony_ci#include <linux/kthread.h>
218c2ecf20Sopenharmony_ci#include <linux/errno.h>
228c2ecf20Sopenharmony_ci#include <linux/kernel.h>
238c2ecf20Sopenharmony_ci#include <linux/un.h>
248c2ecf20Sopenharmony_ci#include <linux/uaccess.h>
258c2ecf20Sopenharmony_ci#include <linux/inet.h>
268c2ecf20Sopenharmony_ci#include <linux/idr.h>
278c2ecf20Sopenharmony_ci#include <linux/file.h>
288c2ecf20Sopenharmony_ci#include <linux/parser.h>
298c2ecf20Sopenharmony_ci#include <linux/semaphore.h>
308c2ecf20Sopenharmony_ci#include <linux/slab.h>
318c2ecf20Sopenharmony_ci#include <linux/seq_file.h>
328c2ecf20Sopenharmony_ci#include <net/9p/9p.h>
338c2ecf20Sopenharmony_ci#include <net/9p/client.h>
348c2ecf20Sopenharmony_ci#include <net/9p/transport.h>
358c2ecf20Sopenharmony_ci#include <rdma/ib_verbs.h>
368c2ecf20Sopenharmony_ci#include <rdma/rdma_cm.h>
378c2ecf20Sopenharmony_ci
/* Server port used when no "port=" mount option is given. */
#define P9_PORT			5640

/* Default depths of the send and receive queues ("sq=" / "rq="). */
#define P9_RDMA_SQ_DEPTH	32
#define P9_RDMA_RQ_DEPTH	32

/*
 * Scatter/gather entry counts for send and receive work requests.
 * NOTE(review): presumably consumed where the queue pair is created.
 */
#define P9_RDMA_SEND_SGE	4
#define P9_RDMA_RECV_SGE	4

/*
 * RDMA read depths.
 * NOTE(review): presumably connection parameters; confirm at the
 * connect call site.
 */
#define P9_RDMA_IRD		0
#define P9_RDMA_ORD		0

/* Default "timeout=" value, in milliseconds (30 seconds). */
#define P9_RDMA_TIMEOUT		30000

/*
 * 1MB.
 * NOTE(review): presumably the largest message size the transport
 * advertises; confirm where the transport module is registered.
 */
#define P9_RDMA_MAXSIZE		(1024*1024)
478c2ecf20Sopenharmony_ci
488c2ecf20Sopenharmony_ci/**
498c2ecf20Sopenharmony_ci * struct p9_trans_rdma - RDMA transport instance
508c2ecf20Sopenharmony_ci *
518c2ecf20Sopenharmony_ci * @state: tracks the transport state machine for connection setup and tear down
528c2ecf20Sopenharmony_ci * @cm_id: The RDMA CM ID
538c2ecf20Sopenharmony_ci * @pd: Protection Domain pointer
548c2ecf20Sopenharmony_ci * @qp: Queue Pair pointer
558c2ecf20Sopenharmony_ci * @cq: Completion Queue pointer
568c2ecf20Sopenharmony_ci * @timeout: Number of uSecs to wait for connection management events
578c2ecf20Sopenharmony_ci * @privport: Whether a privileged port may be used
588c2ecf20Sopenharmony_ci * @port: The port to use
598c2ecf20Sopenharmony_ci * @sq_depth: The depth of the Send Queue
608c2ecf20Sopenharmony_ci * @sq_sem: Semaphore for the SQ
618c2ecf20Sopenharmony_ci * @rq_depth: The depth of the Receive Queue.
628c2ecf20Sopenharmony_ci * @rq_sem: Semaphore for the RQ
638c2ecf20Sopenharmony_ci * @excess_rc : Amount of posted Receive Contexts without a pending request.
648c2ecf20Sopenharmony_ci *		See rdma_request()
658c2ecf20Sopenharmony_ci * @addr: The remote peer's address
668c2ecf20Sopenharmony_ci * @req_lock: Protects the active request list
678c2ecf20Sopenharmony_ci * @cm_done: Completion event for connection management tracking
688c2ecf20Sopenharmony_ci */
698c2ecf20Sopenharmony_cistruct p9_trans_rdma {
708c2ecf20Sopenharmony_ci	enum {
718c2ecf20Sopenharmony_ci		P9_RDMA_INIT,
728c2ecf20Sopenharmony_ci		P9_RDMA_ADDR_RESOLVED,
738c2ecf20Sopenharmony_ci		P9_RDMA_ROUTE_RESOLVED,
748c2ecf20Sopenharmony_ci		P9_RDMA_CONNECTED,
758c2ecf20Sopenharmony_ci		P9_RDMA_FLUSHING,
768c2ecf20Sopenharmony_ci		P9_RDMA_CLOSING,
778c2ecf20Sopenharmony_ci		P9_RDMA_CLOSED,
788c2ecf20Sopenharmony_ci	} state;
798c2ecf20Sopenharmony_ci	struct rdma_cm_id *cm_id;
808c2ecf20Sopenharmony_ci	struct ib_pd *pd;
818c2ecf20Sopenharmony_ci	struct ib_qp *qp;
828c2ecf20Sopenharmony_ci	struct ib_cq *cq;
838c2ecf20Sopenharmony_ci	long timeout;
848c2ecf20Sopenharmony_ci	bool privport;
858c2ecf20Sopenharmony_ci	u16 port;
868c2ecf20Sopenharmony_ci	int sq_depth;
878c2ecf20Sopenharmony_ci	struct semaphore sq_sem;
888c2ecf20Sopenharmony_ci	int rq_depth;
898c2ecf20Sopenharmony_ci	struct semaphore rq_sem;
908c2ecf20Sopenharmony_ci	atomic_t excess_rc;
918c2ecf20Sopenharmony_ci	struct sockaddr_in addr;
928c2ecf20Sopenharmony_ci	spinlock_t req_lock;
938c2ecf20Sopenharmony_ci
948c2ecf20Sopenharmony_ci	struct completion cm_done;
958c2ecf20Sopenharmony_ci};
968c2ecf20Sopenharmony_ci
978c2ecf20Sopenharmony_cistruct p9_rdma_req;
988c2ecf20Sopenharmony_ci
998c2ecf20Sopenharmony_ci/**
1008c2ecf20Sopenharmony_ci * struct p9_rdma_context - Keeps track of in-process WR
1018c2ecf20Sopenharmony_ci *
1028c2ecf20Sopenharmony_ci * @busa: Bus address to unmap when the WR completes
1038c2ecf20Sopenharmony_ci * @req: Keeps track of requests (send)
1048c2ecf20Sopenharmony_ci * @rc: Keepts track of replies (receive)
1058c2ecf20Sopenharmony_ci */
1068c2ecf20Sopenharmony_cistruct p9_rdma_context {
1078c2ecf20Sopenharmony_ci	struct ib_cqe cqe;
1088c2ecf20Sopenharmony_ci	dma_addr_t busa;
1098c2ecf20Sopenharmony_ci	union {
1108c2ecf20Sopenharmony_ci		struct p9_req_t *req;
1118c2ecf20Sopenharmony_ci		struct p9_fcall rc;
1128c2ecf20Sopenharmony_ci	};
1138c2ecf20Sopenharmony_ci};
1148c2ecf20Sopenharmony_ci
/**
 * struct p9_rdma_opts - transport-specific mount options
 *
 * Filled in by parse_opts() and copied into the transport by alloc_rdma().
 */
struct p9_rdma_opts {
	/**
	 * @port: port of the connection.
	 * NOTE(review): this is a signed 16-bit field while the transport
	 * stores the port as u16; ports above 32767 depend on wrap-around
	 * during conversion - consider an unsigned type.
	 */
	short port;
	/** @privport: bind the local end to a privileged port */
	bool privport;
	/**
	 * @sq_depth: requested depth of the SQ; this does not need to be
	 * deeper than the number of threads used in the client
	 */
	int sq_depth;
	/**
	 * @rq_depth: depth of the RQ; parse_opts() raises it to at least
	 * @sq_depth
	 */
	int rq_depth;
	/** @timeout: time to wait for CM events, in msecs */
	long timeout;
};
1308c2ecf20Sopenharmony_ci
/*
 * Option parsing (code inspired by the NFS code).
 *
 * Token identifiers returned by match_token() for the table below.
 */
enum {
	/* Options that carry an integer argument */
	Opt_port,
	Opt_rq_depth,
	Opt_sq_depth,
	Opt_timeout,
	/* Options without an argument */
	Opt_privport,
	/* Anything that matched no pattern */
	Opt_err,
};
1418c2ecf20Sopenharmony_ci
1428c2ecf20Sopenharmony_cistatic match_table_t tokens = {
1438c2ecf20Sopenharmony_ci	{Opt_port, "port=%u"},
1448c2ecf20Sopenharmony_ci	{Opt_sq_depth, "sq=%u"},
1458c2ecf20Sopenharmony_ci	{Opt_rq_depth, "rq=%u"},
1468c2ecf20Sopenharmony_ci	{Opt_timeout, "timeout=%u"},
1478c2ecf20Sopenharmony_ci	{Opt_privport, "privport"},
1488c2ecf20Sopenharmony_ci	{Opt_err, NULL},
1498c2ecf20Sopenharmony_ci};
1508c2ecf20Sopenharmony_ci
1518c2ecf20Sopenharmony_cistatic int p9_rdma_show_options(struct seq_file *m, struct p9_client *clnt)
1528c2ecf20Sopenharmony_ci{
1538c2ecf20Sopenharmony_ci	struct p9_trans_rdma *rdma = clnt->trans;
1548c2ecf20Sopenharmony_ci
1558c2ecf20Sopenharmony_ci	if (rdma->port != P9_PORT)
1568c2ecf20Sopenharmony_ci		seq_printf(m, ",port=%u", rdma->port);
1578c2ecf20Sopenharmony_ci	if (rdma->sq_depth != P9_RDMA_SQ_DEPTH)
1588c2ecf20Sopenharmony_ci		seq_printf(m, ",sq=%u", rdma->sq_depth);
1598c2ecf20Sopenharmony_ci	if (rdma->rq_depth != P9_RDMA_RQ_DEPTH)
1608c2ecf20Sopenharmony_ci		seq_printf(m, ",rq=%u", rdma->rq_depth);
1618c2ecf20Sopenharmony_ci	if (rdma->timeout != P9_RDMA_TIMEOUT)
1628c2ecf20Sopenharmony_ci		seq_printf(m, ",timeout=%lu", rdma->timeout);
1638c2ecf20Sopenharmony_ci	if (rdma->privport)
1648c2ecf20Sopenharmony_ci		seq_puts(m, ",privport");
1658c2ecf20Sopenharmony_ci	return 0;
1668c2ecf20Sopenharmony_ci}
1678c2ecf20Sopenharmony_ci
1688c2ecf20Sopenharmony_ci/**
1698c2ecf20Sopenharmony_ci * parse_opts - parse mount options into rdma options structure
1708c2ecf20Sopenharmony_ci * @params: options string passed from mount
1718c2ecf20Sopenharmony_ci * @opts: rdma transport-specific structure to parse options into
1728c2ecf20Sopenharmony_ci *
1738c2ecf20Sopenharmony_ci * Returns 0 upon success, -ERRNO upon failure
1748c2ecf20Sopenharmony_ci */
1758c2ecf20Sopenharmony_cistatic int parse_opts(char *params, struct p9_rdma_opts *opts)
1768c2ecf20Sopenharmony_ci{
1778c2ecf20Sopenharmony_ci	char *p;
1788c2ecf20Sopenharmony_ci	substring_t args[MAX_OPT_ARGS];
1798c2ecf20Sopenharmony_ci	int option;
1808c2ecf20Sopenharmony_ci	char *options, *tmp_options;
1818c2ecf20Sopenharmony_ci
1828c2ecf20Sopenharmony_ci	opts->port = P9_PORT;
1838c2ecf20Sopenharmony_ci	opts->sq_depth = P9_RDMA_SQ_DEPTH;
1848c2ecf20Sopenharmony_ci	opts->rq_depth = P9_RDMA_RQ_DEPTH;
1858c2ecf20Sopenharmony_ci	opts->timeout = P9_RDMA_TIMEOUT;
1868c2ecf20Sopenharmony_ci	opts->privport = false;
1878c2ecf20Sopenharmony_ci
1888c2ecf20Sopenharmony_ci	if (!params)
1898c2ecf20Sopenharmony_ci		return 0;
1908c2ecf20Sopenharmony_ci
1918c2ecf20Sopenharmony_ci	tmp_options = kstrdup(params, GFP_KERNEL);
1928c2ecf20Sopenharmony_ci	if (!tmp_options) {
1938c2ecf20Sopenharmony_ci		p9_debug(P9_DEBUG_ERROR,
1948c2ecf20Sopenharmony_ci			 "failed to allocate copy of option string\n");
1958c2ecf20Sopenharmony_ci		return -ENOMEM;
1968c2ecf20Sopenharmony_ci	}
1978c2ecf20Sopenharmony_ci	options = tmp_options;
1988c2ecf20Sopenharmony_ci
1998c2ecf20Sopenharmony_ci	while ((p = strsep(&options, ",")) != NULL) {
2008c2ecf20Sopenharmony_ci		int token;
2018c2ecf20Sopenharmony_ci		int r;
2028c2ecf20Sopenharmony_ci		if (!*p)
2038c2ecf20Sopenharmony_ci			continue;
2048c2ecf20Sopenharmony_ci		token = match_token(p, tokens, args);
2058c2ecf20Sopenharmony_ci		if ((token != Opt_err) && (token != Opt_privport)) {
2068c2ecf20Sopenharmony_ci			r = match_int(&args[0], &option);
2078c2ecf20Sopenharmony_ci			if (r < 0) {
2088c2ecf20Sopenharmony_ci				p9_debug(P9_DEBUG_ERROR,
2098c2ecf20Sopenharmony_ci					 "integer field, but no integer?\n");
2108c2ecf20Sopenharmony_ci				continue;
2118c2ecf20Sopenharmony_ci			}
2128c2ecf20Sopenharmony_ci		}
2138c2ecf20Sopenharmony_ci		switch (token) {
2148c2ecf20Sopenharmony_ci		case Opt_port:
2158c2ecf20Sopenharmony_ci			opts->port = option;
2168c2ecf20Sopenharmony_ci			break;
2178c2ecf20Sopenharmony_ci		case Opt_sq_depth:
2188c2ecf20Sopenharmony_ci			opts->sq_depth = option;
2198c2ecf20Sopenharmony_ci			break;
2208c2ecf20Sopenharmony_ci		case Opt_rq_depth:
2218c2ecf20Sopenharmony_ci			opts->rq_depth = option;
2228c2ecf20Sopenharmony_ci			break;
2238c2ecf20Sopenharmony_ci		case Opt_timeout:
2248c2ecf20Sopenharmony_ci			opts->timeout = option;
2258c2ecf20Sopenharmony_ci			break;
2268c2ecf20Sopenharmony_ci		case Opt_privport:
2278c2ecf20Sopenharmony_ci			opts->privport = true;
2288c2ecf20Sopenharmony_ci			break;
2298c2ecf20Sopenharmony_ci		default:
2308c2ecf20Sopenharmony_ci			continue;
2318c2ecf20Sopenharmony_ci		}
2328c2ecf20Sopenharmony_ci	}
2338c2ecf20Sopenharmony_ci	/* RQ must be at least as large as the SQ */
2348c2ecf20Sopenharmony_ci	opts->rq_depth = max(opts->rq_depth, opts->sq_depth);
2358c2ecf20Sopenharmony_ci	kfree(tmp_options);
2368c2ecf20Sopenharmony_ci	return 0;
2378c2ecf20Sopenharmony_ci}
2388c2ecf20Sopenharmony_ci
2398c2ecf20Sopenharmony_cistatic int
2408c2ecf20Sopenharmony_cip9_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
2418c2ecf20Sopenharmony_ci{
2428c2ecf20Sopenharmony_ci	struct p9_client *c = id->context;
2438c2ecf20Sopenharmony_ci	struct p9_trans_rdma *rdma = c->trans;
2448c2ecf20Sopenharmony_ci	switch (event->event) {
2458c2ecf20Sopenharmony_ci	case RDMA_CM_EVENT_ADDR_RESOLVED:
2468c2ecf20Sopenharmony_ci		BUG_ON(rdma->state != P9_RDMA_INIT);
2478c2ecf20Sopenharmony_ci		rdma->state = P9_RDMA_ADDR_RESOLVED;
2488c2ecf20Sopenharmony_ci		break;
2498c2ecf20Sopenharmony_ci
2508c2ecf20Sopenharmony_ci	case RDMA_CM_EVENT_ROUTE_RESOLVED:
2518c2ecf20Sopenharmony_ci		BUG_ON(rdma->state != P9_RDMA_ADDR_RESOLVED);
2528c2ecf20Sopenharmony_ci		rdma->state = P9_RDMA_ROUTE_RESOLVED;
2538c2ecf20Sopenharmony_ci		break;
2548c2ecf20Sopenharmony_ci
2558c2ecf20Sopenharmony_ci	case RDMA_CM_EVENT_ESTABLISHED:
2568c2ecf20Sopenharmony_ci		BUG_ON(rdma->state != P9_RDMA_ROUTE_RESOLVED);
2578c2ecf20Sopenharmony_ci		rdma->state = P9_RDMA_CONNECTED;
2588c2ecf20Sopenharmony_ci		break;
2598c2ecf20Sopenharmony_ci
2608c2ecf20Sopenharmony_ci	case RDMA_CM_EVENT_DISCONNECTED:
2618c2ecf20Sopenharmony_ci		if (rdma)
2628c2ecf20Sopenharmony_ci			rdma->state = P9_RDMA_CLOSED;
2638c2ecf20Sopenharmony_ci		c->status = Disconnected;
2648c2ecf20Sopenharmony_ci		break;
2658c2ecf20Sopenharmony_ci
2668c2ecf20Sopenharmony_ci	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
2678c2ecf20Sopenharmony_ci		break;
2688c2ecf20Sopenharmony_ci
2698c2ecf20Sopenharmony_ci	case RDMA_CM_EVENT_ADDR_CHANGE:
2708c2ecf20Sopenharmony_ci	case RDMA_CM_EVENT_ROUTE_ERROR:
2718c2ecf20Sopenharmony_ci	case RDMA_CM_EVENT_DEVICE_REMOVAL:
2728c2ecf20Sopenharmony_ci	case RDMA_CM_EVENT_MULTICAST_JOIN:
2738c2ecf20Sopenharmony_ci	case RDMA_CM_EVENT_MULTICAST_ERROR:
2748c2ecf20Sopenharmony_ci	case RDMA_CM_EVENT_REJECTED:
2758c2ecf20Sopenharmony_ci	case RDMA_CM_EVENT_CONNECT_REQUEST:
2768c2ecf20Sopenharmony_ci	case RDMA_CM_EVENT_CONNECT_RESPONSE:
2778c2ecf20Sopenharmony_ci	case RDMA_CM_EVENT_CONNECT_ERROR:
2788c2ecf20Sopenharmony_ci	case RDMA_CM_EVENT_ADDR_ERROR:
2798c2ecf20Sopenharmony_ci	case RDMA_CM_EVENT_UNREACHABLE:
2808c2ecf20Sopenharmony_ci		c->status = Disconnected;
2818c2ecf20Sopenharmony_ci		rdma_disconnect(rdma->cm_id);
2828c2ecf20Sopenharmony_ci		break;
2838c2ecf20Sopenharmony_ci	default:
2848c2ecf20Sopenharmony_ci		BUG();
2858c2ecf20Sopenharmony_ci	}
2868c2ecf20Sopenharmony_ci	complete(&rdma->cm_done);
2878c2ecf20Sopenharmony_ci	return 0;
2888c2ecf20Sopenharmony_ci}
2898c2ecf20Sopenharmony_ci
2908c2ecf20Sopenharmony_cistatic void
2918c2ecf20Sopenharmony_cirecv_done(struct ib_cq *cq, struct ib_wc *wc)
2928c2ecf20Sopenharmony_ci{
2938c2ecf20Sopenharmony_ci	struct p9_client *client = cq->cq_context;
2948c2ecf20Sopenharmony_ci	struct p9_trans_rdma *rdma = client->trans;
2958c2ecf20Sopenharmony_ci	struct p9_rdma_context *c =
2968c2ecf20Sopenharmony_ci		container_of(wc->wr_cqe, struct p9_rdma_context, cqe);
2978c2ecf20Sopenharmony_ci	struct p9_req_t *req;
2988c2ecf20Sopenharmony_ci	int err = 0;
2998c2ecf20Sopenharmony_ci	int16_t tag;
3008c2ecf20Sopenharmony_ci
3018c2ecf20Sopenharmony_ci	req = NULL;
3028c2ecf20Sopenharmony_ci	ib_dma_unmap_single(rdma->cm_id->device, c->busa, client->msize,
3038c2ecf20Sopenharmony_ci							 DMA_FROM_DEVICE);
3048c2ecf20Sopenharmony_ci
3058c2ecf20Sopenharmony_ci	if (wc->status != IB_WC_SUCCESS)
3068c2ecf20Sopenharmony_ci		goto err_out;
3078c2ecf20Sopenharmony_ci
3088c2ecf20Sopenharmony_ci	c->rc.size = wc->byte_len;
3098c2ecf20Sopenharmony_ci	err = p9_parse_header(&c->rc, NULL, NULL, &tag, 1);
3108c2ecf20Sopenharmony_ci	if (err)
3118c2ecf20Sopenharmony_ci		goto err_out;
3128c2ecf20Sopenharmony_ci
3138c2ecf20Sopenharmony_ci	req = p9_tag_lookup(client, tag);
3148c2ecf20Sopenharmony_ci	if (!req)
3158c2ecf20Sopenharmony_ci		goto err_out;
3168c2ecf20Sopenharmony_ci
3178c2ecf20Sopenharmony_ci	/* Check that we have not yet received a reply for this request.
3188c2ecf20Sopenharmony_ci	 */
3198c2ecf20Sopenharmony_ci	if (unlikely(req->rc.sdata)) {
3208c2ecf20Sopenharmony_ci		pr_err("Duplicate reply for request %d", tag);
3218c2ecf20Sopenharmony_ci		goto err_out;
3228c2ecf20Sopenharmony_ci	}
3238c2ecf20Sopenharmony_ci
3248c2ecf20Sopenharmony_ci	req->rc.size = c->rc.size;
3258c2ecf20Sopenharmony_ci	req->rc.sdata = c->rc.sdata;
3268c2ecf20Sopenharmony_ci	p9_client_cb(client, req, REQ_STATUS_RCVD);
3278c2ecf20Sopenharmony_ci
3288c2ecf20Sopenharmony_ci out:
3298c2ecf20Sopenharmony_ci	up(&rdma->rq_sem);
3308c2ecf20Sopenharmony_ci	kfree(c);
3318c2ecf20Sopenharmony_ci	return;
3328c2ecf20Sopenharmony_ci
3338c2ecf20Sopenharmony_ci err_out:
3348c2ecf20Sopenharmony_ci	p9_debug(P9_DEBUG_ERROR, "req %p err %d status %d\n",
3358c2ecf20Sopenharmony_ci			req, err, wc->status);
3368c2ecf20Sopenharmony_ci	rdma->state = P9_RDMA_FLUSHING;
3378c2ecf20Sopenharmony_ci	client->status = Disconnected;
3388c2ecf20Sopenharmony_ci	goto out;
3398c2ecf20Sopenharmony_ci}
3408c2ecf20Sopenharmony_ci
3418c2ecf20Sopenharmony_cistatic void
3428c2ecf20Sopenharmony_cisend_done(struct ib_cq *cq, struct ib_wc *wc)
3438c2ecf20Sopenharmony_ci{
3448c2ecf20Sopenharmony_ci	struct p9_client *client = cq->cq_context;
3458c2ecf20Sopenharmony_ci	struct p9_trans_rdma *rdma = client->trans;
3468c2ecf20Sopenharmony_ci	struct p9_rdma_context *c =
3478c2ecf20Sopenharmony_ci		container_of(wc->wr_cqe, struct p9_rdma_context, cqe);
3488c2ecf20Sopenharmony_ci
3498c2ecf20Sopenharmony_ci	ib_dma_unmap_single(rdma->cm_id->device,
3508c2ecf20Sopenharmony_ci			    c->busa, c->req->tc.size,
3518c2ecf20Sopenharmony_ci			    DMA_TO_DEVICE);
3528c2ecf20Sopenharmony_ci	up(&rdma->sq_sem);
3538c2ecf20Sopenharmony_ci	p9_req_put(c->req);
3548c2ecf20Sopenharmony_ci	kfree(c);
3558c2ecf20Sopenharmony_ci}
3568c2ecf20Sopenharmony_ci
3578c2ecf20Sopenharmony_cistatic void qp_event_handler(struct ib_event *event, void *context)
3588c2ecf20Sopenharmony_ci{
3598c2ecf20Sopenharmony_ci	p9_debug(P9_DEBUG_ERROR, "QP event %d context %p\n",
3608c2ecf20Sopenharmony_ci		 event->event, context);
3618c2ecf20Sopenharmony_ci}
3628c2ecf20Sopenharmony_ci
3638c2ecf20Sopenharmony_cistatic void rdma_destroy_trans(struct p9_trans_rdma *rdma)
3648c2ecf20Sopenharmony_ci{
3658c2ecf20Sopenharmony_ci	if (!rdma)
3668c2ecf20Sopenharmony_ci		return;
3678c2ecf20Sopenharmony_ci
3688c2ecf20Sopenharmony_ci	if (rdma->qp && !IS_ERR(rdma->qp))
3698c2ecf20Sopenharmony_ci		ib_destroy_qp(rdma->qp);
3708c2ecf20Sopenharmony_ci
3718c2ecf20Sopenharmony_ci	if (rdma->pd && !IS_ERR(rdma->pd))
3728c2ecf20Sopenharmony_ci		ib_dealloc_pd(rdma->pd);
3738c2ecf20Sopenharmony_ci
3748c2ecf20Sopenharmony_ci	if (rdma->cq && !IS_ERR(rdma->cq))
3758c2ecf20Sopenharmony_ci		ib_free_cq(rdma->cq);
3768c2ecf20Sopenharmony_ci
3778c2ecf20Sopenharmony_ci	if (rdma->cm_id && !IS_ERR(rdma->cm_id))
3788c2ecf20Sopenharmony_ci		rdma_destroy_id(rdma->cm_id);
3798c2ecf20Sopenharmony_ci
3808c2ecf20Sopenharmony_ci	kfree(rdma);
3818c2ecf20Sopenharmony_ci}
3828c2ecf20Sopenharmony_ci
3838c2ecf20Sopenharmony_cistatic int
3848c2ecf20Sopenharmony_cipost_recv(struct p9_client *client, struct p9_rdma_context *c)
3858c2ecf20Sopenharmony_ci{
3868c2ecf20Sopenharmony_ci	struct p9_trans_rdma *rdma = client->trans;
3878c2ecf20Sopenharmony_ci	struct ib_recv_wr wr;
3888c2ecf20Sopenharmony_ci	struct ib_sge sge;
3898c2ecf20Sopenharmony_ci	int ret;
3908c2ecf20Sopenharmony_ci
3918c2ecf20Sopenharmony_ci	c->busa = ib_dma_map_single(rdma->cm_id->device,
3928c2ecf20Sopenharmony_ci				    c->rc.sdata, client->msize,
3938c2ecf20Sopenharmony_ci				    DMA_FROM_DEVICE);
3948c2ecf20Sopenharmony_ci	if (ib_dma_mapping_error(rdma->cm_id->device, c->busa))
3958c2ecf20Sopenharmony_ci		goto error;
3968c2ecf20Sopenharmony_ci
3978c2ecf20Sopenharmony_ci	c->cqe.done = recv_done;
3988c2ecf20Sopenharmony_ci
3998c2ecf20Sopenharmony_ci	sge.addr = c->busa;
4008c2ecf20Sopenharmony_ci	sge.length = client->msize;
4018c2ecf20Sopenharmony_ci	sge.lkey = rdma->pd->local_dma_lkey;
4028c2ecf20Sopenharmony_ci
4038c2ecf20Sopenharmony_ci	wr.next = NULL;
4048c2ecf20Sopenharmony_ci	wr.wr_cqe = &c->cqe;
4058c2ecf20Sopenharmony_ci	wr.sg_list = &sge;
4068c2ecf20Sopenharmony_ci	wr.num_sge = 1;
4078c2ecf20Sopenharmony_ci
4088c2ecf20Sopenharmony_ci	ret = ib_post_recv(rdma->qp, &wr, NULL);
4098c2ecf20Sopenharmony_ci	if (ret)
4108c2ecf20Sopenharmony_ci		ib_dma_unmap_single(rdma->cm_id->device, c->busa,
4118c2ecf20Sopenharmony_ci				    client->msize, DMA_FROM_DEVICE);
4128c2ecf20Sopenharmony_ci	return ret;
4138c2ecf20Sopenharmony_ci
4148c2ecf20Sopenharmony_ci error:
4158c2ecf20Sopenharmony_ci	p9_debug(P9_DEBUG_ERROR, "EIO\n");
4168c2ecf20Sopenharmony_ci	return -EIO;
4178c2ecf20Sopenharmony_ci}
4188c2ecf20Sopenharmony_ci
4198c2ecf20Sopenharmony_cistatic int rdma_request(struct p9_client *client, struct p9_req_t *req)
4208c2ecf20Sopenharmony_ci{
4218c2ecf20Sopenharmony_ci	struct p9_trans_rdma *rdma = client->trans;
4228c2ecf20Sopenharmony_ci	struct ib_send_wr wr;
4238c2ecf20Sopenharmony_ci	struct ib_sge sge;
4248c2ecf20Sopenharmony_ci	int err = 0;
4258c2ecf20Sopenharmony_ci	unsigned long flags;
4268c2ecf20Sopenharmony_ci	struct p9_rdma_context *c = NULL;
4278c2ecf20Sopenharmony_ci	struct p9_rdma_context *rpl_context = NULL;
4288c2ecf20Sopenharmony_ci
4298c2ecf20Sopenharmony_ci	/* When an error occurs between posting the recv and the send,
4308c2ecf20Sopenharmony_ci	 * there will be a receive context posted without a pending request.
4318c2ecf20Sopenharmony_ci	 * Since there is no way to "un-post" it, we remember it and skip
4328c2ecf20Sopenharmony_ci	 * post_recv() for the next request.
4338c2ecf20Sopenharmony_ci	 * So here,
4348c2ecf20Sopenharmony_ci	 * see if we are this `next request' and need to absorb an excess rc.
4358c2ecf20Sopenharmony_ci	 * If yes, then drop and free our own, and do not recv_post().
4368c2ecf20Sopenharmony_ci	 **/
4378c2ecf20Sopenharmony_ci	if (unlikely(atomic_read(&rdma->excess_rc) > 0)) {
4388c2ecf20Sopenharmony_ci		if ((atomic_sub_return(1, &rdma->excess_rc) >= 0)) {
4398c2ecf20Sopenharmony_ci			/* Got one! */
4408c2ecf20Sopenharmony_ci			p9_fcall_fini(&req->rc);
4418c2ecf20Sopenharmony_ci			req->rc.sdata = NULL;
4428c2ecf20Sopenharmony_ci			goto dont_need_post_recv;
4438c2ecf20Sopenharmony_ci		} else {
4448c2ecf20Sopenharmony_ci			/* We raced and lost. */
4458c2ecf20Sopenharmony_ci			atomic_inc(&rdma->excess_rc);
4468c2ecf20Sopenharmony_ci		}
4478c2ecf20Sopenharmony_ci	}
4488c2ecf20Sopenharmony_ci
4498c2ecf20Sopenharmony_ci	/* Allocate an fcall for the reply */
4508c2ecf20Sopenharmony_ci	rpl_context = kmalloc(sizeof *rpl_context, GFP_NOFS);
4518c2ecf20Sopenharmony_ci	if (!rpl_context) {
4528c2ecf20Sopenharmony_ci		err = -ENOMEM;
4538c2ecf20Sopenharmony_ci		goto recv_error;
4548c2ecf20Sopenharmony_ci	}
4558c2ecf20Sopenharmony_ci	rpl_context->rc.sdata = req->rc.sdata;
4568c2ecf20Sopenharmony_ci
4578c2ecf20Sopenharmony_ci	/*
4588c2ecf20Sopenharmony_ci	 * Post a receive buffer for this request. We need to ensure
4598c2ecf20Sopenharmony_ci	 * there is a reply buffer available for every outstanding
4608c2ecf20Sopenharmony_ci	 * request. A flushed request can result in no reply for an
4618c2ecf20Sopenharmony_ci	 * outstanding request, so we must keep a count to avoid
4628c2ecf20Sopenharmony_ci	 * overflowing the RQ.
4638c2ecf20Sopenharmony_ci	 */
4648c2ecf20Sopenharmony_ci	if (down_interruptible(&rdma->rq_sem)) {
4658c2ecf20Sopenharmony_ci		err = -EINTR;
4668c2ecf20Sopenharmony_ci		goto recv_error;
4678c2ecf20Sopenharmony_ci	}
4688c2ecf20Sopenharmony_ci
4698c2ecf20Sopenharmony_ci	err = post_recv(client, rpl_context);
4708c2ecf20Sopenharmony_ci	if (err) {
4718c2ecf20Sopenharmony_ci		p9_debug(P9_DEBUG_ERROR, "POST RECV failed: %d\n", err);
4728c2ecf20Sopenharmony_ci		goto recv_error;
4738c2ecf20Sopenharmony_ci	}
4748c2ecf20Sopenharmony_ci	/* remove posted receive buffer from request structure */
4758c2ecf20Sopenharmony_ci	req->rc.sdata = NULL;
4768c2ecf20Sopenharmony_ci
4778c2ecf20Sopenharmony_cidont_need_post_recv:
4788c2ecf20Sopenharmony_ci	/* Post the request */
4798c2ecf20Sopenharmony_ci	c = kmalloc(sizeof *c, GFP_NOFS);
4808c2ecf20Sopenharmony_ci	if (!c) {
4818c2ecf20Sopenharmony_ci		err = -ENOMEM;
4828c2ecf20Sopenharmony_ci		goto send_error;
4838c2ecf20Sopenharmony_ci	}
4848c2ecf20Sopenharmony_ci	c->req = req;
4858c2ecf20Sopenharmony_ci
4868c2ecf20Sopenharmony_ci	c->busa = ib_dma_map_single(rdma->cm_id->device,
4878c2ecf20Sopenharmony_ci				    c->req->tc.sdata, c->req->tc.size,
4888c2ecf20Sopenharmony_ci				    DMA_TO_DEVICE);
4898c2ecf20Sopenharmony_ci	if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) {
4908c2ecf20Sopenharmony_ci		err = -EIO;
4918c2ecf20Sopenharmony_ci		goto send_error;
4928c2ecf20Sopenharmony_ci	}
4938c2ecf20Sopenharmony_ci
4948c2ecf20Sopenharmony_ci	c->cqe.done = send_done;
4958c2ecf20Sopenharmony_ci
4968c2ecf20Sopenharmony_ci	sge.addr = c->busa;
4978c2ecf20Sopenharmony_ci	sge.length = c->req->tc.size;
4988c2ecf20Sopenharmony_ci	sge.lkey = rdma->pd->local_dma_lkey;
4998c2ecf20Sopenharmony_ci
5008c2ecf20Sopenharmony_ci	wr.next = NULL;
5018c2ecf20Sopenharmony_ci	wr.wr_cqe = &c->cqe;
5028c2ecf20Sopenharmony_ci	wr.opcode = IB_WR_SEND;
5038c2ecf20Sopenharmony_ci	wr.send_flags = IB_SEND_SIGNALED;
5048c2ecf20Sopenharmony_ci	wr.sg_list = &sge;
5058c2ecf20Sopenharmony_ci	wr.num_sge = 1;
5068c2ecf20Sopenharmony_ci
5078c2ecf20Sopenharmony_ci	if (down_interruptible(&rdma->sq_sem)) {
5088c2ecf20Sopenharmony_ci		err = -EINTR;
5098c2ecf20Sopenharmony_ci		goto dma_unmap;
5108c2ecf20Sopenharmony_ci	}
5118c2ecf20Sopenharmony_ci
5128c2ecf20Sopenharmony_ci	/* Mark request as `sent' *before* we actually send it,
5138c2ecf20Sopenharmony_ci	 * because doing if after could erase the REQ_STATUS_RCVD
5148c2ecf20Sopenharmony_ci	 * status in case of a very fast reply.
5158c2ecf20Sopenharmony_ci	 */
5168c2ecf20Sopenharmony_ci	req->status = REQ_STATUS_SENT;
5178c2ecf20Sopenharmony_ci	err = ib_post_send(rdma->qp, &wr, NULL);
5188c2ecf20Sopenharmony_ci	if (err)
5198c2ecf20Sopenharmony_ci		goto dma_unmap;
5208c2ecf20Sopenharmony_ci
5218c2ecf20Sopenharmony_ci	/* Success */
5228c2ecf20Sopenharmony_ci	return 0;
5238c2ecf20Sopenharmony_ci
5248c2ecf20Sopenharmony_cidma_unmap:
5258c2ecf20Sopenharmony_ci	ib_dma_unmap_single(rdma->cm_id->device, c->busa,
5268c2ecf20Sopenharmony_ci			    c->req->tc.size, DMA_TO_DEVICE);
5278c2ecf20Sopenharmony_ci /* Handle errors that happened during or while preparing the send: */
5288c2ecf20Sopenharmony_ci send_error:
5298c2ecf20Sopenharmony_ci	req->status = REQ_STATUS_ERROR;
5308c2ecf20Sopenharmony_ci	kfree(c);
5318c2ecf20Sopenharmony_ci	p9_debug(P9_DEBUG_ERROR, "Error %d in rdma_request()\n", err);
5328c2ecf20Sopenharmony_ci
5338c2ecf20Sopenharmony_ci	/* Ach.
5348c2ecf20Sopenharmony_ci	 *  We did recv_post(), but not send. We have one recv_post in excess.
5358c2ecf20Sopenharmony_ci	 */
5368c2ecf20Sopenharmony_ci	atomic_inc(&rdma->excess_rc);
5378c2ecf20Sopenharmony_ci	return err;
5388c2ecf20Sopenharmony_ci
5398c2ecf20Sopenharmony_ci /* Handle errors that happened during or while preparing post_recv(): */
5408c2ecf20Sopenharmony_ci recv_error:
5418c2ecf20Sopenharmony_ci	kfree(rpl_context);
5428c2ecf20Sopenharmony_ci	spin_lock_irqsave(&rdma->req_lock, flags);
5438c2ecf20Sopenharmony_ci	if (err != -EINTR && rdma->state < P9_RDMA_CLOSING) {
5448c2ecf20Sopenharmony_ci		rdma->state = P9_RDMA_CLOSING;
5458c2ecf20Sopenharmony_ci		spin_unlock_irqrestore(&rdma->req_lock, flags);
5468c2ecf20Sopenharmony_ci		rdma_disconnect(rdma->cm_id);
5478c2ecf20Sopenharmony_ci	} else
5488c2ecf20Sopenharmony_ci		spin_unlock_irqrestore(&rdma->req_lock, flags);
5498c2ecf20Sopenharmony_ci	return err;
5508c2ecf20Sopenharmony_ci}
5518c2ecf20Sopenharmony_ci
5528c2ecf20Sopenharmony_cistatic void rdma_close(struct p9_client *client)
5538c2ecf20Sopenharmony_ci{
5548c2ecf20Sopenharmony_ci	struct p9_trans_rdma *rdma;
5558c2ecf20Sopenharmony_ci
5568c2ecf20Sopenharmony_ci	if (!client)
5578c2ecf20Sopenharmony_ci		return;
5588c2ecf20Sopenharmony_ci
5598c2ecf20Sopenharmony_ci	rdma = client->trans;
5608c2ecf20Sopenharmony_ci	if (!rdma)
5618c2ecf20Sopenharmony_ci		return;
5628c2ecf20Sopenharmony_ci
5638c2ecf20Sopenharmony_ci	client->status = Disconnected;
5648c2ecf20Sopenharmony_ci	rdma_disconnect(rdma->cm_id);
5658c2ecf20Sopenharmony_ci	rdma_destroy_trans(rdma);
5668c2ecf20Sopenharmony_ci}
5678c2ecf20Sopenharmony_ci
5688c2ecf20Sopenharmony_ci/**
5698c2ecf20Sopenharmony_ci * alloc_rdma - Allocate and initialize the rdma transport structure
5708c2ecf20Sopenharmony_ci * @opts: Mount options structure
5718c2ecf20Sopenharmony_ci */
5728c2ecf20Sopenharmony_cistatic struct p9_trans_rdma *alloc_rdma(struct p9_rdma_opts *opts)
5738c2ecf20Sopenharmony_ci{
5748c2ecf20Sopenharmony_ci	struct p9_trans_rdma *rdma;
5758c2ecf20Sopenharmony_ci
5768c2ecf20Sopenharmony_ci	rdma = kzalloc(sizeof(struct p9_trans_rdma), GFP_KERNEL);
5778c2ecf20Sopenharmony_ci	if (!rdma)
5788c2ecf20Sopenharmony_ci		return NULL;
5798c2ecf20Sopenharmony_ci
5808c2ecf20Sopenharmony_ci	rdma->port = opts->port;
5818c2ecf20Sopenharmony_ci	rdma->privport = opts->privport;
5828c2ecf20Sopenharmony_ci	rdma->sq_depth = opts->sq_depth;
5838c2ecf20Sopenharmony_ci	rdma->rq_depth = opts->rq_depth;
5848c2ecf20Sopenharmony_ci	rdma->timeout = opts->timeout;
5858c2ecf20Sopenharmony_ci	spin_lock_init(&rdma->req_lock);
5868c2ecf20Sopenharmony_ci	init_completion(&rdma->cm_done);
5878c2ecf20Sopenharmony_ci	sema_init(&rdma->sq_sem, rdma->sq_depth);
5888c2ecf20Sopenharmony_ci	sema_init(&rdma->rq_sem, rdma->rq_depth);
5898c2ecf20Sopenharmony_ci	atomic_set(&rdma->excess_rc, 0);
5908c2ecf20Sopenharmony_ci
5918c2ecf20Sopenharmony_ci	return rdma;
5928c2ecf20Sopenharmony_ci}
5938c2ecf20Sopenharmony_ci
/**
 * rdma_cancel - transport hook invoked when a request is being cancelled
 * @client: client the request belongs to (unused)
 * @req: request being cancelled (unused)
 *
 * There is nothing to undo at this point; any bookkeeping that turns out
 * to be necessary is done in rdma_cancelled().
 *
 * Always returns 1.
 */
static int rdma_cancel(struct p9_client *client, struct p9_req_t *req)
{
	return 1;
}
6018c2ecf20Sopenharmony_ci
6028c2ecf20Sopenharmony_ci/* A request has been fully flushed without a reply.
6038c2ecf20Sopenharmony_ci * That means we have posted one buffer in excess.
6048c2ecf20Sopenharmony_ci */
6058c2ecf20Sopenharmony_cistatic int rdma_cancelled(struct p9_client *client, struct p9_req_t *req)
6068c2ecf20Sopenharmony_ci{
6078c2ecf20Sopenharmony_ci	struct p9_trans_rdma *rdma = client->trans;
6088c2ecf20Sopenharmony_ci	atomic_inc(&rdma->excess_rc);
6098c2ecf20Sopenharmony_ci	return 0;
6108c2ecf20Sopenharmony_ci}
6118c2ecf20Sopenharmony_ci
6128c2ecf20Sopenharmony_cistatic int p9_rdma_bind_privport(struct p9_trans_rdma *rdma)
6138c2ecf20Sopenharmony_ci{
6148c2ecf20Sopenharmony_ci	struct sockaddr_in cl = {
6158c2ecf20Sopenharmony_ci		.sin_family = AF_INET,
6168c2ecf20Sopenharmony_ci		.sin_addr.s_addr = htonl(INADDR_ANY),
6178c2ecf20Sopenharmony_ci	};
6188c2ecf20Sopenharmony_ci	int port, err = -EINVAL;
6198c2ecf20Sopenharmony_ci
6208c2ecf20Sopenharmony_ci	for (port = P9_DEF_MAX_RESVPORT; port >= P9_DEF_MIN_RESVPORT; port--) {
6218c2ecf20Sopenharmony_ci		cl.sin_port = htons((ushort)port);
6228c2ecf20Sopenharmony_ci		err = rdma_bind_addr(rdma->cm_id, (struct sockaddr *)&cl);
6238c2ecf20Sopenharmony_ci		if (err != -EADDRINUSE)
6248c2ecf20Sopenharmony_ci			break;
6258c2ecf20Sopenharmony_ci	}
6268c2ecf20Sopenharmony_ci	return err;
6278c2ecf20Sopenharmony_ci}
6288c2ecf20Sopenharmony_ci
6298c2ecf20Sopenharmony_ci/**
6308c2ecf20Sopenharmony_ci * rdma_create_trans - Transport method for creating a transport instance
6318c2ecf20Sopenharmony_ci * @client: client instance
6328c2ecf20Sopenharmony_ci * @addr: IP address string
6338c2ecf20Sopenharmony_ci * @args: Mount options string
6348c2ecf20Sopenharmony_ci */
6358c2ecf20Sopenharmony_cistatic int
6368c2ecf20Sopenharmony_cirdma_create_trans(struct p9_client *client, const char *addr, char *args)
6378c2ecf20Sopenharmony_ci{
6388c2ecf20Sopenharmony_ci	int err;
6398c2ecf20Sopenharmony_ci	struct p9_rdma_opts opts;
6408c2ecf20Sopenharmony_ci	struct p9_trans_rdma *rdma;
6418c2ecf20Sopenharmony_ci	struct rdma_conn_param conn_param;
6428c2ecf20Sopenharmony_ci	struct ib_qp_init_attr qp_attr;
6438c2ecf20Sopenharmony_ci
6448c2ecf20Sopenharmony_ci	if (addr == NULL)
6458c2ecf20Sopenharmony_ci		return -EINVAL;
6468c2ecf20Sopenharmony_ci
6478c2ecf20Sopenharmony_ci	/* Parse the transport specific mount options */
6488c2ecf20Sopenharmony_ci	err = parse_opts(args, &opts);
6498c2ecf20Sopenharmony_ci	if (err < 0)
6508c2ecf20Sopenharmony_ci		return err;
6518c2ecf20Sopenharmony_ci
6528c2ecf20Sopenharmony_ci	/* Create and initialize the RDMA transport structure */
6538c2ecf20Sopenharmony_ci	rdma = alloc_rdma(&opts);
6548c2ecf20Sopenharmony_ci	if (!rdma)
6558c2ecf20Sopenharmony_ci		return -ENOMEM;
6568c2ecf20Sopenharmony_ci
6578c2ecf20Sopenharmony_ci	/* Create the RDMA CM ID */
6588c2ecf20Sopenharmony_ci	rdma->cm_id = rdma_create_id(&init_net, p9_cm_event_handler, client,
6598c2ecf20Sopenharmony_ci				     RDMA_PS_TCP, IB_QPT_RC);
6608c2ecf20Sopenharmony_ci	if (IS_ERR(rdma->cm_id))
6618c2ecf20Sopenharmony_ci		goto error;
6628c2ecf20Sopenharmony_ci
6638c2ecf20Sopenharmony_ci	/* Associate the client with the transport */
6648c2ecf20Sopenharmony_ci	client->trans = rdma;
6658c2ecf20Sopenharmony_ci
6668c2ecf20Sopenharmony_ci	/* Bind to a privileged port if we need to */
6678c2ecf20Sopenharmony_ci	if (opts.privport) {
6688c2ecf20Sopenharmony_ci		err = p9_rdma_bind_privport(rdma);
6698c2ecf20Sopenharmony_ci		if (err < 0) {
6708c2ecf20Sopenharmony_ci			pr_err("%s (%d): problem binding to privport: %d\n",
6718c2ecf20Sopenharmony_ci			       __func__, task_pid_nr(current), -err);
6728c2ecf20Sopenharmony_ci			goto error;
6738c2ecf20Sopenharmony_ci		}
6748c2ecf20Sopenharmony_ci	}
6758c2ecf20Sopenharmony_ci
6768c2ecf20Sopenharmony_ci	/* Resolve the server's address */
6778c2ecf20Sopenharmony_ci	rdma->addr.sin_family = AF_INET;
6788c2ecf20Sopenharmony_ci	rdma->addr.sin_addr.s_addr = in_aton(addr);
6798c2ecf20Sopenharmony_ci	rdma->addr.sin_port = htons(opts.port);
6808c2ecf20Sopenharmony_ci	err = rdma_resolve_addr(rdma->cm_id, NULL,
6818c2ecf20Sopenharmony_ci				(struct sockaddr *)&rdma->addr,
6828c2ecf20Sopenharmony_ci				rdma->timeout);
6838c2ecf20Sopenharmony_ci	if (err)
6848c2ecf20Sopenharmony_ci		goto error;
6858c2ecf20Sopenharmony_ci	err = wait_for_completion_interruptible(&rdma->cm_done);
6868c2ecf20Sopenharmony_ci	if (err || (rdma->state != P9_RDMA_ADDR_RESOLVED))
6878c2ecf20Sopenharmony_ci		goto error;
6888c2ecf20Sopenharmony_ci
6898c2ecf20Sopenharmony_ci	/* Resolve the route to the server */
6908c2ecf20Sopenharmony_ci	err = rdma_resolve_route(rdma->cm_id, rdma->timeout);
6918c2ecf20Sopenharmony_ci	if (err)
6928c2ecf20Sopenharmony_ci		goto error;
6938c2ecf20Sopenharmony_ci	err = wait_for_completion_interruptible(&rdma->cm_done);
6948c2ecf20Sopenharmony_ci	if (err || (rdma->state != P9_RDMA_ROUTE_RESOLVED))
6958c2ecf20Sopenharmony_ci		goto error;
6968c2ecf20Sopenharmony_ci
6978c2ecf20Sopenharmony_ci	/* Create the Completion Queue */
6988c2ecf20Sopenharmony_ci	rdma->cq = ib_alloc_cq_any(rdma->cm_id->device, client,
6998c2ecf20Sopenharmony_ci				   opts.sq_depth + opts.rq_depth + 1,
7008c2ecf20Sopenharmony_ci				   IB_POLL_SOFTIRQ);
7018c2ecf20Sopenharmony_ci	if (IS_ERR(rdma->cq))
7028c2ecf20Sopenharmony_ci		goto error;
7038c2ecf20Sopenharmony_ci
7048c2ecf20Sopenharmony_ci	/* Create the Protection Domain */
7058c2ecf20Sopenharmony_ci	rdma->pd = ib_alloc_pd(rdma->cm_id->device, 0);
7068c2ecf20Sopenharmony_ci	if (IS_ERR(rdma->pd))
7078c2ecf20Sopenharmony_ci		goto error;
7088c2ecf20Sopenharmony_ci
7098c2ecf20Sopenharmony_ci	/* Create the Queue Pair */
7108c2ecf20Sopenharmony_ci	memset(&qp_attr, 0, sizeof qp_attr);
7118c2ecf20Sopenharmony_ci	qp_attr.event_handler = qp_event_handler;
7128c2ecf20Sopenharmony_ci	qp_attr.qp_context = client;
7138c2ecf20Sopenharmony_ci	qp_attr.cap.max_send_wr = opts.sq_depth;
7148c2ecf20Sopenharmony_ci	qp_attr.cap.max_recv_wr = opts.rq_depth;
7158c2ecf20Sopenharmony_ci	qp_attr.cap.max_send_sge = P9_RDMA_SEND_SGE;
7168c2ecf20Sopenharmony_ci	qp_attr.cap.max_recv_sge = P9_RDMA_RECV_SGE;
7178c2ecf20Sopenharmony_ci	qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
7188c2ecf20Sopenharmony_ci	qp_attr.qp_type = IB_QPT_RC;
7198c2ecf20Sopenharmony_ci	qp_attr.send_cq = rdma->cq;
7208c2ecf20Sopenharmony_ci	qp_attr.recv_cq = rdma->cq;
7218c2ecf20Sopenharmony_ci	err = rdma_create_qp(rdma->cm_id, rdma->pd, &qp_attr);
7228c2ecf20Sopenharmony_ci	if (err)
7238c2ecf20Sopenharmony_ci		goto error;
7248c2ecf20Sopenharmony_ci	rdma->qp = rdma->cm_id->qp;
7258c2ecf20Sopenharmony_ci
7268c2ecf20Sopenharmony_ci	/* Request a connection */
7278c2ecf20Sopenharmony_ci	memset(&conn_param, 0, sizeof(conn_param));
7288c2ecf20Sopenharmony_ci	conn_param.private_data = NULL;
7298c2ecf20Sopenharmony_ci	conn_param.private_data_len = 0;
7308c2ecf20Sopenharmony_ci	conn_param.responder_resources = P9_RDMA_IRD;
7318c2ecf20Sopenharmony_ci	conn_param.initiator_depth = P9_RDMA_ORD;
7328c2ecf20Sopenharmony_ci	err = rdma_connect(rdma->cm_id, &conn_param);
7338c2ecf20Sopenharmony_ci	if (err)
7348c2ecf20Sopenharmony_ci		goto error;
7358c2ecf20Sopenharmony_ci	err = wait_for_completion_interruptible(&rdma->cm_done);
7368c2ecf20Sopenharmony_ci	if (err || (rdma->state != P9_RDMA_CONNECTED))
7378c2ecf20Sopenharmony_ci		goto error;
7388c2ecf20Sopenharmony_ci
7398c2ecf20Sopenharmony_ci	client->status = Connected;
7408c2ecf20Sopenharmony_ci
7418c2ecf20Sopenharmony_ci	return 0;
7428c2ecf20Sopenharmony_ci
7438c2ecf20Sopenharmony_cierror:
7448c2ecf20Sopenharmony_ci	rdma_destroy_trans(rdma);
7458c2ecf20Sopenharmony_ci	return -ENOTCONN;
7468c2ecf20Sopenharmony_ci}
7478c2ecf20Sopenharmony_ci
7488c2ecf20Sopenharmony_cistatic struct p9_trans_module p9_rdma_trans = {
7498c2ecf20Sopenharmony_ci	.name = "rdma",
7508c2ecf20Sopenharmony_ci	.maxsize = P9_RDMA_MAXSIZE,
7518c2ecf20Sopenharmony_ci	.def = 0,
7528c2ecf20Sopenharmony_ci	.owner = THIS_MODULE,
7538c2ecf20Sopenharmony_ci	.create = rdma_create_trans,
7548c2ecf20Sopenharmony_ci	.close = rdma_close,
7558c2ecf20Sopenharmony_ci	.request = rdma_request,
7568c2ecf20Sopenharmony_ci	.cancel = rdma_cancel,
7578c2ecf20Sopenharmony_ci	.cancelled = rdma_cancelled,
7588c2ecf20Sopenharmony_ci	.show_options = p9_rdma_show_options,
7598c2ecf20Sopenharmony_ci};
7608c2ecf20Sopenharmony_ci
7618c2ecf20Sopenharmony_ci/**
7628c2ecf20Sopenharmony_ci * p9_trans_rdma_init - Register the 9P RDMA transport driver
7638c2ecf20Sopenharmony_ci */
7648c2ecf20Sopenharmony_cistatic int __init p9_trans_rdma_init(void)
7658c2ecf20Sopenharmony_ci{
7668c2ecf20Sopenharmony_ci	v9fs_register_trans(&p9_rdma_trans);
7678c2ecf20Sopenharmony_ci	return 0;
7688c2ecf20Sopenharmony_ci}
7698c2ecf20Sopenharmony_ci
7708c2ecf20Sopenharmony_cistatic void __exit p9_trans_rdma_exit(void)
7718c2ecf20Sopenharmony_ci{
7728c2ecf20Sopenharmony_ci	v9fs_unregister_trans(&p9_rdma_trans);
7738c2ecf20Sopenharmony_ci}
7748c2ecf20Sopenharmony_ci
7758c2ecf20Sopenharmony_cimodule_init(p9_trans_rdma_init);
7768c2ecf20Sopenharmony_cimodule_exit(p9_trans_rdma_exit);
7778c2ecf20Sopenharmony_ci
7788c2ecf20Sopenharmony_ciMODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>");
7798c2ecf20Sopenharmony_ciMODULE_DESCRIPTION("RDMA Transport for 9P");
7808c2ecf20Sopenharmony_ciMODULE_LICENSE("Dual BSD/GPL");
781