18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * linux/fs/9p/trans_rdma.c 48c2ecf20Sopenharmony_ci * 58c2ecf20Sopenharmony_ci * RDMA transport layer based on the trans_fd.c implementation. 68c2ecf20Sopenharmony_ci * 78c2ecf20Sopenharmony_ci * Copyright (C) 2008 by Tom Tucker <tom@opengridcomputing.com> 88c2ecf20Sopenharmony_ci * Copyright (C) 2006 by Russ Cox <rsc@swtch.com> 98c2ecf20Sopenharmony_ci * Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net> 108c2ecf20Sopenharmony_ci * Copyright (C) 2004-2008 by Eric Van Hensbergen <ericvh@gmail.com> 118c2ecf20Sopenharmony_ci * Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com> 128c2ecf20Sopenharmony_ci */ 138c2ecf20Sopenharmony_ci 148c2ecf20Sopenharmony_ci#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 158c2ecf20Sopenharmony_ci 168c2ecf20Sopenharmony_ci#include <linux/in.h> 178c2ecf20Sopenharmony_ci#include <linux/module.h> 188c2ecf20Sopenharmony_ci#include <linux/net.h> 198c2ecf20Sopenharmony_ci#include <linux/ipv6.h> 208c2ecf20Sopenharmony_ci#include <linux/kthread.h> 218c2ecf20Sopenharmony_ci#include <linux/errno.h> 228c2ecf20Sopenharmony_ci#include <linux/kernel.h> 238c2ecf20Sopenharmony_ci#include <linux/un.h> 248c2ecf20Sopenharmony_ci#include <linux/uaccess.h> 258c2ecf20Sopenharmony_ci#include <linux/inet.h> 268c2ecf20Sopenharmony_ci#include <linux/idr.h> 278c2ecf20Sopenharmony_ci#include <linux/file.h> 288c2ecf20Sopenharmony_ci#include <linux/parser.h> 298c2ecf20Sopenharmony_ci#include <linux/semaphore.h> 308c2ecf20Sopenharmony_ci#include <linux/slab.h> 318c2ecf20Sopenharmony_ci#include <linux/seq_file.h> 328c2ecf20Sopenharmony_ci#include <net/9p/9p.h> 338c2ecf20Sopenharmony_ci#include <net/9p/client.h> 348c2ecf20Sopenharmony_ci#include <net/9p/transport.h> 358c2ecf20Sopenharmony_ci#include <rdma/ib_verbs.h> 368c2ecf20Sopenharmony_ci#include <rdma/rdma_cm.h> 378c2ecf20Sopenharmony_ci 388c2ecf20Sopenharmony_ci#define P9_PORT 5640 398c2ecf20Sopenharmony_ci#define P9_RDMA_SQ_DEPTH 32 408c2ecf20Sopenharmony_ci#define P9_RDMA_RQ_DEPTH 32 418c2ecf20Sopenharmony_ci#define P9_RDMA_SEND_SGE 4 428c2ecf20Sopenharmony_ci#define P9_RDMA_RECV_SGE 4 438c2ecf20Sopenharmony_ci#define P9_RDMA_IRD 0 448c2ecf20Sopenharmony_ci#define P9_RDMA_ORD 0 458c2ecf20Sopenharmony_ci#define P9_RDMA_TIMEOUT 30000 /* 30 seconds */ 468c2ecf20Sopenharmony_ci#define P9_RDMA_MAXSIZE (1024*1024) /* 1MB */ 478c2ecf20Sopenharmony_ci 488c2ecf20Sopenharmony_ci/** 498c2ecf20Sopenharmony_ci * struct p9_trans_rdma - RDMA transport instance 508c2ecf20Sopenharmony_ci * 518c2ecf20Sopenharmony_ci * @state: tracks the transport state machine for connection setup and tear down 528c2ecf20Sopenharmony_ci * @cm_id: The RDMA CM ID 538c2ecf20Sopenharmony_ci * @pd: Protection Domain pointer 548c2ecf20Sopenharmony_ci * @qp: Queue Pair pointer 558c2ecf20Sopenharmony_ci * @cq: Completion Queue pointer 568c2ecf20Sopenharmony_ci * @timeout: Number of uSecs to wait for connection management events 578c2ecf20Sopenharmony_ci * @privport: Whether a privileged port may be used 588c2ecf20Sopenharmony_ci * @port: The port to use 598c2ecf20Sopenharmony_ci * @sq_depth: The depth of the Send Queue 608c2ecf20Sopenharmony_ci * @sq_sem: Semaphore for the SQ 618c2ecf20Sopenharmony_ci * @rq_depth: The depth of the Receive Queue. 628c2ecf20Sopenharmony_ci * @rq_sem: Semaphore for the RQ 638c2ecf20Sopenharmony_ci * @excess_rc : Amount of posted Receive Contexts without a pending request. 648c2ecf20Sopenharmony_ci * See rdma_request() 658c2ecf20Sopenharmony_ci * @addr: The remote peer's address 668c2ecf20Sopenharmony_ci * @req_lock: Protects the active request list 678c2ecf20Sopenharmony_ci * @cm_done: Completion event for connection management tracking 688c2ecf20Sopenharmony_ci */ 698c2ecf20Sopenharmony_cistruct p9_trans_rdma { 708c2ecf20Sopenharmony_ci enum { 718c2ecf20Sopenharmony_ci P9_RDMA_INIT, 728c2ecf20Sopenharmony_ci P9_RDMA_ADDR_RESOLVED, 738c2ecf20Sopenharmony_ci P9_RDMA_ROUTE_RESOLVED, 748c2ecf20Sopenharmony_ci P9_RDMA_CONNECTED, 758c2ecf20Sopenharmony_ci P9_RDMA_FLUSHING, 768c2ecf20Sopenharmony_ci P9_RDMA_CLOSING, 778c2ecf20Sopenharmony_ci P9_RDMA_CLOSED, 788c2ecf20Sopenharmony_ci } state; 798c2ecf20Sopenharmony_ci struct rdma_cm_id *cm_id; 808c2ecf20Sopenharmony_ci struct ib_pd *pd; 818c2ecf20Sopenharmony_ci struct ib_qp *qp; 828c2ecf20Sopenharmony_ci struct ib_cq *cq; 838c2ecf20Sopenharmony_ci long timeout; 848c2ecf20Sopenharmony_ci bool privport; 858c2ecf20Sopenharmony_ci u16 port; 868c2ecf20Sopenharmony_ci int sq_depth; 878c2ecf20Sopenharmony_ci struct semaphore sq_sem; 888c2ecf20Sopenharmony_ci int rq_depth; 898c2ecf20Sopenharmony_ci struct semaphore rq_sem; 908c2ecf20Sopenharmony_ci atomic_t excess_rc; 918c2ecf20Sopenharmony_ci struct sockaddr_in addr; 928c2ecf20Sopenharmony_ci spinlock_t req_lock; 938c2ecf20Sopenharmony_ci 948c2ecf20Sopenharmony_ci struct completion cm_done; 958c2ecf20Sopenharmony_ci}; 968c2ecf20Sopenharmony_ci 978c2ecf20Sopenharmony_cistruct p9_rdma_req; 988c2ecf20Sopenharmony_ci 998c2ecf20Sopenharmony_ci/** 1008c2ecf20Sopenharmony_ci * struct p9_rdma_context - Keeps track of in-process WR 1018c2ecf20Sopenharmony_ci * 1028c2ecf20Sopenharmony_ci * @busa: Bus address to unmap when the WR completes 1038c2ecf20Sopenharmony_ci * @req: Keeps track of requests (send) 1048c2ecf20Sopenharmony_ci * @rc: Keepts track of replies (receive) 1058c2ecf20Sopenharmony_ci */ 1068c2ecf20Sopenharmony_cistruct p9_rdma_context { 1078c2ecf20Sopenharmony_ci struct ib_cqe cqe; 1088c2ecf20Sopenharmony_ci dma_addr_t busa; 1098c2ecf20Sopenharmony_ci union { 1108c2ecf20Sopenharmony_ci struct p9_req_t *req; 1118c2ecf20Sopenharmony_ci struct p9_fcall rc; 1128c2ecf20Sopenharmony_ci }; 1138c2ecf20Sopenharmony_ci}; 1148c2ecf20Sopenharmony_ci 1158c2ecf20Sopenharmony_ci/** 1168c2ecf20Sopenharmony_ci * struct p9_rdma_opts - Collection of mount options 1178c2ecf20Sopenharmony_ci * @port: port of connection 1188c2ecf20Sopenharmony_ci * @sq_depth: The requested depth of the SQ. This really doesn't need 1198c2ecf20Sopenharmony_ci * to be any deeper than the number of threads used in the client 1208c2ecf20Sopenharmony_ci * @rq_depth: The depth of the RQ. Should be greater than or equal to SQ depth 1218c2ecf20Sopenharmony_ci * @timeout: Time to wait in msecs for CM events 1228c2ecf20Sopenharmony_ci */ 1238c2ecf20Sopenharmony_cistruct p9_rdma_opts { 1248c2ecf20Sopenharmony_ci short port; 1258c2ecf20Sopenharmony_ci bool privport; 1268c2ecf20Sopenharmony_ci int sq_depth; 1278c2ecf20Sopenharmony_ci int rq_depth; 1288c2ecf20Sopenharmony_ci long timeout; 1298c2ecf20Sopenharmony_ci}; 1308c2ecf20Sopenharmony_ci 1318c2ecf20Sopenharmony_ci/* 1328c2ecf20Sopenharmony_ci * Option Parsing (code inspired by NFS code) 1338c2ecf20Sopenharmony_ci */ 1348c2ecf20Sopenharmony_cienum { 1358c2ecf20Sopenharmony_ci /* Options that take integer arguments */ 1368c2ecf20Sopenharmony_ci Opt_port, Opt_rq_depth, Opt_sq_depth, Opt_timeout, 1378c2ecf20Sopenharmony_ci /* Options that take no argument */ 1388c2ecf20Sopenharmony_ci Opt_privport, 1398c2ecf20Sopenharmony_ci Opt_err, 1408c2ecf20Sopenharmony_ci}; 1418c2ecf20Sopenharmony_ci 1428c2ecf20Sopenharmony_cistatic match_table_t tokens = { 1438c2ecf20Sopenharmony_ci {Opt_port, "port=%u"}, 1448c2ecf20Sopenharmony_ci {Opt_sq_depth, "sq=%u"}, 1458c2ecf20Sopenharmony_ci {Opt_rq_depth, "rq=%u"}, 1468c2ecf20Sopenharmony_ci {Opt_timeout, "timeout=%u"}, 1478c2ecf20Sopenharmony_ci {Opt_privport, "privport"}, 1488c2ecf20Sopenharmony_ci {Opt_err, NULL}, 1498c2ecf20Sopenharmony_ci}; 1508c2ecf20Sopenharmony_ci 1518c2ecf20Sopenharmony_cistatic int p9_rdma_show_options(struct seq_file *m, struct p9_client *clnt) 1528c2ecf20Sopenharmony_ci{ 1538c2ecf20Sopenharmony_ci struct p9_trans_rdma *rdma = clnt->trans; 1548c2ecf20Sopenharmony_ci 1558c2ecf20Sopenharmony_ci if (rdma->port != P9_PORT) 1568c2ecf20Sopenharmony_ci seq_printf(m, ",port=%u", rdma->port); 1578c2ecf20Sopenharmony_ci if (rdma->sq_depth != P9_RDMA_SQ_DEPTH) 1588c2ecf20Sopenharmony_ci seq_printf(m, ",sq=%u", rdma->sq_depth); 1598c2ecf20Sopenharmony_ci if (rdma->rq_depth != P9_RDMA_RQ_DEPTH) 1608c2ecf20Sopenharmony_ci seq_printf(m, ",rq=%u", rdma->rq_depth); 1618c2ecf20Sopenharmony_ci if (rdma->timeout != P9_RDMA_TIMEOUT) 1628c2ecf20Sopenharmony_ci seq_printf(m, ",timeout=%lu", rdma->timeout); 1638c2ecf20Sopenharmony_ci if (rdma->privport) 1648c2ecf20Sopenharmony_ci seq_puts(m, ",privport"); 1658c2ecf20Sopenharmony_ci return 0; 1668c2ecf20Sopenharmony_ci} 1678c2ecf20Sopenharmony_ci 1688c2ecf20Sopenharmony_ci/** 1698c2ecf20Sopenharmony_ci * parse_opts - parse mount options into rdma options structure 1708c2ecf20Sopenharmony_ci * @params: options string passed from mount 1718c2ecf20Sopenharmony_ci * @opts: rdma transport-specific structure to parse options into 1728c2ecf20Sopenharmony_ci * 1738c2ecf20Sopenharmony_ci * Returns 0 upon success, -ERRNO upon failure 1748c2ecf20Sopenharmony_ci */ 1758c2ecf20Sopenharmony_cistatic int parse_opts(char *params, struct p9_rdma_opts *opts) 1768c2ecf20Sopenharmony_ci{ 1778c2ecf20Sopenharmony_ci char *p; 1788c2ecf20Sopenharmony_ci substring_t args[MAX_OPT_ARGS]; 1798c2ecf20Sopenharmony_ci int option; 1808c2ecf20Sopenharmony_ci char *options, *tmp_options; 1818c2ecf20Sopenharmony_ci 1828c2ecf20Sopenharmony_ci opts->port = P9_PORT; 1838c2ecf20Sopenharmony_ci opts->sq_depth = P9_RDMA_SQ_DEPTH; 1848c2ecf20Sopenharmony_ci opts->rq_depth = P9_RDMA_RQ_DEPTH; 1858c2ecf20Sopenharmony_ci opts->timeout = P9_RDMA_TIMEOUT; 1868c2ecf20Sopenharmony_ci opts->privport = false; 1878c2ecf20Sopenharmony_ci 1888c2ecf20Sopenharmony_ci if (!params) 1898c2ecf20Sopenharmony_ci return 0; 1908c2ecf20Sopenharmony_ci 1918c2ecf20Sopenharmony_ci tmp_options = kstrdup(params, GFP_KERNEL); 1928c2ecf20Sopenharmony_ci if (!tmp_options) { 1938c2ecf20Sopenharmony_ci p9_debug(P9_DEBUG_ERROR, 1948c2ecf20Sopenharmony_ci "failed to allocate copy of option string\n"); 1958c2ecf20Sopenharmony_ci return -ENOMEM; 1968c2ecf20Sopenharmony_ci } 1978c2ecf20Sopenharmony_ci options = tmp_options; 1988c2ecf20Sopenharmony_ci 1998c2ecf20Sopenharmony_ci while ((p = strsep(&options, ",")) != NULL) { 2008c2ecf20Sopenharmony_ci int token; 2018c2ecf20Sopenharmony_ci int r; 2028c2ecf20Sopenharmony_ci if (!*p) 2038c2ecf20Sopenharmony_ci continue; 2048c2ecf20Sopenharmony_ci token = match_token(p, tokens, args); 2058c2ecf20Sopenharmony_ci if ((token != Opt_err) && (token != Opt_privport)) { 2068c2ecf20Sopenharmony_ci r = match_int(&args[0], &option); 2078c2ecf20Sopenharmony_ci if (r < 0) { 2088c2ecf20Sopenharmony_ci p9_debug(P9_DEBUG_ERROR, 2098c2ecf20Sopenharmony_ci "integer field, but no integer?\n"); 2108c2ecf20Sopenharmony_ci continue; 2118c2ecf20Sopenharmony_ci } 2128c2ecf20Sopenharmony_ci } 2138c2ecf20Sopenharmony_ci switch (token) { 2148c2ecf20Sopenharmony_ci case Opt_port: 2158c2ecf20Sopenharmony_ci opts->port = option; 2168c2ecf20Sopenharmony_ci break; 2178c2ecf20Sopenharmony_ci case Opt_sq_depth: 2188c2ecf20Sopenharmony_ci opts->sq_depth = option; 2198c2ecf20Sopenharmony_ci break; 2208c2ecf20Sopenharmony_ci case Opt_rq_depth: 2218c2ecf20Sopenharmony_ci opts->rq_depth = option; 2228c2ecf20Sopenharmony_ci break; 2238c2ecf20Sopenharmony_ci case Opt_timeout: 2248c2ecf20Sopenharmony_ci opts->timeout = option; 2258c2ecf20Sopenharmony_ci break; 2268c2ecf20Sopenharmony_ci case Opt_privport: 2278c2ecf20Sopenharmony_ci opts->privport = true; 2288c2ecf20Sopenharmony_ci break; 2298c2ecf20Sopenharmony_ci default: 2308c2ecf20Sopenharmony_ci continue; 2318c2ecf20Sopenharmony_ci } 2328c2ecf20Sopenharmony_ci } 2338c2ecf20Sopenharmony_ci /* RQ must be at least as large as the SQ */ 2348c2ecf20Sopenharmony_ci opts->rq_depth = max(opts->rq_depth, opts->sq_depth); 2358c2ecf20Sopenharmony_ci kfree(tmp_options); 2368c2ecf20Sopenharmony_ci return 0; 2378c2ecf20Sopenharmony_ci} 2388c2ecf20Sopenharmony_ci 2398c2ecf20Sopenharmony_cistatic int 2408c2ecf20Sopenharmony_cip9_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) 2418c2ecf20Sopenharmony_ci{ 2428c2ecf20Sopenharmony_ci struct p9_client *c = id->context; 2438c2ecf20Sopenharmony_ci struct p9_trans_rdma *rdma = c->trans; 2448c2ecf20Sopenharmony_ci switch (event->event) { 2458c2ecf20Sopenharmony_ci case RDMA_CM_EVENT_ADDR_RESOLVED: 2468c2ecf20Sopenharmony_ci BUG_ON(rdma->state != P9_RDMA_INIT); 2478c2ecf20Sopenharmony_ci rdma->state = P9_RDMA_ADDR_RESOLVED; 2488c2ecf20Sopenharmony_ci break; 2498c2ecf20Sopenharmony_ci 2508c2ecf20Sopenharmony_ci case RDMA_CM_EVENT_ROUTE_RESOLVED: 2518c2ecf20Sopenharmony_ci BUG_ON(rdma->state != P9_RDMA_ADDR_RESOLVED); 2528c2ecf20Sopenharmony_ci rdma->state = P9_RDMA_ROUTE_RESOLVED; 2538c2ecf20Sopenharmony_ci break; 2548c2ecf20Sopenharmony_ci 2558c2ecf20Sopenharmony_ci case RDMA_CM_EVENT_ESTABLISHED: 2568c2ecf20Sopenharmony_ci BUG_ON(rdma->state != P9_RDMA_ROUTE_RESOLVED); 2578c2ecf20Sopenharmony_ci rdma->state = P9_RDMA_CONNECTED; 2588c2ecf20Sopenharmony_ci break; 2598c2ecf20Sopenharmony_ci 2608c2ecf20Sopenharmony_ci case RDMA_CM_EVENT_DISCONNECTED: 2618c2ecf20Sopenharmony_ci if (rdma) 2628c2ecf20Sopenharmony_ci rdma->state = P9_RDMA_CLOSED; 2638c2ecf20Sopenharmony_ci c->status = Disconnected; 2648c2ecf20Sopenharmony_ci break; 2658c2ecf20Sopenharmony_ci 2668c2ecf20Sopenharmony_ci case RDMA_CM_EVENT_TIMEWAIT_EXIT: 2678c2ecf20Sopenharmony_ci break; 2688c2ecf20Sopenharmony_ci 2698c2ecf20Sopenharmony_ci case RDMA_CM_EVENT_ADDR_CHANGE: 2708c2ecf20Sopenharmony_ci case RDMA_CM_EVENT_ROUTE_ERROR: 2718c2ecf20Sopenharmony_ci case RDMA_CM_EVENT_DEVICE_REMOVAL: 2728c2ecf20Sopenharmony_ci case RDMA_CM_EVENT_MULTICAST_JOIN: 2738c2ecf20Sopenharmony_ci case RDMA_CM_EVENT_MULTICAST_ERROR: 2748c2ecf20Sopenharmony_ci case RDMA_CM_EVENT_REJECTED: 2758c2ecf20Sopenharmony_ci case RDMA_CM_EVENT_CONNECT_REQUEST: 2768c2ecf20Sopenharmony_ci case RDMA_CM_EVENT_CONNECT_RESPONSE: 2778c2ecf20Sopenharmony_ci case RDMA_CM_EVENT_CONNECT_ERROR: 2788c2ecf20Sopenharmony_ci case RDMA_CM_EVENT_ADDR_ERROR: 2798c2ecf20Sopenharmony_ci case RDMA_CM_EVENT_UNREACHABLE: 2808c2ecf20Sopenharmony_ci c->status = Disconnected; 2818c2ecf20Sopenharmony_ci rdma_disconnect(rdma->cm_id); 2828c2ecf20Sopenharmony_ci break; 2838c2ecf20Sopenharmony_ci default: 2848c2ecf20Sopenharmony_ci BUG(); 2858c2ecf20Sopenharmony_ci } 2868c2ecf20Sopenharmony_ci complete(&rdma->cm_done); 2878c2ecf20Sopenharmony_ci return 0; 2888c2ecf20Sopenharmony_ci} 2898c2ecf20Sopenharmony_ci 2908c2ecf20Sopenharmony_cistatic void 2918c2ecf20Sopenharmony_cirecv_done(struct ib_cq *cq, struct ib_wc *wc) 2928c2ecf20Sopenharmony_ci{ 2938c2ecf20Sopenharmony_ci struct p9_client *client = cq->cq_context; 2948c2ecf20Sopenharmony_ci struct p9_trans_rdma *rdma = client->trans; 2958c2ecf20Sopenharmony_ci struct p9_rdma_context *c = 2968c2ecf20Sopenharmony_ci container_of(wc->wr_cqe, struct p9_rdma_context, cqe); 2978c2ecf20Sopenharmony_ci struct p9_req_t *req; 2988c2ecf20Sopenharmony_ci int err = 0; 2998c2ecf20Sopenharmony_ci int16_t tag; 3008c2ecf20Sopenharmony_ci 3018c2ecf20Sopenharmony_ci req = NULL; 3028c2ecf20Sopenharmony_ci ib_dma_unmap_single(rdma->cm_id->device, c->busa, client->msize, 3038c2ecf20Sopenharmony_ci DMA_FROM_DEVICE); 3048c2ecf20Sopenharmony_ci 3058c2ecf20Sopenharmony_ci if (wc->status != IB_WC_SUCCESS) 3068c2ecf20Sopenharmony_ci goto err_out; 3078c2ecf20Sopenharmony_ci 3088c2ecf20Sopenharmony_ci c->rc.size = wc->byte_len; 3098c2ecf20Sopenharmony_ci err = p9_parse_header(&c->rc, NULL, NULL, &tag, 1); 3108c2ecf20Sopenharmony_ci if (err) 3118c2ecf20Sopenharmony_ci goto err_out; 3128c2ecf20Sopenharmony_ci 3138c2ecf20Sopenharmony_ci req = p9_tag_lookup(client, tag); 3148c2ecf20Sopenharmony_ci if (!req) 3158c2ecf20Sopenharmony_ci goto err_out; 3168c2ecf20Sopenharmony_ci 3178c2ecf20Sopenharmony_ci /* Check that we have not yet received a reply for this request. 3188c2ecf20Sopenharmony_ci */ 3198c2ecf20Sopenharmony_ci if (unlikely(req->rc.sdata)) { 3208c2ecf20Sopenharmony_ci pr_err("Duplicate reply for request %d", tag); 3218c2ecf20Sopenharmony_ci goto err_out; 3228c2ecf20Sopenharmony_ci } 3238c2ecf20Sopenharmony_ci 3248c2ecf20Sopenharmony_ci req->rc.size = c->rc.size; 3258c2ecf20Sopenharmony_ci req->rc.sdata = c->rc.sdata; 3268c2ecf20Sopenharmony_ci p9_client_cb(client, req, REQ_STATUS_RCVD); 3278c2ecf20Sopenharmony_ci 3288c2ecf20Sopenharmony_ci out: 3298c2ecf20Sopenharmony_ci up(&rdma->rq_sem); 3308c2ecf20Sopenharmony_ci kfree(c); 3318c2ecf20Sopenharmony_ci return; 3328c2ecf20Sopenharmony_ci 3338c2ecf20Sopenharmony_ci err_out: 3348c2ecf20Sopenharmony_ci p9_debug(P9_DEBUG_ERROR, "req %p err %d status %d\n", 3358c2ecf20Sopenharmony_ci req, err, wc->status); 3368c2ecf20Sopenharmony_ci rdma->state = P9_RDMA_FLUSHING; 3378c2ecf20Sopenharmony_ci client->status = Disconnected; 3388c2ecf20Sopenharmony_ci goto out; 3398c2ecf20Sopenharmony_ci} 3408c2ecf20Sopenharmony_ci 3418c2ecf20Sopenharmony_cistatic void 3428c2ecf20Sopenharmony_cisend_done(struct ib_cq *cq, struct ib_wc *wc) 3438c2ecf20Sopenharmony_ci{ 3448c2ecf20Sopenharmony_ci struct p9_client *client = cq->cq_context; 3458c2ecf20Sopenharmony_ci struct p9_trans_rdma *rdma = client->trans; 3468c2ecf20Sopenharmony_ci struct p9_rdma_context *c = 3478c2ecf20Sopenharmony_ci container_of(wc->wr_cqe, struct p9_rdma_context, cqe); 3488c2ecf20Sopenharmony_ci 3498c2ecf20Sopenharmony_ci ib_dma_unmap_single(rdma->cm_id->device, 3508c2ecf20Sopenharmony_ci c->busa, c->req->tc.size, 3518c2ecf20Sopenharmony_ci DMA_TO_DEVICE); 3528c2ecf20Sopenharmony_ci up(&rdma->sq_sem); 3538c2ecf20Sopenharmony_ci p9_req_put(c->req); 3548c2ecf20Sopenharmony_ci kfree(c); 3558c2ecf20Sopenharmony_ci} 3568c2ecf20Sopenharmony_ci 3578c2ecf20Sopenharmony_cistatic void qp_event_handler(struct ib_event *event, void *context) 3588c2ecf20Sopenharmony_ci{ 3598c2ecf20Sopenharmony_ci p9_debug(P9_DEBUG_ERROR, "QP event %d context %p\n", 3608c2ecf20Sopenharmony_ci event->event, context); 3618c2ecf20Sopenharmony_ci} 3628c2ecf20Sopenharmony_ci 3638c2ecf20Sopenharmony_cistatic void rdma_destroy_trans(struct p9_trans_rdma *rdma) 3648c2ecf20Sopenharmony_ci{ 3658c2ecf20Sopenharmony_ci if (!rdma) 3668c2ecf20Sopenharmony_ci return; 3678c2ecf20Sopenharmony_ci 3688c2ecf20Sopenharmony_ci if (rdma->qp && !IS_ERR(rdma->qp)) 3698c2ecf20Sopenharmony_ci ib_destroy_qp(rdma->qp); 3708c2ecf20Sopenharmony_ci 3718c2ecf20Sopenharmony_ci if (rdma->pd && !IS_ERR(rdma->pd)) 3728c2ecf20Sopenharmony_ci ib_dealloc_pd(rdma->pd); 3738c2ecf20Sopenharmony_ci 3748c2ecf20Sopenharmony_ci if (rdma->cq && !IS_ERR(rdma->cq)) 3758c2ecf20Sopenharmony_ci ib_free_cq(rdma->cq); 3768c2ecf20Sopenharmony_ci 3778c2ecf20Sopenharmony_ci if (rdma->cm_id && !IS_ERR(rdma->cm_id)) 3788c2ecf20Sopenharmony_ci rdma_destroy_id(rdma->cm_id); 3798c2ecf20Sopenharmony_ci 3808c2ecf20Sopenharmony_ci kfree(rdma); 3818c2ecf20Sopenharmony_ci} 3828c2ecf20Sopenharmony_ci 3838c2ecf20Sopenharmony_cistatic int 3848c2ecf20Sopenharmony_cipost_recv(struct p9_client *client, struct p9_rdma_context *c) 3858c2ecf20Sopenharmony_ci{ 3868c2ecf20Sopenharmony_ci struct p9_trans_rdma *rdma = client->trans; 3878c2ecf20Sopenharmony_ci struct ib_recv_wr wr; 3888c2ecf20Sopenharmony_ci struct ib_sge sge; 3898c2ecf20Sopenharmony_ci int ret; 3908c2ecf20Sopenharmony_ci 3918c2ecf20Sopenharmony_ci c->busa = ib_dma_map_single(rdma->cm_id->device, 3928c2ecf20Sopenharmony_ci c->rc.sdata, client->msize, 3938c2ecf20Sopenharmony_ci DMA_FROM_DEVICE); 3948c2ecf20Sopenharmony_ci if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) 3958c2ecf20Sopenharmony_ci goto error; 3968c2ecf20Sopenharmony_ci 3978c2ecf20Sopenharmony_ci c->cqe.done = recv_done; 3988c2ecf20Sopenharmony_ci 3998c2ecf20Sopenharmony_ci sge.addr = c->busa; 4008c2ecf20Sopenharmony_ci sge.length = client->msize; 4018c2ecf20Sopenharmony_ci sge.lkey = rdma->pd->local_dma_lkey; 4028c2ecf20Sopenharmony_ci 4038c2ecf20Sopenharmony_ci wr.next = NULL; 4048c2ecf20Sopenharmony_ci wr.wr_cqe = &c->cqe; 4058c2ecf20Sopenharmony_ci wr.sg_list = &sge; 4068c2ecf20Sopenharmony_ci wr.num_sge = 1; 4078c2ecf20Sopenharmony_ci 4088c2ecf20Sopenharmony_ci ret = ib_post_recv(rdma->qp, &wr, NULL); 4098c2ecf20Sopenharmony_ci if (ret) 4108c2ecf20Sopenharmony_ci ib_dma_unmap_single(rdma->cm_id->device, c->busa, 4118c2ecf20Sopenharmony_ci client->msize, DMA_FROM_DEVICE); 4128c2ecf20Sopenharmony_ci return ret; 4138c2ecf20Sopenharmony_ci 4148c2ecf20Sopenharmony_ci error: 4158c2ecf20Sopenharmony_ci p9_debug(P9_DEBUG_ERROR, "EIO\n"); 4168c2ecf20Sopenharmony_ci return -EIO; 4178c2ecf20Sopenharmony_ci} 4188c2ecf20Sopenharmony_ci 4198c2ecf20Sopenharmony_cistatic int rdma_request(struct p9_client *client, struct p9_req_t *req) 4208c2ecf20Sopenharmony_ci{ 4218c2ecf20Sopenharmony_ci struct p9_trans_rdma *rdma = client->trans; 4228c2ecf20Sopenharmony_ci struct ib_send_wr wr; 4238c2ecf20Sopenharmony_ci struct ib_sge sge; 4248c2ecf20Sopenharmony_ci int err = 0; 4258c2ecf20Sopenharmony_ci unsigned long flags; 4268c2ecf20Sopenharmony_ci struct p9_rdma_context *c = NULL; 4278c2ecf20Sopenharmony_ci struct p9_rdma_context *rpl_context = NULL; 4288c2ecf20Sopenharmony_ci 4298c2ecf20Sopenharmony_ci /* When an error occurs between posting the recv and the send, 4308c2ecf20Sopenharmony_ci * there will be a receive context posted without a pending request. 4318c2ecf20Sopenharmony_ci * Since there is no way to "un-post" it, we remember it and skip 4328c2ecf20Sopenharmony_ci * post_recv() for the next request. 4338c2ecf20Sopenharmony_ci * So here, 4348c2ecf20Sopenharmony_ci * see if we are this `next request' and need to absorb an excess rc. 4358c2ecf20Sopenharmony_ci * If yes, then drop and free our own, and do not recv_post(). 4368c2ecf20Sopenharmony_ci **/ 4378c2ecf20Sopenharmony_ci if (unlikely(atomic_read(&rdma->excess_rc) > 0)) { 4388c2ecf20Sopenharmony_ci if ((atomic_sub_return(1, &rdma->excess_rc) >= 0)) { 4398c2ecf20Sopenharmony_ci /* Got one! */ 4408c2ecf20Sopenharmony_ci p9_fcall_fini(&req->rc); 4418c2ecf20Sopenharmony_ci req->rc.sdata = NULL; 4428c2ecf20Sopenharmony_ci goto dont_need_post_recv; 4438c2ecf20Sopenharmony_ci } else { 4448c2ecf20Sopenharmony_ci /* We raced and lost. */ 4458c2ecf20Sopenharmony_ci atomic_inc(&rdma->excess_rc); 4468c2ecf20Sopenharmony_ci } 4478c2ecf20Sopenharmony_ci } 4488c2ecf20Sopenharmony_ci 4498c2ecf20Sopenharmony_ci /* Allocate an fcall for the reply */ 4508c2ecf20Sopenharmony_ci rpl_context = kmalloc(sizeof *rpl_context, GFP_NOFS); 4518c2ecf20Sopenharmony_ci if (!rpl_context) { 4528c2ecf20Sopenharmony_ci err = -ENOMEM; 4538c2ecf20Sopenharmony_ci goto recv_error; 4548c2ecf20Sopenharmony_ci } 4558c2ecf20Sopenharmony_ci rpl_context->rc.sdata = req->rc.sdata; 4568c2ecf20Sopenharmony_ci 4578c2ecf20Sopenharmony_ci /* 4588c2ecf20Sopenharmony_ci * Post a receive buffer for this request. We need to ensure 4598c2ecf20Sopenharmony_ci * there is a reply buffer available for every outstanding 4608c2ecf20Sopenharmony_ci * request. A flushed request can result in no reply for an 4618c2ecf20Sopenharmony_ci * outstanding request, so we must keep a count to avoid 4628c2ecf20Sopenharmony_ci * overflowing the RQ. 4638c2ecf20Sopenharmony_ci */ 4648c2ecf20Sopenharmony_ci if (down_interruptible(&rdma->rq_sem)) { 4658c2ecf20Sopenharmony_ci err = -EINTR; 4668c2ecf20Sopenharmony_ci goto recv_error; 4678c2ecf20Sopenharmony_ci } 4688c2ecf20Sopenharmony_ci 4698c2ecf20Sopenharmony_ci err = post_recv(client, rpl_context); 4708c2ecf20Sopenharmony_ci if (err) { 4718c2ecf20Sopenharmony_ci p9_debug(P9_DEBUG_ERROR, "POST RECV failed: %d\n", err); 4728c2ecf20Sopenharmony_ci goto recv_error; 4738c2ecf20Sopenharmony_ci } 4748c2ecf20Sopenharmony_ci /* remove posted receive buffer from request structure */ 4758c2ecf20Sopenharmony_ci req->rc.sdata = NULL; 4768c2ecf20Sopenharmony_ci 4778c2ecf20Sopenharmony_cidont_need_post_recv: 4788c2ecf20Sopenharmony_ci /* Post the request */ 4798c2ecf20Sopenharmony_ci c = kmalloc(sizeof *c, GFP_NOFS); 4808c2ecf20Sopenharmony_ci if (!c) { 4818c2ecf20Sopenharmony_ci err = -ENOMEM; 4828c2ecf20Sopenharmony_ci goto send_error; 4838c2ecf20Sopenharmony_ci } 4848c2ecf20Sopenharmony_ci c->req = req; 4858c2ecf20Sopenharmony_ci 4868c2ecf20Sopenharmony_ci c->busa = ib_dma_map_single(rdma->cm_id->device, 4878c2ecf20Sopenharmony_ci c->req->tc.sdata, c->req->tc.size, 4888c2ecf20Sopenharmony_ci DMA_TO_DEVICE); 4898c2ecf20Sopenharmony_ci if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) { 4908c2ecf20Sopenharmony_ci err = -EIO; 4918c2ecf20Sopenharmony_ci goto send_error; 4928c2ecf20Sopenharmony_ci } 4938c2ecf20Sopenharmony_ci 4948c2ecf20Sopenharmony_ci c->cqe.done = send_done; 4958c2ecf20Sopenharmony_ci 4968c2ecf20Sopenharmony_ci sge.addr = c->busa; 4978c2ecf20Sopenharmony_ci sge.length = c->req->tc.size; 4988c2ecf20Sopenharmony_ci sge.lkey = rdma->pd->local_dma_lkey; 4998c2ecf20Sopenharmony_ci 5008c2ecf20Sopenharmony_ci wr.next = NULL; 5018c2ecf20Sopenharmony_ci wr.wr_cqe = &c->cqe; 5028c2ecf20Sopenharmony_ci wr.opcode = IB_WR_SEND; 5038c2ecf20Sopenharmony_ci wr.send_flags = IB_SEND_SIGNALED; 5048c2ecf20Sopenharmony_ci wr.sg_list = &sge; 5058c2ecf20Sopenharmony_ci wr.num_sge = 1; 5068c2ecf20Sopenharmony_ci 5078c2ecf20Sopenharmony_ci if (down_interruptible(&rdma->sq_sem)) { 5088c2ecf20Sopenharmony_ci err = -EINTR; 5098c2ecf20Sopenharmony_ci goto dma_unmap; 5108c2ecf20Sopenharmony_ci } 5118c2ecf20Sopenharmony_ci 5128c2ecf20Sopenharmony_ci /* Mark request as `sent' *before* we actually send it, 5138c2ecf20Sopenharmony_ci * because doing if after could erase the REQ_STATUS_RCVD 5148c2ecf20Sopenharmony_ci * status in case of a very fast reply. 5158c2ecf20Sopenharmony_ci */ 5168c2ecf20Sopenharmony_ci req->status = REQ_STATUS_SENT; 5178c2ecf20Sopenharmony_ci err = ib_post_send(rdma->qp, &wr, NULL); 5188c2ecf20Sopenharmony_ci if (err) 5198c2ecf20Sopenharmony_ci goto dma_unmap; 5208c2ecf20Sopenharmony_ci 5218c2ecf20Sopenharmony_ci /* Success */ 5228c2ecf20Sopenharmony_ci return 0; 5238c2ecf20Sopenharmony_ci 5248c2ecf20Sopenharmony_cidma_unmap: 5258c2ecf20Sopenharmony_ci ib_dma_unmap_single(rdma->cm_id->device, c->busa, 5268c2ecf20Sopenharmony_ci c->req->tc.size, DMA_TO_DEVICE); 5278c2ecf20Sopenharmony_ci /* Handle errors that happened during or while preparing the send: */ 5288c2ecf20Sopenharmony_ci send_error: 5298c2ecf20Sopenharmony_ci req->status = REQ_STATUS_ERROR; 5308c2ecf20Sopenharmony_ci kfree(c); 5318c2ecf20Sopenharmony_ci p9_debug(P9_DEBUG_ERROR, "Error %d in rdma_request()\n", err); 5328c2ecf20Sopenharmony_ci 5338c2ecf20Sopenharmony_ci /* Ach. 5348c2ecf20Sopenharmony_ci * We did recv_post(), but not send. We have one recv_post in excess. 5358c2ecf20Sopenharmony_ci */ 5368c2ecf20Sopenharmony_ci atomic_inc(&rdma->excess_rc); 5378c2ecf20Sopenharmony_ci return err; 5388c2ecf20Sopenharmony_ci 5398c2ecf20Sopenharmony_ci /* Handle errors that happened during or while preparing post_recv(): */ 5408c2ecf20Sopenharmony_ci recv_error: 5418c2ecf20Sopenharmony_ci kfree(rpl_context); 5428c2ecf20Sopenharmony_ci spin_lock_irqsave(&rdma->req_lock, flags); 5438c2ecf20Sopenharmony_ci if (err != -EINTR && rdma->state < P9_RDMA_CLOSING) { 5448c2ecf20Sopenharmony_ci rdma->state = P9_RDMA_CLOSING; 5458c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&rdma->req_lock, flags); 5468c2ecf20Sopenharmony_ci rdma_disconnect(rdma->cm_id); 5478c2ecf20Sopenharmony_ci } else 5488c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&rdma->req_lock, flags); 5498c2ecf20Sopenharmony_ci return err; 5508c2ecf20Sopenharmony_ci} 5518c2ecf20Sopenharmony_ci 5528c2ecf20Sopenharmony_cistatic void rdma_close(struct p9_client *client) 5538c2ecf20Sopenharmony_ci{ 5548c2ecf20Sopenharmony_ci struct p9_trans_rdma *rdma; 5558c2ecf20Sopenharmony_ci 5568c2ecf20Sopenharmony_ci if (!client) 5578c2ecf20Sopenharmony_ci return; 5588c2ecf20Sopenharmony_ci 5598c2ecf20Sopenharmony_ci rdma = client->trans; 5608c2ecf20Sopenharmony_ci if (!rdma) 5618c2ecf20Sopenharmony_ci return; 5628c2ecf20Sopenharmony_ci 5638c2ecf20Sopenharmony_ci client->status = Disconnected; 5648c2ecf20Sopenharmony_ci rdma_disconnect(rdma->cm_id); 5658c2ecf20Sopenharmony_ci rdma_destroy_trans(rdma); 5668c2ecf20Sopenharmony_ci} 5678c2ecf20Sopenharmony_ci 5688c2ecf20Sopenharmony_ci/** 5698c2ecf20Sopenharmony_ci * alloc_rdma - Allocate and initialize the rdma transport structure 5708c2ecf20Sopenharmony_ci * @opts: Mount options structure 5718c2ecf20Sopenharmony_ci */ 5728c2ecf20Sopenharmony_cistatic struct p9_trans_rdma *alloc_rdma(struct p9_rdma_opts *opts) 5738c2ecf20Sopenharmony_ci{ 5748c2ecf20Sopenharmony_ci struct p9_trans_rdma *rdma; 5758c2ecf20Sopenharmony_ci 5768c2ecf20Sopenharmony_ci rdma = kzalloc(sizeof(struct p9_trans_rdma), GFP_KERNEL); 5778c2ecf20Sopenharmony_ci if (!rdma) 5788c2ecf20Sopenharmony_ci return NULL; 5798c2ecf20Sopenharmony_ci 5808c2ecf20Sopenharmony_ci rdma->port = opts->port; 5818c2ecf20Sopenharmony_ci rdma->privport = opts->privport; 5828c2ecf20Sopenharmony_ci rdma->sq_depth = opts->sq_depth; 5838c2ecf20Sopenharmony_ci rdma->rq_depth = opts->rq_depth; 5848c2ecf20Sopenharmony_ci rdma->timeout = opts->timeout; 5858c2ecf20Sopenharmony_ci spin_lock_init(&rdma->req_lock); 5868c2ecf20Sopenharmony_ci init_completion(&rdma->cm_done); 5878c2ecf20Sopenharmony_ci sema_init(&rdma->sq_sem, rdma->sq_depth); 5888c2ecf20Sopenharmony_ci sema_init(&rdma->rq_sem, rdma->rq_depth); 5898c2ecf20Sopenharmony_ci atomic_set(&rdma->excess_rc, 0); 5908c2ecf20Sopenharmony_ci 5918c2ecf20Sopenharmony_ci return rdma; 5928c2ecf20Sopenharmony_ci} 5938c2ecf20Sopenharmony_ci 5948c2ecf20Sopenharmony_cistatic int rdma_cancel(struct p9_client *client, struct p9_req_t *req) 5958c2ecf20Sopenharmony_ci{ 5968c2ecf20Sopenharmony_ci /* Nothing to do here. 5978c2ecf20Sopenharmony_ci * We will take care of it (if we have to) in rdma_cancelled() 5988c2ecf20Sopenharmony_ci */ 5998c2ecf20Sopenharmony_ci return 1; 6008c2ecf20Sopenharmony_ci} 6018c2ecf20Sopenharmony_ci 6028c2ecf20Sopenharmony_ci/* A request has been fully flushed without a reply. 6038c2ecf20Sopenharmony_ci * That means we have posted one buffer in excess. 6048c2ecf20Sopenharmony_ci */ 6058c2ecf20Sopenharmony_cistatic int rdma_cancelled(struct p9_client *client, struct p9_req_t *req) 6068c2ecf20Sopenharmony_ci{ 6078c2ecf20Sopenharmony_ci struct p9_trans_rdma *rdma = client->trans; 6088c2ecf20Sopenharmony_ci atomic_inc(&rdma->excess_rc); 6098c2ecf20Sopenharmony_ci return 0; 6108c2ecf20Sopenharmony_ci} 6118c2ecf20Sopenharmony_ci 6128c2ecf20Sopenharmony_cistatic int p9_rdma_bind_privport(struct p9_trans_rdma *rdma) 6138c2ecf20Sopenharmony_ci{ 6148c2ecf20Sopenharmony_ci struct sockaddr_in cl = { 6158c2ecf20Sopenharmony_ci .sin_family = AF_INET, 6168c2ecf20Sopenharmony_ci .sin_addr.s_addr = htonl(INADDR_ANY), 6178c2ecf20Sopenharmony_ci }; 6188c2ecf20Sopenharmony_ci int port, err = -EINVAL; 6198c2ecf20Sopenharmony_ci 6208c2ecf20Sopenharmony_ci for (port = P9_DEF_MAX_RESVPORT; port >= P9_DEF_MIN_RESVPORT; port--) { 6218c2ecf20Sopenharmony_ci cl.sin_port = htons((ushort)port); 6228c2ecf20Sopenharmony_ci err = rdma_bind_addr(rdma->cm_id, (struct sockaddr *)&cl); 6238c2ecf20Sopenharmony_ci if (err != -EADDRINUSE) 6248c2ecf20Sopenharmony_ci break; 6258c2ecf20Sopenharmony_ci } 6268c2ecf20Sopenharmony_ci return err; 6278c2ecf20Sopenharmony_ci} 6288c2ecf20Sopenharmony_ci 6298c2ecf20Sopenharmony_ci/** 6308c2ecf20Sopenharmony_ci * rdma_create_trans - Transport method for creating a transport instance 6318c2ecf20Sopenharmony_ci * @client: client instance 6328c2ecf20Sopenharmony_ci * @addr: IP address string 6338c2ecf20Sopenharmony_ci * @args: Mount options string 6348c2ecf20Sopenharmony_ci */ 6358c2ecf20Sopenharmony_cistatic int 6368c2ecf20Sopenharmony_cirdma_create_trans(struct p9_client *client, const char *addr, char *args) 6378c2ecf20Sopenharmony_ci{ 6388c2ecf20Sopenharmony_ci int err; 6398c2ecf20Sopenharmony_ci struct p9_rdma_opts opts; 6408c2ecf20Sopenharmony_ci struct p9_trans_rdma *rdma; 6418c2ecf20Sopenharmony_ci struct rdma_conn_param conn_param; 6428c2ecf20Sopenharmony_ci struct ib_qp_init_attr qp_attr; 6438c2ecf20Sopenharmony_ci 6448c2ecf20Sopenharmony_ci if (addr == NULL) 6458c2ecf20Sopenharmony_ci return -EINVAL; 6468c2ecf20Sopenharmony_ci 6478c2ecf20Sopenharmony_ci /* Parse the transport specific mount options */ 6488c2ecf20Sopenharmony_ci err = parse_opts(args, &opts); 6498c2ecf20Sopenharmony_ci if (err < 0) 6508c2ecf20Sopenharmony_ci return err; 6518c2ecf20Sopenharmony_ci 6528c2ecf20Sopenharmony_ci /* Create and initialize the RDMA transport structure */ 6538c2ecf20Sopenharmony_ci rdma = alloc_rdma(&opts); 6548c2ecf20Sopenharmony_ci if (!rdma) 6558c2ecf20Sopenharmony_ci return -ENOMEM; 6568c2ecf20Sopenharmony_ci 6578c2ecf20Sopenharmony_ci /* Create the RDMA CM ID */ 6588c2ecf20Sopenharmony_ci rdma->cm_id = rdma_create_id(&init_net, p9_cm_event_handler, client, 6598c2ecf20Sopenharmony_ci RDMA_PS_TCP, IB_QPT_RC); 6608c2ecf20Sopenharmony_ci if (IS_ERR(rdma->cm_id)) 6618c2ecf20Sopenharmony_ci goto error; 6628c2ecf20Sopenharmony_ci 6638c2ecf20Sopenharmony_ci /* Associate the client with the transport */ 6648c2ecf20Sopenharmony_ci client->trans = rdma; 6658c2ecf20Sopenharmony_ci 6668c2ecf20Sopenharmony_ci /* Bind to a privileged port if we need to */ 6678c2ecf20Sopenharmony_ci if (opts.privport) { 6688c2ecf20Sopenharmony_ci err = p9_rdma_bind_privport(rdma); 6698c2ecf20Sopenharmony_ci if (err < 0) { 6708c2ecf20Sopenharmony_ci pr_err("%s (%d): problem binding to privport: %d\n", 6718c2ecf20Sopenharmony_ci __func__, task_pid_nr(current), -err); 6728c2ecf20Sopenharmony_ci goto error; 6738c2ecf20Sopenharmony_ci } 6748c2ecf20Sopenharmony_ci } 6758c2ecf20Sopenharmony_ci 6768c2ecf20Sopenharmony_ci /* Resolve the server's address */ 6778c2ecf20Sopenharmony_ci rdma->addr.sin_family = AF_INET; 6788c2ecf20Sopenharmony_ci rdma->addr.sin_addr.s_addr = in_aton(addr); 6798c2ecf20Sopenharmony_ci rdma->addr.sin_port = htons(opts.port); 6808c2ecf20Sopenharmony_ci err = rdma_resolve_addr(rdma->cm_id, NULL, 6818c2ecf20Sopenharmony_ci (struct sockaddr *)&rdma->addr, 6828c2ecf20Sopenharmony_ci rdma->timeout); 6838c2ecf20Sopenharmony_ci if (err) 6848c2ecf20Sopenharmony_ci goto error; 6858c2ecf20Sopenharmony_ci err = wait_for_completion_interruptible(&rdma->cm_done); 6868c2ecf20Sopenharmony_ci if (err || (rdma->state != P9_RDMA_ADDR_RESOLVED)) 6878c2ecf20Sopenharmony_ci goto error; 6888c2ecf20Sopenharmony_ci 6898c2ecf20Sopenharmony_ci /* Resolve the route to the server */ 6908c2ecf20Sopenharmony_ci err = rdma_resolve_route(rdma->cm_id, rdma->timeout); 6918c2ecf20Sopenharmony_ci if (err) 6928c2ecf20Sopenharmony_ci goto error; 6938c2ecf20Sopenharmony_ci err = wait_for_completion_interruptible(&rdma->cm_done); 6948c2ecf20Sopenharmony_ci if (err || (rdma->state != P9_RDMA_ROUTE_RESOLVED)) 6958c2ecf20Sopenharmony_ci goto error; 6968c2ecf20Sopenharmony_ci 6978c2ecf20Sopenharmony_ci /* Create the Completion Queue */ 6988c2ecf20Sopenharmony_ci rdma->cq = ib_alloc_cq_any(rdma->cm_id->device, client, 6998c2ecf20Sopenharmony_ci opts.sq_depth + opts.rq_depth + 1, 7008c2ecf20Sopenharmony_ci IB_POLL_SOFTIRQ); 7018c2ecf20Sopenharmony_ci if (IS_ERR(rdma->cq)) 7028c2ecf20Sopenharmony_ci goto error; 7038c2ecf20Sopenharmony_ci 7048c2ecf20Sopenharmony_ci /* Create the Protection Domain */ 7058c2ecf20Sopenharmony_ci rdma->pd = ib_alloc_pd(rdma->cm_id->device, 0); 7068c2ecf20Sopenharmony_ci if (IS_ERR(rdma->pd)) 7078c2ecf20Sopenharmony_ci goto error; 7088c2ecf20Sopenharmony_ci 7098c2ecf20Sopenharmony_ci /* Create the Queue Pair */ 7108c2ecf20Sopenharmony_ci memset(&qp_attr, 0, sizeof qp_attr); 7118c2ecf20Sopenharmony_ci qp_attr.event_handler = qp_event_handler; 7128c2ecf20Sopenharmony_ci qp_attr.qp_context = client; 7138c2ecf20Sopenharmony_ci qp_attr.cap.max_send_wr = opts.sq_depth; 7148c2ecf20Sopenharmony_ci qp_attr.cap.max_recv_wr = opts.rq_depth; 7158c2ecf20Sopenharmony_ci qp_attr.cap.max_send_sge = P9_RDMA_SEND_SGE; 7168c2ecf20Sopenharmony_ci qp_attr.cap.max_recv_sge = P9_RDMA_RECV_SGE; 7178c2ecf20Sopenharmony_ci qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; 7188c2ecf20Sopenharmony_ci qp_attr.qp_type = IB_QPT_RC; 7198c2ecf20Sopenharmony_ci qp_attr.send_cq = rdma->cq; 7208c2ecf20Sopenharmony_ci qp_attr.recv_cq = rdma->cq; 7218c2ecf20Sopenharmony_ci err = rdma_create_qp(rdma->cm_id, rdma->pd, &qp_attr); 7228c2ecf20Sopenharmony_ci if (err) 7238c2ecf20Sopenharmony_ci goto error; 7248c2ecf20Sopenharmony_ci rdma->qp = rdma->cm_id->qp; 7258c2ecf20Sopenharmony_ci 7268c2ecf20Sopenharmony_ci /* Request a connection */ 7278c2ecf20Sopenharmony_ci memset(&conn_param, 0, sizeof(conn_param)); 7288c2ecf20Sopenharmony_ci conn_param.private_data = NULL; 7298c2ecf20Sopenharmony_ci conn_param.private_data_len = 0; 7308c2ecf20Sopenharmony_ci conn_param.responder_resources = P9_RDMA_IRD; 7318c2ecf20Sopenharmony_ci conn_param.initiator_depth = P9_RDMA_ORD; 7328c2ecf20Sopenharmony_ci err = rdma_connect(rdma->cm_id, &conn_param); 7338c2ecf20Sopenharmony_ci if (err) 7348c2ecf20Sopenharmony_ci goto error; 7358c2ecf20Sopenharmony_ci err = wait_for_completion_interruptible(&rdma->cm_done); 7368c2ecf20Sopenharmony_ci if (err || (rdma->state != P9_RDMA_CONNECTED)) 7378c2ecf20Sopenharmony_ci goto error; 7388c2ecf20Sopenharmony_ci 7398c2ecf20Sopenharmony_ci client->status = Connected; 7408c2ecf20Sopenharmony_ci 7418c2ecf20Sopenharmony_ci return 0; 7428c2ecf20Sopenharmony_ci 7438c2ecf20Sopenharmony_cierror: 7448c2ecf20Sopenharmony_ci rdma_destroy_trans(rdma); 7458c2ecf20Sopenharmony_ci return -ENOTCONN; 7468c2ecf20Sopenharmony_ci} 7478c2ecf20Sopenharmony_ci 7488c2ecf20Sopenharmony_cistatic struct p9_trans_module p9_rdma_trans = { 7498c2ecf20Sopenharmony_ci .name = "rdma", 7508c2ecf20Sopenharmony_ci .maxsize = P9_RDMA_MAXSIZE, 7518c2ecf20Sopenharmony_ci .def = 0, 7528c2ecf20Sopenharmony_ci .owner = THIS_MODULE, 7538c2ecf20Sopenharmony_ci .create = rdma_create_trans, 7548c2ecf20Sopenharmony_ci .close = rdma_close, 7558c2ecf20Sopenharmony_ci .request = rdma_request, 7568c2ecf20Sopenharmony_ci .cancel = rdma_cancel, 7578c2ecf20Sopenharmony_ci .cancelled = rdma_cancelled, 7588c2ecf20Sopenharmony_ci .show_options = p9_rdma_show_options, 7598c2ecf20Sopenharmony_ci}; 7608c2ecf20Sopenharmony_ci 7618c2ecf20Sopenharmony_ci/** 7628c2ecf20Sopenharmony_ci * p9_trans_rdma_init - Register the 9P RDMA transport driver 7638c2ecf20Sopenharmony_ci */ 7648c2ecf20Sopenharmony_cistatic int __init p9_trans_rdma_init(void) 7658c2ecf20Sopenharmony_ci{ 7668c2ecf20Sopenharmony_ci v9fs_register_trans(&p9_rdma_trans); 7678c2ecf20Sopenharmony_ci return 0; 7688c2ecf20Sopenharmony_ci} 7698c2ecf20Sopenharmony_ci 7708c2ecf20Sopenharmony_cistatic void __exit p9_trans_rdma_exit(void) 7718c2ecf20Sopenharmony_ci{ 7728c2ecf20Sopenharmony_ci v9fs_unregister_trans(&p9_rdma_trans); 7738c2ecf20Sopenharmony_ci} 7748c2ecf20Sopenharmony_ci 7758c2ecf20Sopenharmony_cimodule_init(p9_trans_rdma_init); 7768c2ecf20Sopenharmony_cimodule_exit(p9_trans_rdma_exit); 7778c2ecf20Sopenharmony_ci 7788c2ecf20Sopenharmony_ciMODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>"); 7798c2ecf20Sopenharmony_ciMODULE_DESCRIPTION("RDMA Transport for 9P"); 7808c2ecf20Sopenharmony_ciMODULE_LICENSE("Dual BSD/GPL"); 781