1/* SPDX-License-Identifier: GPL-2.0-or-later */ 2/* 3 * RDMA Transport Layer 4 * 5 * Copyright (c) 2014 - 2018 ProfitBricks GmbH. All rights reserved. 6 * Copyright (c) 2018 - 2019 1&1 IONOS Cloud GmbH. All rights reserved. 7 * Copyright (c) 2019 - 2020 1&1 IONOS SE. All rights reserved. 8 */ 9 10#ifndef RTRS_PRI_H 11#define RTRS_PRI_H 12 13#include <linux/uuid.h> 14#include <rdma/rdma_cm.h> 15#include <rdma/ib_verbs.h> 16#include <rdma/ib.h> 17 18#include "rtrs.h" 19 20#define RTRS_PROTO_VER_MAJOR 2 21#define RTRS_PROTO_VER_MINOR 0 22 23#define RTRS_PROTO_VER_STRING __stringify(RTRS_PROTO_VER_MAJOR) "." \ 24 __stringify(RTRS_PROTO_VER_MINOR) 25 26/* 27 * Max IB immediate data size is 2^28 (MAX_IMM_PAYL_BITS) 28 * and the minimum chunk size is 4096 (2^12). 29 * So the maximum sess_queue_depth is 65536 (2^16) in theory. 30 * But mempool_create, create_qp and ib_post_send fail with 31 * "cannot allocate memory" error if sess_queue_depth is too big. 32 * Therefore the pratical max value of sess_queue_depth is 33 * somewhere between 1 and 65534 and it depends on the system. 34 */ 35#define MAX_SESS_QUEUE_DEPTH 65535 36 37enum rtrs_imm_const { 38 MAX_IMM_TYPE_BITS = 4, 39 MAX_IMM_TYPE_MASK = ((1 << MAX_IMM_TYPE_BITS) - 1), 40 MAX_IMM_PAYL_BITS = 28, 41 MAX_IMM_PAYL_MASK = ((1 << MAX_IMM_PAYL_BITS) - 1), 42}; 43 44enum rtrs_imm_type { 45 RTRS_IO_REQ_IMM = 0, /* client to server */ 46 RTRS_IO_RSP_IMM = 1, /* server to client */ 47 RTRS_IO_RSP_W_INV_IMM = 2, /* server to client */ 48 49 RTRS_HB_MSG_IMM = 8, /* HB: HeartBeat */ 50 RTRS_HB_ACK_IMM = 9, 51 52 RTRS_LAST_IMM, 53}; 54 55enum { 56 SERVICE_CON_QUEUE_DEPTH = 512, 57 58 MAX_PATHS_NUM = 128, 59 60 MIN_CHUNK_SIZE = 8192, 61 62 RTRS_HB_INTERVAL_MS = 5000, 63 RTRS_HB_MISSED_MAX = 5, 64 65 RTRS_MAGIC = 0x1BBD, 66 RTRS_PROTO_VER = (RTRS_PROTO_VER_MAJOR << 8) | RTRS_PROTO_VER_MINOR, 67}; 68 69struct rtrs_ib_dev; 70 71struct rtrs_rdma_dev_pd_ops { 72 struct rtrs_ib_dev *(*alloc)(void); 73 void (*free)(struct rtrs_ib_dev *dev); 74 int (*init)(struct rtrs_ib_dev *dev); 75 void (*deinit)(struct rtrs_ib_dev *dev); 76}; 77 78struct rtrs_rdma_dev_pd { 79 struct mutex mutex; 80 struct list_head list; 81 enum ib_pd_flags pd_flags; 82 const struct rtrs_rdma_dev_pd_ops *ops; 83}; 84 85struct rtrs_ib_dev { 86 struct ib_device *ib_dev; 87 struct ib_pd *ib_pd; 88 struct kref ref; 89 struct list_head entry; 90 struct rtrs_rdma_dev_pd *pool; 91}; 92 93struct rtrs_con { 94 struct rtrs_sess *sess; 95 struct ib_qp *qp; 96 struct ib_cq *cq; 97 struct rdma_cm_id *cm_id; 98 unsigned int cid; 99}; 100 101struct rtrs_sess { 102 struct list_head entry; 103 struct sockaddr_storage dst_addr; 104 struct sockaddr_storage src_addr; 105 char sessname[NAME_MAX]; 106 uuid_t uuid; 107 struct rtrs_con **con; 108 unsigned int con_num; 109 unsigned int recon_cnt; 110 struct rtrs_ib_dev *dev; 111 int dev_ref; 112 struct ib_cqe *hb_cqe; 113 void (*hb_err_handler)(struct rtrs_con *con); 114 struct workqueue_struct *hb_wq; 115 struct delayed_work hb_dwork; 116 unsigned int hb_interval_ms; 117 unsigned int hb_missed_cnt; 118 unsigned int hb_missed_max; 119}; 120 121/* rtrs information unit */ 122struct rtrs_iu { 123 struct ib_cqe cqe; 124 dma_addr_t dma_addr; 125 void *buf; 126 size_t size; 127 enum dma_data_direction direction; 128}; 129 130/** 131 * enum rtrs_msg_types - RTRS message types, see also rtrs/README 132 * @RTRS_MSG_INFO_REQ: Client additional info request to the server 133 * @RTRS_MSG_INFO_RSP: Server additional info response to the client 134 * @RTRS_MSG_WRITE: Client writes data per RDMA to server 135 * @RTRS_MSG_READ: Client requests data transfer from server 136 * @RTRS_MSG_RKEY_RSP: Server refreshed rkey for rbuf 137 */ 138enum rtrs_msg_types { 139 RTRS_MSG_INFO_REQ, 140 RTRS_MSG_INFO_RSP, 141 RTRS_MSG_WRITE, 142 RTRS_MSG_READ, 143 RTRS_MSG_RKEY_RSP, 144}; 145 146/** 147 * enum rtrs_msg_flags - RTRS message flags. 148 * @RTRS_NEED_INVAL: Send invalidation in response. 149 * @RTRS_MSG_NEW_RKEY_F: Send refreshed rkey in response. 150 */ 151enum rtrs_msg_flags { 152 RTRS_MSG_NEED_INVAL_F = 1 << 0, 153 RTRS_MSG_NEW_RKEY_F = 1 << 1, 154}; 155 156/** 157 * struct rtrs_sg_desc - RDMA-Buffer entry description 158 * @addr: Address of RDMA destination buffer 159 * @key: Authorization rkey to write to the buffer 160 * @len: Size of the buffer 161 */ 162struct rtrs_sg_desc { 163 __le64 addr; 164 __le32 key; 165 __le32 len; 166}; 167 168/** 169 * struct rtrs_msg_conn_req - Client connection request to the server 170 * @magic: RTRS magic 171 * @version: RTRS protocol version 172 * @cid: Current connection id 173 * @cid_num: Number of connections per session 174 * @recon_cnt: Reconnections counter 175 * @sess_uuid: UUID of a session (path) 176 * @paths_uuid: UUID of a group of sessions (paths) 177 * 178 * NOTE: max size 56 bytes, see man rdma_connect(). 179 */ 180struct rtrs_msg_conn_req { 181 /* Is set to 0 by cma.c in case of AF_IB, do not touch that. 182 * see https://www.spinics.net/lists/linux-rdma/msg22397.html 183 */ 184 u8 __cma_version; 185 /* On sender side that should be set to 0, or cma_save_ip_info() 186 * extract garbage and will fail. 187 */ 188 u8 __ip_version; 189 __le16 magic; 190 __le16 version; 191 __le16 cid; 192 __le16 cid_num; 193 __le16 recon_cnt; 194 uuid_t sess_uuid; 195 uuid_t paths_uuid; 196 u8 first_conn : 1; 197 u8 reserved_bits : 7; 198 u8 reserved[11]; 199}; 200 201/** 202 * struct rtrs_msg_conn_rsp - Server connection response to the client 203 * @magic: RTRS magic 204 * @version: RTRS protocol version 205 * @errno: If rdma_accept() then 0, if rdma_reject() indicates error 206 * @queue_depth: max inflight messages (queue-depth) in this session 207 * @max_io_size: max io size server supports 208 * @max_hdr_size: max msg header size server supports 209 * 210 * NOTE: size is 56 bytes, max possible is 136 bytes, see man rdma_accept(). 211 */ 212struct rtrs_msg_conn_rsp { 213 __le16 magic; 214 __le16 version; 215 __le16 errno; 216 __le16 queue_depth; 217 __le32 max_io_size; 218 __le32 max_hdr_size; 219 __le32 flags; 220 u8 reserved[36]; 221}; 222 223/** 224 * struct rtrs_msg_info_req 225 * @type: @RTRS_MSG_INFO_REQ 226 * @sessname: Session name chosen by client 227 */ 228struct rtrs_msg_info_req { 229 __le16 type; 230 u8 sessname[NAME_MAX]; 231 u8 reserved[15]; 232}; 233 234/** 235 * struct rtrs_msg_info_rsp 236 * @type: @RTRS_MSG_INFO_RSP 237 * @sg_cnt: Number of @desc entries 238 * @desc: RDMA buffers where the client can write to server 239 */ 240struct rtrs_msg_info_rsp { 241 __le16 type; 242 __le16 sg_cnt; 243 u8 reserved[4]; 244 struct rtrs_sg_desc desc[]; 245}; 246 247/** 248 * struct rtrs_msg_rkey_rsp 249 * @type: @RTRS_MSG_RKEY_RSP 250 * @buf_id: RDMA buf_id of the new rkey 251 * @rkey: new remote key for RDMA buffers id from server 252 */ 253struct rtrs_msg_rkey_rsp { 254 __le16 type; 255 __le16 buf_id; 256 __le32 rkey; 257}; 258 259/** 260 * struct rtrs_msg_rdma_read - RDMA data transfer request from client 261 * @type: always @RTRS_MSG_READ 262 * @usr_len: length of user payload 263 * @sg_cnt: number of @desc entries 264 * @desc: RDMA buffers where the server can write the result to 265 */ 266struct rtrs_msg_rdma_read { 267 __le16 type; 268 __le16 usr_len; 269 __le16 flags; 270 __le16 sg_cnt; 271 struct rtrs_sg_desc desc[]; 272}; 273 274/** 275 * struct_msg_rdma_write - Message transferred to server with RDMA-Write 276 * @type: always @RTRS_MSG_WRITE 277 * @usr_len: length of user payload 278 */ 279struct rtrs_msg_rdma_write { 280 __le16 type; 281 __le16 usr_len; 282}; 283 284/** 285 * struct_msg_rdma_hdr - header for read or write request 286 * @type: @RTRS_MSG_WRITE | @RTRS_MSG_READ 287 */ 288struct rtrs_msg_rdma_hdr { 289 __le16 type; 290}; 291 292/* rtrs.c */ 293 294struct rtrs_iu *rtrs_iu_alloc(u32 queue_size, size_t size, gfp_t t, 295 struct ib_device *dev, enum dma_data_direction, 296 void (*done)(struct ib_cq *cq, struct ib_wc *wc)); 297void rtrs_iu_free(struct rtrs_iu *iu, struct ib_device *dev, u32 queue_size); 298int rtrs_iu_post_recv(struct rtrs_con *con, struct rtrs_iu *iu); 299int rtrs_iu_post_send(struct rtrs_con *con, struct rtrs_iu *iu, size_t size, 300 struct ib_send_wr *head); 301int rtrs_iu_post_rdma_write_imm(struct rtrs_con *con, struct rtrs_iu *iu, 302 struct ib_sge *sge, unsigned int num_sge, 303 u32 rkey, u64 rdma_addr, u32 imm_data, 304 enum ib_send_flags flags, 305 struct ib_send_wr *head); 306 307int rtrs_post_recv_empty(struct rtrs_con *con, struct ib_cqe *cqe); 308int rtrs_post_rdma_write_imm_empty(struct rtrs_con *con, struct ib_cqe *cqe, 309 u32 imm_data, enum ib_send_flags flags, 310 struct ib_send_wr *head); 311 312int rtrs_cq_qp_create(struct rtrs_sess *rtrs_sess, struct rtrs_con *con, 313 u32 max_send_sge, int cq_vector, int cq_size, 314 u32 max_send_wr, u32 max_recv_wr, 315 enum ib_poll_context poll_ctx); 316void rtrs_cq_qp_destroy(struct rtrs_con *con); 317 318void rtrs_init_hb(struct rtrs_sess *sess, struct ib_cqe *cqe, 319 unsigned int interval_ms, unsigned int missed_max, 320 void (*err_handler)(struct rtrs_con *con), 321 struct workqueue_struct *wq); 322void rtrs_start_hb(struct rtrs_sess *sess); 323void rtrs_stop_hb(struct rtrs_sess *sess); 324void rtrs_send_hb_ack(struct rtrs_sess *sess); 325 326void rtrs_rdma_dev_pd_init(enum ib_pd_flags pd_flags, 327 struct rtrs_rdma_dev_pd *pool); 328void rtrs_rdma_dev_pd_deinit(struct rtrs_rdma_dev_pd *pool); 329 330struct rtrs_ib_dev *rtrs_ib_dev_find_or_add(struct ib_device *ib_dev, 331 struct rtrs_rdma_dev_pd *pool); 332int rtrs_ib_dev_put(struct rtrs_ib_dev *dev); 333 334static inline u32 rtrs_to_imm(u32 type, u32 payload) 335{ 336 BUILD_BUG_ON(MAX_IMM_PAYL_BITS + MAX_IMM_TYPE_BITS != 32); 337 BUILD_BUG_ON(RTRS_LAST_IMM > (1<<MAX_IMM_TYPE_BITS)); 338 return ((type & MAX_IMM_TYPE_MASK) << MAX_IMM_PAYL_BITS) | 339 (payload & MAX_IMM_PAYL_MASK); 340} 341 342static inline void rtrs_from_imm(u32 imm, u32 *type, u32 *payload) 343{ 344 *payload = imm & MAX_IMM_PAYL_MASK; 345 *type = imm >> MAX_IMM_PAYL_BITS; 346} 347 348static inline u32 rtrs_to_io_req_imm(u32 addr) 349{ 350 return rtrs_to_imm(RTRS_IO_REQ_IMM, addr); 351} 352 353static inline u32 rtrs_to_io_rsp_imm(u32 msg_id, int errno, bool w_inval) 354{ 355 enum rtrs_imm_type type; 356 u32 payload; 357 358 /* 9 bits for errno, 19 bits for msg_id */ 359 payload = (abs(errno) & 0x1ff) << 19 | (msg_id & 0x7ffff); 360 type = w_inval ? RTRS_IO_RSP_W_INV_IMM : RTRS_IO_RSP_IMM; 361 362 return rtrs_to_imm(type, payload); 363} 364 365static inline void rtrs_from_io_rsp_imm(u32 payload, u32 *msg_id, int *errno) 366{ 367 /* 9 bits for errno, 19 bits for msg_id */ 368 *msg_id = payload & 0x7ffff; 369 *errno = -(int)((payload >> 19) & 0x1ff); 370} 371 372#define STAT_STORE_FUNC(type, set_value, reset) \ 373static ssize_t set_value##_store(struct kobject *kobj, \ 374 struct kobj_attribute *attr, \ 375 const char *buf, size_t count) \ 376{ \ 377 int ret = -EINVAL; \ 378 type *stats = container_of(kobj, type, kobj_stats); \ 379 \ 380 if (sysfs_streq(buf, "1")) \ 381 ret = reset(stats, true); \ 382 else if (sysfs_streq(buf, "0")) \ 383 ret = reset(stats, false); \ 384 if (ret) \ 385 return ret; \ 386 \ 387 return count; \ 388} 389 390#define STAT_SHOW_FUNC(type, get_value, print) \ 391static ssize_t get_value##_show(struct kobject *kobj, \ 392 struct kobj_attribute *attr, \ 393 char *page) \ 394{ \ 395 type *stats = container_of(kobj, type, kobj_stats); \ 396 \ 397 return print(stats, page, PAGE_SIZE); \ 398} 399 400#define STAT_ATTR(type, stat, print, reset) \ 401STAT_STORE_FUNC(type, stat, reset) \ 402STAT_SHOW_FUNC(type, stat, print) \ 403static struct kobj_attribute stat##_attr = __ATTR_RW(stat) 404 405#endif /* RTRS_PRI_H */ 406