/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * RDMA Transport Layer
 *
 * Copyright (c) 2014 - 2018 ProfitBricks GmbH. All rights reserved.
 * Copyright (c) 2018 - 2019 1&1 IONOS Cloud GmbH. All rights reserved.
 * Copyright (c) 2019 - 2020 1&1 IONOS SE. All rights reserved.
 */

#ifndef RTRS_PRI_H
#define RTRS_PRI_H

#include <linux/uuid.h>
#include <rdma/rdma_cm.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib.h>

#include "rtrs.h"

#define RTRS_PROTO_VER_MAJOR 2
#define RTRS_PROTO_VER_MINOR 0

#define RTRS_PROTO_VER_STRING __stringify(RTRS_PROTO_VER_MAJOR) "." \
			       __stringify(RTRS_PROTO_VER_MINOR)

/*
 * The IB immediate data leaves 2^28 values for the payload
 * (MAX_IMM_PAYL_BITS) and the minimum chunk size is 4096 (2^12).
 * So the maximum sess_queue_depth is 65536 (2^16) in theory.
 * But mempool_create(), create_qp() and ib_post_send() fail with
 * "cannot allocate memory" errors if sess_queue_depth is too big.
 * Therefore the practical maximum of sess_queue_depth lies
 * somewhere between 1 and 65534, depending on the system.
 */
#define MAX_SESS_QUEUE_DEPTH 65535

enum rtrs_imm_const {
	MAX_IMM_TYPE_BITS = 4,
	MAX_IMM_TYPE_MASK = ((1 << MAX_IMM_TYPE_BITS) - 1),
	MAX_IMM_PAYL_BITS = 28,
	MAX_IMM_PAYL_MASK = ((1 << MAX_IMM_PAYL_BITS) - 1),
};

enum rtrs_imm_type {
	RTRS_IO_REQ_IMM       = 0, /* client to server */
	RTRS_IO_RSP_IMM       = 1, /* server to client */
	RTRS_IO_RSP_W_INV_IMM = 2, /* server to client */

	RTRS_HB_MSG_IMM = 8, /* HB: HeartBeat */
	RTRS_HB_ACK_IMM = 9,

	RTRS_LAST_IMM,
};

enum {
	SERVICE_CON_QUEUE_DEPTH = 512,

	MAX_PATHS_NUM = 128,

	MIN_CHUNK_SIZE = 8192,

	RTRS_HB_INTERVAL_MS = 5000,
	RTRS_HB_MISSED_MAX = 5,

	RTRS_MAGIC = 0x1BBD,
	RTRS_PROTO_VER = (RTRS_PROTO_VER_MAJOR << 8) | RTRS_PROTO_VER_MINOR,
};
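
/*
 * For reference, with the version macros above RTRS_PROTO_VER packs the
 * major version into the high byte and the minor version into the low
 * byte, i.e. (2 << 8) | 0 == 0x0200.
 */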

struct rtrs_ib_dev;

struct rtrs_rdma_dev_pd_ops {
	struct rtrs_ib_dev *(*alloc)(void);
	void (*free)(struct rtrs_ib_dev *dev);
	int (*init)(struct rtrs_ib_dev *dev);
	void (*deinit)(struct rtrs_ib_dev *dev);
};

struct rtrs_rdma_dev_pd {
	struct mutex		mutex;
	struct list_head	list;
	enum ib_pd_flags	pd_flags;
	const struct rtrs_rdma_dev_pd_ops *ops;
};

struct rtrs_ib_dev {
	struct ib_device	 *ib_dev;
	struct ib_pd		 *ib_pd;
	struct kref		 ref;
	struct list_head	 entry;
	struct rtrs_rdma_dev_pd *pool;
};

struct rtrs_con {
	struct rtrs_sess	*sess;
	struct ib_qp		*qp;
	struct ib_cq		*cq;
	struct rdma_cm_id	*cm_id;
	unsigned int		cid;
};

struct rtrs_sess {
	struct list_head	entry;
	struct sockaddr_storage dst_addr;
	struct sockaddr_storage src_addr;
	char			sessname[NAME_MAX];
	uuid_t			uuid;
	struct rtrs_con	**con;
	unsigned int		con_num;
	unsigned int		recon_cnt;
	struct rtrs_ib_dev	*dev;
	int			dev_ref;
	struct ib_cqe		*hb_cqe;
	void			(*hb_err_handler)(struct rtrs_con *con);
	struct workqueue_struct *hb_wq;
	struct delayed_work	hb_dwork;
	unsigned int		hb_interval_ms;
	unsigned int		hb_missed_cnt;
	unsigned int		hb_missed_max;
};

/* rtrs information unit */
struct rtrs_iu {
	struct ib_cqe           cqe;
	dma_addr_t              dma_addr;
	void                    *buf;
	size_t                  size;
	enum dma_data_direction direction;
};

/**
 * enum rtrs_msg_types - RTRS message types, see also rtrs/README
 * @RTRS_MSG_INFO_REQ:		Client additional info request to the server
 * @RTRS_MSG_INFO_RSP:		Server additional info response to the client
 * @RTRS_MSG_WRITE:		Client writes data per RDMA to server
 * @RTRS_MSG_READ:		Client requests data transfer from server
 * @RTRS_MSG_RKEY_RSP:		Server refreshed rkey for rbuf
 */
enum rtrs_msg_types {
	RTRS_MSG_INFO_REQ,
	RTRS_MSG_INFO_RSP,
	RTRS_MSG_WRITE,
	RTRS_MSG_READ,
	RTRS_MSG_RKEY_RSP,
};

/**
 * enum rtrs_msg_flags - RTRS message flags.
 * @RTRS_MSG_NEED_INVAL_F:	Send invalidation in response.
 * @RTRS_MSG_NEW_RKEY_F:	Send refreshed rkey in response.
 */
enum rtrs_msg_flags {
	RTRS_MSG_NEED_INVAL_F = 1 << 0,
	RTRS_MSG_NEW_RKEY_F = 1 << 1,
};

/**
 * struct rtrs_sg_desc - RDMA-Buffer entry description
 * @addr:	Address of RDMA destination buffer
 * @key:	Authorization rkey to write to the buffer
 * @len:	Size of the buffer
 */
struct rtrs_sg_desc {
	__le64			addr;
	__le32			key;
	__le32			len;
};

/**
 * struct rtrs_msg_conn_req - Client connection request to the server
 * @magic:	   RTRS magic
 * @version:	   RTRS protocol version
 * @cid:	   Current connection id
 * @cid_num:	   Number of connections per session
 * @recon_cnt:	   Reconnections counter
 * @sess_uuid:	   UUID of a session (path)
 * @paths_uuid:	   UUID of a group of sessions (paths)
 * @first_conn:	   Set to 1 on the first connection of a session, 0 otherwise
 *
 * NOTE: max size 56 bytes, see man rdma_connect().
 */
struct rtrs_msg_conn_req {
	/* Is set to 0 by cma.c in case of AF_IB, do not touch that.
	 * See https://www.spinics.net/lists/linux-rdma/msg22397.html
	 */
	u8		__cma_version;
	/* On the sender side this must be set to 0, otherwise
	 * cma_save_ip_info() extracts garbage and fails.
	 */
	u8		__ip_version;
	__le16		magic;
	__le16		version;
	__le16		cid;
	__le16		cid_num;
	__le16		recon_cnt;
	uuid_t		sess_uuid;
	uuid_t		paths_uuid;
	u8		first_conn : 1;
	u8		reserved_bits : 7;
	u8		reserved[11];
};
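
/*
 * For reference, the fields above add up to exactly the 56-byte limit
 * mentioned in the kernel-doc note:
 * 2 x u8 + 5 x __le16 + 2 x uuid_t (16 bytes each) + 1 bitfield byte +
 * 11 reserved bytes = 2 + 10 + 32 + 1 + 11 = 56.
 * A compile-time check (sketch, requires <linux/build_bug.h>) could be:
 *
 *	static_assert(sizeof(struct rtrs_msg_conn_req) == 56);
 */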

/**
 * struct rtrs_msg_conn_rsp - Server connection response to the client
 * @magic:	   RTRS magic
 * @version:	   RTRS protocol version
 * @errno:	   0 if the connection is accepted via rdma_accept(),
 *		   otherwise the error reported via rdma_reject()
 * @queue_depth:   max inflight messages (queue-depth) in this session
 * @max_io_size:   max io size server supports
 * @max_hdr_size:  max msg header size server supports
 * @flags:	   RTRS message flags (e.g. %RTRS_MSG_NEW_RKEY_F)
 *
 * NOTE: size is 56 bytes, max possible is 136 bytes, see man rdma_accept().
 */
struct rtrs_msg_conn_rsp {
	__le16		magic;
	__le16		version;
	__le16		errno;
	__le16		queue_depth;
	__le32		max_io_size;
	__le32		max_hdr_size;
	__le32		flags;
	u8		reserved[36];
};
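
/*
 * For reference, this response also adds up to the 56 bytes noted above:
 * 4 x __le16 + 3 x __le32 + 36 reserved bytes = 8 + 12 + 36 = 56.
 */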

/**
 * struct rtrs_msg_info_req
 * @type:		@RTRS_MSG_INFO_REQ
 * @sessname:		Session name chosen by client
 */
struct rtrs_msg_info_req {
	__le16		type;
	u8		sessname[NAME_MAX];
	u8		reserved[15];
};

/**
 * struct rtrs_msg_info_rsp
 * @type:		@RTRS_MSG_INFO_RSP
 * @sg_cnt:		Number of @desc entries
 * @desc:		RDMA buffers where the client can write to server
 */
struct rtrs_msg_info_rsp {
	__le16		type;
	__le16          sg_cnt;
	u8              reserved[4];
	struct rtrs_sg_desc desc[];
};

/**
 * struct rtrs_msg_rkey_rsp
 * @type:		@RTRS_MSG_RKEY_RSP
 * @buf_id:		RDMA buf_id of the new rkey
 * @rkey:		New remote key for the RDMA buffer identified by @buf_id
 */
struct rtrs_msg_rkey_rsp {
	__le16		type;
	__le16          buf_id;
	__le32		rkey;
};

/**
 * struct rtrs_msg_rdma_read - RDMA data transfer request from client
 * @type:		always @RTRS_MSG_READ
 * @usr_len:		length of user payload
 * @flags:		RTRS message flags (e.g. %RTRS_MSG_NEED_INVAL_F)
 * @sg_cnt:		number of @desc entries
 * @desc:		RDMA buffers where the server can write the result to
 */
struct rtrs_msg_rdma_read {
	__le16			type;
	__le16			usr_len;
	__le16			flags;
	__le16			sg_cnt;
	struct rtrs_sg_desc    desc[];
};

/**
 * struct rtrs_msg_rdma_write - Message transferred to server with RDMA-Write
 * @type:		always @RTRS_MSG_WRITE
 * @usr_len:		length of user payload
 */
struct rtrs_msg_rdma_write {
	__le16			type;
	__le16			usr_len;
};

/**
 * struct rtrs_msg_rdma_hdr - header for read or write request
 * @type:		@RTRS_MSG_WRITE | @RTRS_MSG_READ
 */
struct rtrs_msg_rdma_hdr {
	__le16			type;
};

/* rtrs.c */

struct rtrs_iu *rtrs_iu_alloc(u32 queue_size, size_t size, gfp_t t,
			      struct ib_device *dev, enum dma_data_direction,
			      void (*done)(struct ib_cq *cq, struct ib_wc *wc));
void rtrs_iu_free(struct rtrs_iu *iu, struct ib_device *dev, u32 queue_size);
int rtrs_iu_post_recv(struct rtrs_con *con, struct rtrs_iu *iu);
int rtrs_iu_post_send(struct rtrs_con *con, struct rtrs_iu *iu, size_t size,
		      struct ib_send_wr *head);
int rtrs_iu_post_rdma_write_imm(struct rtrs_con *con, struct rtrs_iu *iu,
				struct ib_sge *sge, unsigned int num_sge,
				u32 rkey, u64 rdma_addr, u32 imm_data,
				enum ib_send_flags flags,
				struct ib_send_wr *head);

int rtrs_post_recv_empty(struct rtrs_con *con, struct ib_cqe *cqe);
int rtrs_post_rdma_write_imm_empty(struct rtrs_con *con, struct ib_cqe *cqe,
				   u32 imm_data, enum ib_send_flags flags,
				   struct ib_send_wr *head);

int rtrs_cq_qp_create(struct rtrs_sess *rtrs_sess, struct rtrs_con *con,
		      u32 max_send_sge, int cq_vector, int cq_size,
		      u32 max_send_wr, u32 max_recv_wr,
		      enum ib_poll_context poll_ctx);
void rtrs_cq_qp_destroy(struct rtrs_con *con);

void rtrs_init_hb(struct rtrs_sess *sess, struct ib_cqe *cqe,
		  unsigned int interval_ms, unsigned int missed_max,
		  void (*err_handler)(struct rtrs_con *con),
		  struct workqueue_struct *wq);
void rtrs_start_hb(struct rtrs_sess *sess);
void rtrs_stop_hb(struct rtrs_sess *sess);
void rtrs_send_hb_ack(struct rtrs_sess *sess);

void rtrs_rdma_dev_pd_init(enum ib_pd_flags pd_flags,
			   struct rtrs_rdma_dev_pd *pool);
void rtrs_rdma_dev_pd_deinit(struct rtrs_rdma_dev_pd *pool);

struct rtrs_ib_dev *rtrs_ib_dev_find_or_add(struct ib_device *ib_dev,
					    struct rtrs_rdma_dev_pd *pool);
int rtrs_ib_dev_put(struct rtrs_ib_dev *dev);

static inline u32 rtrs_to_imm(u32 type, u32 payload)
{
	BUILD_BUG_ON(MAX_IMM_PAYL_BITS + MAX_IMM_TYPE_BITS != 32);
	BUILD_BUG_ON(RTRS_LAST_IMM > (1<<MAX_IMM_TYPE_BITS));
	return ((type & MAX_IMM_TYPE_MASK) << MAX_IMM_PAYL_BITS) |
	       (payload & MAX_IMM_PAYL_MASK);
}

static inline void rtrs_from_imm(u32 imm, u32 *type, u32 *payload)
{
	*payload = imm & MAX_IMM_PAYL_MASK;
	*type = imm >> MAX_IMM_PAYL_BITS;
}
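
/*
 * For reference, the immediate data produced by rtrs_to_imm() is laid out
 * as follows (derived from the masks and shifts above):
 *
 *	bits 31..28: rtrs_imm_type
 *	bits 27..0:  payload
 */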

static inline u32 rtrs_to_io_req_imm(u32 addr)
{
	return rtrs_to_imm(RTRS_IO_REQ_IMM, addr);
}

static inline u32 rtrs_to_io_rsp_imm(u32 msg_id, int errno, bool w_inval)
{
	enum rtrs_imm_type type;
	u32 payload;

	/* 9 bits for errno, 19 bits for msg_id */
	payload = (abs(errno) & 0x1ff) << 19 | (msg_id & 0x7ffff);
	type = w_inval ? RTRS_IO_RSP_W_INV_IMM : RTRS_IO_RSP_IMM;

	return rtrs_to_imm(type, payload);
}

static inline void rtrs_from_io_rsp_imm(u32 payload, u32 *msg_id, int *errno)
{
	/* 9 bits for errno, 19 bits for msg_id */
	*msg_id = payload & 0x7ffff;
	*errno = -(int)((payload >> 19) & 0x1ff);
}
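
/*
 * For reference, an IO response immediate therefore packs three fields
 * into 32 bits (see rtrs_to_io_rsp_imm() and rtrs_from_io_rsp_imm()):
 *
 *	bits 31..28: RTRS_IO_RSP_IMM or RTRS_IO_RSP_W_INV_IMM
 *	bits 27..19: abs(errno), masked to 9 bits
 *	bits 18..0:  msg_id
 */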

#define STAT_STORE_FUNC(type, set_value, reset)				\
static ssize_t set_value##_store(struct kobject *kobj,			\
			     struct kobj_attribute *attr,		\
			     const char *buf, size_t count)		\
{									\
	int ret = -EINVAL;						\
	type *stats = container_of(kobj, type, kobj_stats);		\
									\
	if (sysfs_streq(buf, "1"))					\
		ret = reset(stats, true);				\
	else if (sysfs_streq(buf, "0"))					\
		ret = reset(stats, false);				\
	if (ret)							\
		return ret;						\
									\
	return count;							\
}

#define STAT_SHOW_FUNC(type, get_value, print)				\
static ssize_t get_value##_show(struct kobject *kobj,			\
			   struct kobj_attribute *attr,			\
			   char *page)					\
{									\
	type *stats = container_of(kobj, type, kobj_stats);		\
									\
	return print(stats, page, PAGE_SIZE);				\
}

#define STAT_ATTR(type, stat, print, reset)				\
STAT_STORE_FUNC(type, stat, reset)					\
STAT_SHOW_FUNC(type, stat, print)					\
static struct kobj_attribute stat##_attr = __ATTR_RW(stat)
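
/*
 * Illustrative use of STAT_ATTR() (the stats type and callback names below
 * are hypothetical, shown only to demonstrate the expansion):
 *
 *	STAT_ATTR(struct example_stats, reconnects,
 *		  example_stats_reconnects_to_str,
 *		  example_stats_reconnects_reset);
 *
 * defines reconnects_show()/reconnects_store() wrappers around the two
 * callbacks and a read-write kobj_attribute named reconnects_attr. The
 * stats type is expected to embed a 'struct kobject kobj_stats' member,
 * as required by the container_of() calls in the macros above.
 */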

#endif /* RTRS_PRI_H */