1/*
2 * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6 *
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 *    notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 * 3. Neither the names of the copyright holders nor the names of its
13 *    contributors may be used to endorse or promote products derived from
14 *    this software without specific prior written permission.
15 *
16 * Alternatively, this software may be distributed under the terms of the
17 * GNU General Public License ("GPL") version 2 as published by the Free
18 * Software Foundation.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33#include <linux/module.h>
34#include <linux/pid.h>
35#include <linux/pid_namespace.h>
36#include <linux/mutex.h>
37#include <net/netlink.h>
38#include <rdma/rdma_cm.h>
39#include <rdma/rdma_netlink.h>
40
41#include "core_priv.h"
42#include "cma_priv.h"
43#include "restrack.h"
44#include "uverbs.h"
45
46typedef int (*res_fill_func_t)(struct sk_buff*, bool,
47			       struct rdma_restrack_entry*, uint32_t);
48
/*
 * Keep this array sorted by netlink attribute name.
 */
52static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
53	[RDMA_NLDEV_ATTR_CHARDEV]		= { .type = NLA_U64 },
54	[RDMA_NLDEV_ATTR_CHARDEV_ABI]		= { .type = NLA_U64 },
55	[RDMA_NLDEV_ATTR_CHARDEV_NAME]		= { .type = NLA_NUL_STRING,
56					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
57	[RDMA_NLDEV_ATTR_CHARDEV_TYPE]		= { .type = NLA_NUL_STRING,
58					.len = RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE },
59	[RDMA_NLDEV_ATTR_DEV_DIM]               = { .type = NLA_U8 },
60	[RDMA_NLDEV_ATTR_DEV_INDEX]		= { .type = NLA_U32 },
61	[RDMA_NLDEV_ATTR_DEV_NAME]		= { .type = NLA_NUL_STRING,
62					.len = IB_DEVICE_NAME_MAX },
63	[RDMA_NLDEV_ATTR_DEV_NODE_TYPE]		= { .type = NLA_U8 },
64	[RDMA_NLDEV_ATTR_DEV_PROTOCOL]		= { .type = NLA_NUL_STRING,
65					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
66	[RDMA_NLDEV_ATTR_DRIVER]		= { .type = NLA_NESTED },
67	[RDMA_NLDEV_ATTR_DRIVER_ENTRY]		= { .type = NLA_NESTED },
68	[RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE]	= { .type = NLA_U8 },
69	[RDMA_NLDEV_ATTR_DRIVER_STRING]		= { .type = NLA_NUL_STRING,
70					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
71	[RDMA_NLDEV_ATTR_DRIVER_S32]		= { .type = NLA_S32 },
72	[RDMA_NLDEV_ATTR_DRIVER_S64]		= { .type = NLA_S64 },
73	[RDMA_NLDEV_ATTR_DRIVER_U32]		= { .type = NLA_U32 },
74	[RDMA_NLDEV_ATTR_DRIVER_U64]		= { .type = NLA_U64 },
75	[RDMA_NLDEV_ATTR_FW_VERSION]		= { .type = NLA_NUL_STRING,
76					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
77	[RDMA_NLDEV_ATTR_LID]			= { .type = NLA_U32 },
78	[RDMA_NLDEV_ATTR_LINK_TYPE]		= { .type = NLA_NUL_STRING,
79					.len = IFNAMSIZ },
80	[RDMA_NLDEV_ATTR_LMC]			= { .type = NLA_U8 },
81	[RDMA_NLDEV_ATTR_NDEV_INDEX]		= { .type = NLA_U32 },
82	[RDMA_NLDEV_ATTR_NDEV_NAME]		= { .type = NLA_NUL_STRING,
83					.len = IFNAMSIZ },
84	[RDMA_NLDEV_ATTR_NODE_GUID]		= { .type = NLA_U64 },
85	[RDMA_NLDEV_ATTR_PORT_INDEX]		= { .type = NLA_U32 },
86	[RDMA_NLDEV_ATTR_PORT_PHYS_STATE]	= { .type = NLA_U8 },
87	[RDMA_NLDEV_ATTR_PORT_STATE]		= { .type = NLA_U8 },
88	[RDMA_NLDEV_ATTR_RES_CM_ID]		= { .type = NLA_NESTED },
89	[RDMA_NLDEV_ATTR_RES_CM_IDN]		= { .type = NLA_U32 },
90	[RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY]	= { .type = NLA_NESTED },
91	[RDMA_NLDEV_ATTR_RES_CQ]		= { .type = NLA_NESTED },
92	[RDMA_NLDEV_ATTR_RES_CQE]		= { .type = NLA_U32 },
93	[RDMA_NLDEV_ATTR_RES_CQN]		= { .type = NLA_U32 },
94	[RDMA_NLDEV_ATTR_RES_CQ_ENTRY]		= { .type = NLA_NESTED },
95	[RDMA_NLDEV_ATTR_RES_CTXN]		= { .type = NLA_U32 },
96	[RDMA_NLDEV_ATTR_RES_DST_ADDR]		= {
97			.len = sizeof(struct __kernel_sockaddr_storage) },
98	[RDMA_NLDEV_ATTR_RES_IOVA]		= { .type = NLA_U64 },
99	[RDMA_NLDEV_ATTR_RES_KERN_NAME]		= { .type = NLA_NUL_STRING,
100					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
101	[RDMA_NLDEV_ATTR_RES_LKEY]		= { .type = NLA_U32 },
102	[RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY]	= { .type = NLA_U32 },
103	[RDMA_NLDEV_ATTR_RES_LQPN]		= { .type = NLA_U32 },
104	[RDMA_NLDEV_ATTR_RES_MR]		= { .type = NLA_NESTED },
105	[RDMA_NLDEV_ATTR_RES_MRLEN]		= { .type = NLA_U64 },
106	[RDMA_NLDEV_ATTR_RES_MRN]		= { .type = NLA_U32 },
107	[RDMA_NLDEV_ATTR_RES_MR_ENTRY]		= { .type = NLA_NESTED },
108	[RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE]	= { .type = NLA_U8 },
109	[RDMA_NLDEV_ATTR_RES_PD]		= { .type = NLA_NESTED },
110	[RDMA_NLDEV_ATTR_RES_PDN]		= { .type = NLA_U32 },
111	[RDMA_NLDEV_ATTR_RES_PD_ENTRY]		= { .type = NLA_NESTED },
112	[RDMA_NLDEV_ATTR_RES_PID]		= { .type = NLA_U32 },
113	[RDMA_NLDEV_ATTR_RES_POLL_CTX]		= { .type = NLA_U8 },
114	[RDMA_NLDEV_ATTR_RES_PS]		= { .type = NLA_U32 },
115	[RDMA_NLDEV_ATTR_RES_QP]		= { .type = NLA_NESTED },
116	[RDMA_NLDEV_ATTR_RES_QP_ENTRY]		= { .type = NLA_NESTED },
117	[RDMA_NLDEV_ATTR_RES_RAW]		= { .type = NLA_BINARY },
118	[RDMA_NLDEV_ATTR_RES_RKEY]		= { .type = NLA_U32 },
119	[RDMA_NLDEV_ATTR_RES_RQPN]		= { .type = NLA_U32 },
120	[RDMA_NLDEV_ATTR_RES_RQ_PSN]		= { .type = NLA_U32 },
121	[RDMA_NLDEV_ATTR_RES_SQ_PSN]		= { .type = NLA_U32 },
122	[RDMA_NLDEV_ATTR_RES_SRC_ADDR]		= {
123			.len = sizeof(struct __kernel_sockaddr_storage) },
124	[RDMA_NLDEV_ATTR_RES_STATE]		= { .type = NLA_U8 },
125	[RDMA_NLDEV_ATTR_RES_SUMMARY]		= { .type = NLA_NESTED },
126	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY]	= { .type = NLA_NESTED },
127	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR]= { .type = NLA_U64 },
128	[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME]= { .type = NLA_NUL_STRING,
129					.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
130	[RDMA_NLDEV_ATTR_RES_TYPE]		= { .type = NLA_U8 },
131	[RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY]= { .type = NLA_U32 },
132	[RDMA_NLDEV_ATTR_RES_USECNT]		= { .type = NLA_U64 },
133	[RDMA_NLDEV_ATTR_SM_LID]		= { .type = NLA_U32 },
134	[RDMA_NLDEV_ATTR_SUBNET_PREFIX]		= { .type = NLA_U64 },
135	[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]	= { .type = NLA_U32 },
136	[RDMA_NLDEV_ATTR_STAT_MODE]		= { .type = NLA_U32 },
137	[RDMA_NLDEV_ATTR_STAT_RES]		= { .type = NLA_U32 },
138	[RDMA_NLDEV_ATTR_STAT_COUNTER]		= { .type = NLA_NESTED },
139	[RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY]	= { .type = NLA_NESTED },
140	[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]       = { .type = NLA_U32 },
141	[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]       = { .type = NLA_NESTED },
142	[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY]  = { .type = NLA_NESTED },
143	[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME] = { .type = NLA_NUL_STRING },
144	[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE] = { .type = NLA_U64 },
145	[RDMA_NLDEV_ATTR_SYS_IMAGE_GUID]	= { .type = NLA_U64 },
146	[RDMA_NLDEV_ATTR_UVERBS_DRIVER_ID]	= { .type = NLA_U32 },
147	[RDMA_NLDEV_NET_NS_FD]			= { .type = NLA_U32 },
148	[RDMA_NLDEV_SYS_ATTR_NETNS_MODE]	= { .type = NLA_U8 },
149};
150
151static int put_driver_name_print_type(struct sk_buff *msg, const char *name,
152				      enum rdma_nldev_print_type print_type)
153{
154	if (nla_put_string(msg, RDMA_NLDEV_ATTR_DRIVER_STRING, name))
155		return -EMSGSIZE;
156	if (print_type != RDMA_NLDEV_PRINT_TYPE_UNSPEC &&
157	    nla_put_u8(msg, RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE, print_type))
158		return -EMSGSIZE;
159
160	return 0;
161}
162
163static int _rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name,
164				   enum rdma_nldev_print_type print_type,
165				   u32 value)
166{
167	if (put_driver_name_print_type(msg, name, print_type))
168		return -EMSGSIZE;
169	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DRIVER_U32, value))
170		return -EMSGSIZE;
171
172	return 0;
173}
174
175static int _rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name,
176				   enum rdma_nldev_print_type print_type,
177				   u64 value)
178{
179	if (put_driver_name_print_type(msg, name, print_type))
180		return -EMSGSIZE;
181	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_DRIVER_U64, value,
182			      RDMA_NLDEV_ATTR_PAD))
183		return -EMSGSIZE;
184
185	return 0;
186}
187
188int rdma_nl_put_driver_string(struct sk_buff *msg, const char *name,
189			      const char *str)
190{
191	if (put_driver_name_print_type(msg, name,
192				       RDMA_NLDEV_PRINT_TYPE_UNSPEC))
193		return -EMSGSIZE;
194	if (nla_put_string(msg, RDMA_NLDEV_ATTR_DRIVER_STRING, str))
195		return -EMSGSIZE;
196
197	return 0;
198}
199EXPORT_SYMBOL(rdma_nl_put_driver_string);
200
201int rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name, u32 value)
202{
203	return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC,
204				       value);
205}
206EXPORT_SYMBOL(rdma_nl_put_driver_u32);
207
208int rdma_nl_put_driver_u32_hex(struct sk_buff *msg, const char *name,
209			       u32 value)
210{
211	return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_HEX,
212				       value);
213}
214EXPORT_SYMBOL(rdma_nl_put_driver_u32_hex);
215
216int rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name, u64 value)
217{
218	return _rdma_nl_put_driver_u64(msg, name, RDMA_NLDEV_PRINT_TYPE_UNSPEC,
219				       value);
220}
221EXPORT_SYMBOL(rdma_nl_put_driver_u64);
222
223int rdma_nl_put_driver_u64_hex(struct sk_buff *msg, const char *name, u64 value)
224{
225	return _rdma_nl_put_driver_u64(msg, name, RDMA_NLDEV_PRINT_TYPE_HEX,
226				       value);
227}
228EXPORT_SYMBOL(rdma_nl_put_driver_u64_hex);
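
/*
 * Usage note (illustrative sketch, not taken from any in-tree driver): a
 * device driver's restrack callback can use the helpers above to report
 * vendor-specific attributes, e.g.:
 *
 *	static int hypothetical_fill_res_qp_entry(struct sk_buff *msg,
 *						  struct ib_qp *qp)
 *	{
 *		if (rdma_nl_put_driver_u32(msg, "sq_wqe_cnt", 256))
 *			return -EMSGSIZE;
 *		return rdma_nl_put_driver_u64_hex(msg, "dma_addr", 0x1000);
 *	}
 *
 * The attribute names and values are hypothetical placeholders; real drivers
 * report their own hardware state through dev->ops.fill_res_qp_entry and the
 * related callbacks.
 */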
229
230static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device)
231{
232	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index))
233		return -EMSGSIZE;
234	if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME,
235			   dev_name(&device->dev)))
236		return -EMSGSIZE;
237
238	return 0;
239}
240
241static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
242{
243	char fw[IB_FW_VERSION_NAME_MAX];
244	int ret = 0;
245	u8 port;
246
247	if (fill_nldev_handle(msg, device))
248		return -EMSGSIZE;
249
250	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, rdma_end_port(device)))
251		return -EMSGSIZE;
252
253	BUILD_BUG_ON(sizeof(device->attrs.device_cap_flags) != sizeof(u64));
254	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
255			      device->attrs.device_cap_flags,
256			      RDMA_NLDEV_ATTR_PAD))
257		return -EMSGSIZE;
258
259	ib_get_device_fw_str(device, fw);
	/* A device without FW has strlen(fw) == 0 */
261	if (strlen(fw) && nla_put_string(msg, RDMA_NLDEV_ATTR_FW_VERSION, fw))
262		return -EMSGSIZE;
263
264	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_NODE_GUID,
265			      be64_to_cpu(device->node_guid),
266			      RDMA_NLDEV_ATTR_PAD))
267		return -EMSGSIZE;
268	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SYS_IMAGE_GUID,
269			      be64_to_cpu(device->attrs.sys_image_guid),
270			      RDMA_NLDEV_ATTR_PAD))
271		return -EMSGSIZE;
272	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_NODE_TYPE, device->node_type))
273		return -EMSGSIZE;
274	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, device->use_cq_dim))
275		return -EMSGSIZE;
276
	/*
	 * The link type is determined from the first port. An mlx4 device,
	 * which can potentially expose two different link types on the same
	 * IB device, is considered an anomaly better avoided in the future.
	 */
282	port = rdma_start_port(device);
283	if (rdma_cap_opa_mad(device, port))
284		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "opa");
285	else if (rdma_protocol_ib(device, port))
286		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "ib");
287	else if (rdma_protocol_iwarp(device, port))
288		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "iw");
289	else if (rdma_protocol_roce(device, port))
290		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "roce");
291	else if (rdma_protocol_usnic(device, port))
292		ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL,
293				     "usnic");
294	return ret;
295}
296
297static int fill_port_info(struct sk_buff *msg,
298			  struct ib_device *device, u32 port,
299			  const struct net *net)
300{
301	struct net_device *netdev = NULL;
302	struct ib_port_attr attr;
303	int ret;
304	u64 cap_flags = 0;
305
306	if (fill_nldev_handle(msg, device))
307		return -EMSGSIZE;
308
309	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port))
310		return -EMSGSIZE;
311
312	ret = ib_query_port(device, port, &attr);
313	if (ret)
314		return ret;
315
316	if (rdma_protocol_ib(device, port)) {
317		BUILD_BUG_ON((sizeof(attr.port_cap_flags) +
318				sizeof(attr.port_cap_flags2)) > sizeof(u64));
319		cap_flags = attr.port_cap_flags |
320			((u64)attr.port_cap_flags2 << 32);
321		if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
322				      cap_flags, RDMA_NLDEV_ATTR_PAD))
323			return -EMSGSIZE;
324		if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SUBNET_PREFIX,
325				      attr.subnet_prefix, RDMA_NLDEV_ATTR_PAD))
326			return -EMSGSIZE;
327		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_LID, attr.lid))
328			return -EMSGSIZE;
329		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_SM_LID, attr.sm_lid))
330			return -EMSGSIZE;
331		if (nla_put_u8(msg, RDMA_NLDEV_ATTR_LMC, attr.lmc))
332			return -EMSGSIZE;
333	}
334	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_STATE, attr.state))
335		return -EMSGSIZE;
336	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_PHYS_STATE, attr.phys_state))
337		return -EMSGSIZE;
338
339	netdev = ib_device_get_netdev(device, port);
340	if (netdev && net_eq(dev_net(netdev), net)) {
341		ret = nla_put_u32(msg,
342				  RDMA_NLDEV_ATTR_NDEV_INDEX, netdev->ifindex);
343		if (ret)
344			goto out;
345		ret = nla_put_string(msg,
346				     RDMA_NLDEV_ATTR_NDEV_NAME, netdev->name);
347	}
348
349out:
350	if (netdev)
351		dev_put(netdev);
352	return ret;
353}
354
355static int fill_res_info_entry(struct sk_buff *msg,
356			       const char *name, u64 curr)
357{
358	struct nlattr *entry_attr;
359
360	entry_attr = nla_nest_start_noflag(msg,
361					   RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY);
362	if (!entry_attr)
363		return -EMSGSIZE;
364
365	if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME, name))
366		goto err;
367	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR, curr,
368			      RDMA_NLDEV_ATTR_PAD))
369		goto err;
370
371	nla_nest_end(msg, entry_attr);
372	return 0;
373
374err:
375	nla_nest_cancel(msg, entry_attr);
376	return -EMSGSIZE;
377}
378
379static int fill_res_info(struct sk_buff *msg, struct ib_device *device)
380{
381	static const char * const names[RDMA_RESTRACK_MAX] = {
382		[RDMA_RESTRACK_PD] = "pd",
383		[RDMA_RESTRACK_CQ] = "cq",
384		[RDMA_RESTRACK_QP] = "qp",
385		[RDMA_RESTRACK_CM_ID] = "cm_id",
386		[RDMA_RESTRACK_MR] = "mr",
387		[RDMA_RESTRACK_CTX] = "ctx",
388	};
389
390	struct nlattr *table_attr;
391	int ret, i, curr;
392
393	if (fill_nldev_handle(msg, device))
394		return -EMSGSIZE;
395
396	table_attr = nla_nest_start_noflag(msg, RDMA_NLDEV_ATTR_RES_SUMMARY);
397	if (!table_attr)
398		return -EMSGSIZE;
399
400	for (i = 0; i < RDMA_RESTRACK_MAX; i++) {
401		if (!names[i])
402			continue;
403		curr = rdma_restrack_count(device, i);
404		ret = fill_res_info_entry(msg, names[i], curr);
405		if (ret)
406			goto err;
407	}
408
409	nla_nest_end(msg, table_attr);
410	return 0;
411
412err:
413	nla_nest_cancel(msg, table_attr);
414	return ret;
415}
416
417static int fill_res_name_pid(struct sk_buff *msg,
418			     struct rdma_restrack_entry *res)
419{
420	int err = 0;
421
	/*
	 * For user resources, the PID is reported and user space should read
	 * /proc/PID/comm to get the task name.
	 */
426	if (rdma_is_kernel_res(res)) {
427		err = nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME,
428				     res->kern_name);
429	} else {
430		pid_t pid;
431
432		pid = task_pid_vnr(res->task);
		/*
		 * If the task is already dead (zombie state), the PID is
		 * zero and there is no need to report it.
		 */
437		if (pid)
			/*
			 * This check is racy: the task can be killed and the
			 * PID become zero right here, but that is fine. The
			 * next query simply won't return a PID. We don't
			 * promise real-time reflection of SW objects.
			 */
444			err = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID, pid);
445	}
446
447	return err ? -EMSGSIZE : 0;
448}
449
450static int fill_res_qp_entry_query(struct sk_buff *msg,
451				   struct rdma_restrack_entry *res,
452				   struct ib_device *dev,
453				   struct ib_qp *qp)
454{
455	struct ib_qp_init_attr qp_init_attr;
456	struct ib_qp_attr qp_attr;
457	int ret;
458
459	ret = ib_query_qp(qp, &qp_attr, 0, &qp_init_attr);
460	if (ret)
461		return ret;
462
463	if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC) {
464		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQPN,
465				qp_attr.dest_qp_num))
466			goto err;
467		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQ_PSN,
468				qp_attr.rq_psn))
469			goto err;
470	}
471
472	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_SQ_PSN, qp_attr.sq_psn))
473		goto err;
474
475	if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC ||
476	    qp->qp_type == IB_QPT_XRC_INI || qp->qp_type == IB_QPT_XRC_TGT) {
477		if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE,
478			       qp_attr.path_mig_state))
479			goto err;
480	}
481	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, qp->qp_type))
482		goto err;
483	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, qp_attr.qp_state))
484		goto err;
485
486	if (dev->ops.fill_res_qp_entry)
487		return dev->ops.fill_res_qp_entry(msg, qp);
488	return 0;
489
490err:	return -EMSGSIZE;
491}
492
493static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
494			     struct rdma_restrack_entry *res, uint32_t port)
495{
496	struct ib_qp *qp = container_of(res, struct ib_qp, res);
497	struct ib_device *dev = qp->device;
498	int ret;
499
500	if (port && port != qp->port)
501		return -EAGAIN;
502
503	/* In create_qp() port is not set yet */
504	if (qp->port && nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp->port))
505		return -EMSGSIZE;
506
507	ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num);
508	if (ret)
509		return -EMSGSIZE;
510
511	if (!rdma_is_kernel_res(res) &&
512	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, qp->pd->res.id))
513		return -EMSGSIZE;
514
515	ret = fill_res_name_pid(msg, res);
516	if (ret)
517		return -EMSGSIZE;
518
519	return fill_res_qp_entry_query(msg, res, dev, qp);
520}
521
522static int fill_res_qp_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
523				 struct rdma_restrack_entry *res, uint32_t port)
524{
525	struct ib_qp *qp = container_of(res, struct ib_qp, res);
526	struct ib_device *dev = qp->device;
527
528	if (port && port != qp->port)
529		return -EAGAIN;
530	if (!dev->ops.fill_res_qp_entry_raw)
531		return -EINVAL;
532	return dev->ops.fill_res_qp_entry_raw(msg, qp);
533}
534
535static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin,
536				struct rdma_restrack_entry *res, uint32_t port)
537{
538	struct rdma_id_private *id_priv =
539				container_of(res, struct rdma_id_private, res);
540	struct ib_device *dev = id_priv->id.device;
541	struct rdma_cm_id *cm_id = &id_priv->id;
542
543	if (port && port != cm_id->port_num)
544		return -EAGAIN;
545
546	if (cm_id->port_num &&
547	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, cm_id->port_num))
548		goto err;
549
550	if (id_priv->qp_num) {
551		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, id_priv->qp_num))
552			goto err;
553		if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, cm_id->qp_type))
554			goto err;
555	}
556
557	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PS, cm_id->ps))
558		goto err;
559
560	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, id_priv->state))
561		goto err;
562
563	if (cm_id->route.addr.src_addr.ss_family &&
564	    nla_put(msg, RDMA_NLDEV_ATTR_RES_SRC_ADDR,
565		    sizeof(cm_id->route.addr.src_addr),
566		    &cm_id->route.addr.src_addr))
567		goto err;
568	if (cm_id->route.addr.dst_addr.ss_family &&
569	    nla_put(msg, RDMA_NLDEV_ATTR_RES_DST_ADDR,
570		    sizeof(cm_id->route.addr.dst_addr),
571		    &cm_id->route.addr.dst_addr))
572		goto err;
573
574	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CM_IDN, res->id))
575		goto err;
576
577	if (fill_res_name_pid(msg, res))
578		goto err;
579
580	if (dev->ops.fill_res_cm_id_entry)
581		return dev->ops.fill_res_cm_id_entry(msg, cm_id);
582	return 0;
583
584err: return -EMSGSIZE;
585}
586
587static int fill_res_cq_entry(struct sk_buff *msg, bool has_cap_net_admin,
588			     struct rdma_restrack_entry *res, uint32_t port)
589{
590	struct ib_cq *cq = container_of(res, struct ib_cq, res);
591	struct ib_device *dev = cq->device;
592
593	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQE, cq->cqe))
594		return -EMSGSIZE;
595	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
596			      atomic_read(&cq->usecnt), RDMA_NLDEV_ATTR_PAD))
597		return -EMSGSIZE;
598
599	/* Poll context is only valid for kernel CQs */
600	if (rdma_is_kernel_res(res) &&
601	    nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_POLL_CTX, cq->poll_ctx))
602		return -EMSGSIZE;
603
604	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, (cq->dim != NULL)))
605		return -EMSGSIZE;
606
607	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN, res->id))
608		return -EMSGSIZE;
609	if (!rdma_is_kernel_res(res) &&
610	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN,
611			cq->uobject->uevent.uobject.context->res.id))
612		return -EMSGSIZE;
613
614	if (fill_res_name_pid(msg, res))
615		return -EMSGSIZE;
616
617	return (dev->ops.fill_res_cq_entry) ?
618		dev->ops.fill_res_cq_entry(msg, cq) : 0;
619}
620
621static int fill_res_cq_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
622				 struct rdma_restrack_entry *res, uint32_t port)
623{
624	struct ib_cq *cq = container_of(res, struct ib_cq, res);
625	struct ib_device *dev = cq->device;
626
627	if (!dev->ops.fill_res_cq_entry_raw)
628		return -EINVAL;
629	return dev->ops.fill_res_cq_entry_raw(msg, cq);
630}
631
632static int fill_res_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
633			     struct rdma_restrack_entry *res, uint32_t port)
634{
635	struct ib_mr *mr = container_of(res, struct ib_mr, res);
636	struct ib_device *dev = mr->pd->device;
637
638	if (has_cap_net_admin) {
639		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RKEY, mr->rkey))
640			return -EMSGSIZE;
641		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr->lkey))
642			return -EMSGSIZE;
643	}
644
645	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_MRLEN, mr->length,
646			      RDMA_NLDEV_ATTR_PAD))
647		return -EMSGSIZE;
648
649	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id))
650		return -EMSGSIZE;
651
652	if (!rdma_is_kernel_res(res) &&
653	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, mr->pd->res.id))
654		return -EMSGSIZE;
655
656	if (fill_res_name_pid(msg, res))
657		return -EMSGSIZE;
658
659	return (dev->ops.fill_res_mr_entry) ?
660		       dev->ops.fill_res_mr_entry(msg, mr) :
661		       0;
662}
663
664static int fill_res_mr_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
665				 struct rdma_restrack_entry *res, uint32_t port)
666{
667	struct ib_mr *mr = container_of(res, struct ib_mr, res);
668	struct ib_device *dev = mr->pd->device;
669
670	if (!dev->ops.fill_res_mr_entry_raw)
671		return -EINVAL;
672	return dev->ops.fill_res_mr_entry_raw(msg, mr);
673}
674
675static int fill_res_pd_entry(struct sk_buff *msg, bool has_cap_net_admin,
676			     struct rdma_restrack_entry *res, uint32_t port)
677{
678	struct ib_pd *pd = container_of(res, struct ib_pd, res);
679
680	if (has_cap_net_admin) {
681		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY,
682				pd->local_dma_lkey))
683			goto err;
684		if ((pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) &&
685		    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY,
686				pd->unsafe_global_rkey))
687			goto err;
688	}
689	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
690			      atomic_read(&pd->usecnt), RDMA_NLDEV_ATTR_PAD))
691		goto err;
692
693	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, res->id))
694		goto err;
695
696	if (!rdma_is_kernel_res(res) &&
697	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN,
698			pd->uobject->context->res.id))
699		goto err;
700
701	return fill_res_name_pid(msg, res);
702
703err:	return -EMSGSIZE;
704}
705
706static int fill_stat_counter_mode(struct sk_buff *msg,
707				  struct rdma_counter *counter)
708{
709	struct rdma_counter_mode *m = &counter->mode;
710
711	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, m->mode))
712		return -EMSGSIZE;
713
714	if (m->mode == RDMA_COUNTER_MODE_AUTO) {
715		if ((m->mask & RDMA_COUNTER_MASK_QP_TYPE) &&
716		    nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, m->param.qp_type))
717			return -EMSGSIZE;
718
719		if ((m->mask & RDMA_COUNTER_MASK_PID) &&
720		    fill_res_name_pid(msg, &counter->res))
721			return -EMSGSIZE;
722	}
723
724	return 0;
725}
726
727static int fill_stat_counter_qp_entry(struct sk_buff *msg, u32 qpn)
728{
729	struct nlattr *entry_attr;
730
731	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP_ENTRY);
732	if (!entry_attr)
733		return -EMSGSIZE;
734
735	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn))
736		goto err;
737
738	nla_nest_end(msg, entry_attr);
739	return 0;
740
741err:
742	nla_nest_cancel(msg, entry_attr);
743	return -EMSGSIZE;
744}
745
746static int fill_stat_counter_qps(struct sk_buff *msg,
747				 struct rdma_counter *counter)
748{
749	struct rdma_restrack_entry *res;
750	struct rdma_restrack_root *rt;
751	struct nlattr *table_attr;
752	struct ib_qp *qp = NULL;
753	unsigned long id = 0;
754	int ret = 0;
755
756	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP);
757	if (!table_attr)
758		return -EMSGSIZE;
759
760	rt = &counter->device->res[RDMA_RESTRACK_QP];
761	xa_lock(&rt->xa);
762	xa_for_each(&rt->xa, id, res) {
763		qp = container_of(res, struct ib_qp, res);
764		if (!qp->counter || (qp->counter->id != counter->id))
765			continue;
766
767		ret = fill_stat_counter_qp_entry(msg, qp->qp_num);
768		if (ret)
769			goto err;
770	}
771
772	xa_unlock(&rt->xa);
773	nla_nest_end(msg, table_attr);
774	return 0;
775
776err:
777	xa_unlock(&rt->xa);
778	nla_nest_cancel(msg, table_attr);
779	return ret;
780}
781
782int rdma_nl_stat_hwcounter_entry(struct sk_buff *msg, const char *name,
783				 u64 value)
784{
785	struct nlattr *entry_attr;
786
787	entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY);
788	if (!entry_attr)
789		return -EMSGSIZE;
790
791	if (nla_put_string(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME,
792			   name))
793		goto err;
794	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE,
795			      value, RDMA_NLDEV_ATTR_PAD))
796		goto err;
797
798	nla_nest_end(msg, entry_attr);
799	return 0;
800
801err:
802	nla_nest_cancel(msg, entry_attr);
803	return -EMSGSIZE;
804}
805EXPORT_SYMBOL(rdma_nl_stat_hwcounter_entry);
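
/*
 * Illustrative sketch (hypothetical driver code): statistics callbacks can
 * use rdma_nl_stat_hwcounter_entry() to emit name/value pairs, e.g.:
 *
 *	static int hypothetical_fill_stat_mr_entry(struct sk_buff *msg,
 *						   struct ib_mr *mr)
 *	{
 *		return rdma_nl_stat_hwcounter_entry(msg, "page_faults", 0);
 *	}
 *
 * The counter name and value here are placeholders.
 */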
806
807static int fill_stat_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
808			      struct rdma_restrack_entry *res, uint32_t port)
809{
810	struct ib_mr *mr = container_of(res, struct ib_mr, res);
811	struct ib_device *dev = mr->pd->device;
812
813	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id))
814		goto err;
815
816	if (dev->ops.fill_stat_mr_entry)
817		return dev->ops.fill_stat_mr_entry(msg, mr);
818	return 0;
819
820err:
821	return -EMSGSIZE;
822}
823
824static int fill_stat_counter_hwcounters(struct sk_buff *msg,
825					struct rdma_counter *counter)
826{
827	struct rdma_hw_stats *st = counter->stats;
828	struct nlattr *table_attr;
829	int i;
830
831	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
832	if (!table_attr)
833		return -EMSGSIZE;
834
835	for (i = 0; i < st->num_counters; i++)
836		if (rdma_nl_stat_hwcounter_entry(msg, st->names[i], st->value[i]))
837			goto err;
838
839	nla_nest_end(msg, table_attr);
840	return 0;
841
842err:
843	nla_nest_cancel(msg, table_attr);
844	return -EMSGSIZE;
845}
846
847static int fill_res_counter_entry(struct sk_buff *msg, bool has_cap_net_admin,
848				  struct rdma_restrack_entry *res,
849				  uint32_t port)
850{
851	struct rdma_counter *counter =
852		container_of(res, struct rdma_counter, res);
853
854	if (port && port != counter->port)
855		return -EAGAIN;
856
	/* Dump it even if the query failed */
858	rdma_counter_query_stats(counter);
859
860	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, counter->port) ||
861	    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, counter->id) ||
862	    fill_stat_counter_mode(msg, counter) ||
863	    fill_stat_counter_qps(msg, counter) ||
864	    fill_stat_counter_hwcounters(msg, counter))
865		return -EMSGSIZE;
866
867	return 0;
868}
869
870static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
871			  struct netlink_ext_ack *extack)
872{
873	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
874	struct ib_device *device;
875	struct sk_buff *msg;
876	u32 index;
877	int err;
878
879	err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
880				     nldev_policy, extack);
881	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
882		return -EINVAL;
883
884	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
885
886	device = ib_device_get_by_index(sock_net(skb->sk), index);
887	if (!device)
888		return -EINVAL;
889
890	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
891	if (!msg) {
892		err = -ENOMEM;
893		goto err;
894	}
895
896	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
897			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
898			0, 0);
899
900	err = fill_dev_info(msg, device);
901	if (err)
902		goto err_free;
903
904	nlmsg_end(msg, nlh);
905
906	ib_device_put(device);
907	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
908
909err_free:
910	nlmsg_free(msg);
911err:
912	ib_device_put(device);
913	return err;
914}
915
916static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
917			  struct netlink_ext_ack *extack)
918{
919	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
920	struct ib_device *device;
921	u32 index;
922	int err;
923
924	err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
925				     nldev_policy, extack);
926	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
927		return -EINVAL;
928
929	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
930	device = ib_device_get_by_index(sock_net(skb->sk), index);
931	if (!device)
932		return -EINVAL;
933
934	if (tb[RDMA_NLDEV_ATTR_DEV_NAME]) {
935		char name[IB_DEVICE_NAME_MAX] = {};
936
937		nla_strlcpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
938			    IB_DEVICE_NAME_MAX);
939		if (strlen(name) == 0) {
940			err = -EINVAL;
941			goto done;
942		}
943		err = ib_device_rename(device, name);
944		goto done;
945	}
946
947	if (tb[RDMA_NLDEV_NET_NS_FD]) {
948		u32 ns_fd;
949
950		ns_fd = nla_get_u32(tb[RDMA_NLDEV_NET_NS_FD]);
951		err = ib_device_set_netns_put(skb, device, ns_fd);
952		goto put_done;
953	}
954
955	if (tb[RDMA_NLDEV_ATTR_DEV_DIM]) {
956		u8 use_dim;
957
958		use_dim = nla_get_u8(tb[RDMA_NLDEV_ATTR_DEV_DIM]);
959		err = ib_device_set_dim(device,  use_dim);
960		goto done;
961	}
962
963done:
964	ib_device_put(device);
965put_done:
966	return err;
967}
968
969static int _nldev_get_dumpit(struct ib_device *device,
970			     struct sk_buff *skb,
971			     struct netlink_callback *cb,
972			     unsigned int idx)
973{
974	int start = cb->args[0];
975	struct nlmsghdr *nlh;
976
977	if (idx < start)
978		return 0;
979
980	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
981			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
982			0, NLM_F_MULTI);
983
984	if (fill_dev_info(skb, device)) {
985		nlmsg_cancel(skb, nlh);
986		goto out;
987	}
988
989	nlmsg_end(skb, nlh);
990
991	idx++;
992
993out:	cb->args[0] = idx;
994	return skb->len;
995}
996
997static int nldev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
998{
	/*
	 * There is no need to take a lock here because
	 * we rely on ib_core's locking.
	 */
1003	return ib_enum_all_devs(_nldev_get_dumpit, skb, cb);
1004}
1005
1006static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1007			       struct netlink_ext_ack *extack)
1008{
1009	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1010	struct ib_device *device;
1011	struct sk_buff *msg;
1012	u32 index;
1013	u32 port;
1014	int err;
1015
1016	err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1017				     nldev_policy, extack);
1018	if (err ||
1019	    !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
1020	    !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
1021		return -EINVAL;
1022
1023	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1024	device = ib_device_get_by_index(sock_net(skb->sk), index);
1025	if (!device)
1026		return -EINVAL;
1027
1028	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1029	if (!rdma_is_port_valid(device, port)) {
1030		err = -EINVAL;
1031		goto err;
1032	}
1033
1034	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1035	if (!msg) {
1036		err = -ENOMEM;
1037		goto err;
1038	}
1039
1040	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1041			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
1042			0, 0);
1043
1044	err = fill_port_info(msg, device, port, sock_net(skb->sk));
1045	if (err)
1046		goto err_free;
1047
1048	nlmsg_end(msg, nlh);
1049	ib_device_put(device);
1050
1051	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1052
1053err_free:
1054	nlmsg_free(msg);
1055err:
1056	ib_device_put(device);
1057	return err;
1058}
1059
1060static int nldev_port_get_dumpit(struct sk_buff *skb,
1061				 struct netlink_callback *cb)
1062{
1063	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1064	struct ib_device *device;
1065	int start = cb->args[0];
1066	struct nlmsghdr *nlh;
1067	u32 idx = 0;
1068	u32 ifindex;
1069	int err;
1070	unsigned int p;
1071
1072	err = nlmsg_parse_deprecated(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1073				     nldev_policy, NULL);
1074	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
1075		return -EINVAL;
1076
1077	ifindex = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1078	device = ib_device_get_by_index(sock_net(skb->sk), ifindex);
1079	if (!device)
1080		return -EINVAL;
1081
1082	rdma_for_each_port (device, p) {
		/*
		 * The dumpit function returns all information starting from
		 * a specific index. This index is taken from the netlink
		 * request sent by the user and is available in cb->args[0].
		 *
		 * Usually the user doesn't fill this field, which causes
		 * everything to be returned.
		 */
1093		if (idx < start) {
1094			idx++;
1095			continue;
1096		}
1097
1098		nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
1099				cb->nlh->nlmsg_seq,
1100				RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1101						 RDMA_NLDEV_CMD_PORT_GET),
1102				0, NLM_F_MULTI);
1103
1104		if (fill_port_info(skb, device, p, sock_net(skb->sk))) {
1105			nlmsg_cancel(skb, nlh);
1106			goto out;
1107		}
1108		idx++;
1109		nlmsg_end(skb, nlh);
1110	}
1111
1112out:
1113	ib_device_put(device);
1114	cb->args[0] = idx;
1115	return skb->len;
1116}
1117
1118static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1119			      struct netlink_ext_ack *extack)
1120{
1121	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1122	struct ib_device *device;
1123	struct sk_buff *msg;
1124	u32 index;
1125	int ret;
1126
1127	ret = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1128				     nldev_policy, extack);
1129	if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
1130		return -EINVAL;
1131
1132	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1133	device = ib_device_get_by_index(sock_net(skb->sk), index);
1134	if (!device)
1135		return -EINVAL;
1136
1137	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1138	if (!msg) {
1139		ret = -ENOMEM;
1140		goto err;
1141	}
1142
1143	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1144			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
1145			0, 0);
1146
1147	ret = fill_res_info(msg, device);
1148	if (ret)
1149		goto err_free;
1150
1151	nlmsg_end(msg, nlh);
1152	ib_device_put(device);
1153	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1154
1155err_free:
1156	nlmsg_free(msg);
1157err:
1158	ib_device_put(device);
1159	return ret;
1160}
1161
1162static int _nldev_res_get_dumpit(struct ib_device *device,
1163				 struct sk_buff *skb,
1164				 struct netlink_callback *cb,
1165				 unsigned int idx)
1166{
1167	int start = cb->args[0];
1168	struct nlmsghdr *nlh;
1169
1170	if (idx < start)
1171		return 0;
1172
1173	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
1174			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
1175			0, NLM_F_MULTI);
1176
1177	if (fill_res_info(skb, device)) {
1178		nlmsg_cancel(skb, nlh);
1179		goto out;
1180	}
1181	nlmsg_end(skb, nlh);
1182
1183	idx++;
1184
1185out:
1186	cb->args[0] = idx;
1187	return skb->len;
1188}
1189
1190static int nldev_res_get_dumpit(struct sk_buff *skb,
1191				struct netlink_callback *cb)
1192{
1193	return ib_enum_all_devs(_nldev_res_get_dumpit, skb, cb);
1194}
1195
1196struct nldev_fill_res_entry {
1197	enum rdma_nldev_attr nldev_attr;
1198	u8 flags;
1199	u32 entry;
1200	u32 id;
1201};
1202
1203enum nldev_res_flags {
1204	NLDEV_PER_DEV = 1 << 0,
1205};
1206
1207static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = {
1208	[RDMA_RESTRACK_QP] = {
1209		.nldev_attr = RDMA_NLDEV_ATTR_RES_QP,
1210		.entry = RDMA_NLDEV_ATTR_RES_QP_ENTRY,
1211		.id = RDMA_NLDEV_ATTR_RES_LQPN,
1212	},
1213	[RDMA_RESTRACK_CM_ID] = {
1214		.nldev_attr = RDMA_NLDEV_ATTR_RES_CM_ID,
1215		.entry = RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY,
1216		.id = RDMA_NLDEV_ATTR_RES_CM_IDN,
1217	},
1218	[RDMA_RESTRACK_CQ] = {
1219		.nldev_attr = RDMA_NLDEV_ATTR_RES_CQ,
1220		.flags = NLDEV_PER_DEV,
1221		.entry = RDMA_NLDEV_ATTR_RES_CQ_ENTRY,
1222		.id = RDMA_NLDEV_ATTR_RES_CQN,
1223	},
1224	[RDMA_RESTRACK_MR] = {
1225		.nldev_attr = RDMA_NLDEV_ATTR_RES_MR,
1226		.flags = NLDEV_PER_DEV,
1227		.entry = RDMA_NLDEV_ATTR_RES_MR_ENTRY,
1228		.id = RDMA_NLDEV_ATTR_RES_MRN,
1229	},
1230	[RDMA_RESTRACK_PD] = {
1231		.nldev_attr = RDMA_NLDEV_ATTR_RES_PD,
1232		.flags = NLDEV_PER_DEV,
1233		.entry = RDMA_NLDEV_ATTR_RES_PD_ENTRY,
1234		.id = RDMA_NLDEV_ATTR_RES_PDN,
1235	},
1236	[RDMA_RESTRACK_COUNTER] = {
1237		.nldev_attr = RDMA_NLDEV_ATTR_STAT_COUNTER,
1238		.entry = RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY,
1239		.id = RDMA_NLDEV_ATTR_STAT_COUNTER_ID,
1240	},
1241};
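
/*
 * res_get_common_doit() looks up a single object by the fe->id attribute,
 * while res_get_common_dumpit() nests every object of the given type inside
 * an fe->nldev_attr table, one fe->entry per object.
 */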
1242
1243static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1244			       struct netlink_ext_ack *extack,
1245			       enum rdma_restrack_type res_type,
1246			       res_fill_func_t fill_func)
1247{
1248	const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
1249	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1250	struct rdma_restrack_entry *res;
1251	struct ib_device *device;
1252	u32 index, id, port = 0;
1253	bool has_cap_net_admin;
1254	struct sk_buff *msg;
1255	int ret;
1256
1257	ret = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1258				     nldev_policy, extack);
1259	if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !fe->id || !tb[fe->id])
1260		return -EINVAL;
1261
1262	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1263	device = ib_device_get_by_index(sock_net(skb->sk), index);
1264	if (!device)
1265		return -EINVAL;
1266
1267	if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
1268		port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1269		if (!rdma_is_port_valid(device, port)) {
1270			ret = -EINVAL;
1271			goto err;
1272		}
1273	}
1274
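	/*
	 * Resource types flagged NLDEV_PER_DEV (CQ, MR, PD) are queried per
	 * device and must not carry a port index; the remaining types are
	 * per port and require one.
	 */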
1275	if ((port && fe->flags & NLDEV_PER_DEV) ||
1276	    (!port && ~fe->flags & NLDEV_PER_DEV)) {
1277		ret = -EINVAL;
1278		goto err;
1279	}
1280
1281	id = nla_get_u32(tb[fe->id]);
1282	res = rdma_restrack_get_byid(device, res_type, id);
1283	if (IS_ERR(res)) {
1284		ret = PTR_ERR(res);
1285		goto err;
1286	}
1287
1288	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1289	if (!msg) {
1290		ret = -ENOMEM;
1291		goto err_get;
1292	}
1293
1294	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1295			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1296					 RDMA_NL_GET_OP(nlh->nlmsg_type)),
1297			0, 0);
1298
1299	if (fill_nldev_handle(msg, device)) {
1300		ret = -EMSGSIZE;
1301		goto err_free;
1302	}
1303
1304	has_cap_net_admin = netlink_capable(skb, CAP_NET_ADMIN);
1305
1306	ret = fill_func(msg, has_cap_net_admin, res, port);
1307	if (ret)
1308		goto err_free;
1309
1310	rdma_restrack_put(res);
1311	nlmsg_end(msg, nlh);
1312	ib_device_put(device);
1313	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1314
1315err_free:
1316	nlmsg_free(msg);
1317err_get:
1318	rdma_restrack_put(res);
1319err:
1320	ib_device_put(device);
1321	return ret;
1322}
1323
1324static int res_get_common_dumpit(struct sk_buff *skb,
1325				 struct netlink_callback *cb,
1326				 enum rdma_restrack_type res_type,
1327				 res_fill_func_t fill_func)
1328{
1329	const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
1330	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1331	struct rdma_restrack_entry *res;
1332	struct rdma_restrack_root *rt;
1333	int err, ret = 0, idx = 0;
1334	struct nlattr *table_attr;
1335	struct nlattr *entry_attr;
1336	struct ib_device *device;
1337	int start = cb->args[0];
1338	bool has_cap_net_admin;
1339	struct nlmsghdr *nlh;
1340	unsigned long id;
1341	u32 index, port = 0;
1342	bool filled = false;
1343
1344	err = nlmsg_parse_deprecated(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1345				     nldev_policy, NULL);
	/*
	 * Right now we expect the device index in order to get res
	 * information, but this code could be extended to return all devices
	 * in one shot by checking for RDMA_NLDEV_ATTR_DEV_INDEX and, if it
	 * doesn't exist, iterating over all devices.
	 *
	 * That is not needed for now.
	 */
1354	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
1355		return -EINVAL;
1356
1357	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1358	device = ib_device_get_by_index(sock_net(skb->sk), index);
1359	if (!device)
1360		return -EINVAL;
1361
	/*
	 * If no PORT_INDEX is supplied, all resources of this type from the
	 * device are returned.
	 */
1365	if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
1366		port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1367		if (!rdma_is_port_valid(device, port)) {
1368			ret = -EINVAL;
1369			goto err_index;
1370		}
1371	}
1372
1373	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
1374			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1375					 RDMA_NL_GET_OP(cb->nlh->nlmsg_type)),
1376			0, NLM_F_MULTI);
1377
1378	if (fill_nldev_handle(skb, device)) {
1379		ret = -EMSGSIZE;
1380		goto err;
1381	}
1382
1383	table_attr = nla_nest_start_noflag(skb, fe->nldev_attr);
1384	if (!table_attr) {
1385		ret = -EMSGSIZE;
1386		goto err;
1387	}
1388
1389	has_cap_net_admin = netlink_capable(cb->skb, CAP_NET_ADMIN);
1390
1391	rt = &device->res[res_type];
1392	xa_lock(&rt->xa);
1393	/*
1394	 * FIXME: if the skip ahead is something common this loop should
1395	 * use xas_for_each & xas_pause to optimize, we can have a lot of
1396	 * objects.
1397	 */
1398	xa_for_each(&rt->xa, id, res) {
1399		if (idx < start || !rdma_restrack_get(res))
1400			goto next;
1401
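		/*
		 * The reference taken above keeps the entry alive, so the
		 * XArray lock can be dropped while the fill callback (which
		 * may sleep or allocate) runs.
		 */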
1402		xa_unlock(&rt->xa);
1403
1404		filled = true;
1405
1406		entry_attr = nla_nest_start_noflag(skb, fe->entry);
1407		if (!entry_attr) {
1408			ret = -EMSGSIZE;
1409			rdma_restrack_put(res);
1410			goto msg_full;
1411		}
1412
1413		ret = fill_func(skb, has_cap_net_admin, res, port);
1414
1415		rdma_restrack_put(res);
1416
1417		if (ret) {
1418			nla_nest_cancel(skb, entry_attr);
1419			if (ret == -EMSGSIZE)
1420				goto msg_full;
1421			if (ret == -EAGAIN)
1422				goto again;
1423			goto res_err;
1424		}
1425		nla_nest_end(skb, entry_attr);
1426again:		xa_lock(&rt->xa);
1427next:		idx++;
1428	}
1429	xa_unlock(&rt->xa);
1430
1431msg_full:
1432	nla_nest_end(skb, table_attr);
1433	nlmsg_end(skb, nlh);
1434	cb->args[0] = idx;
1435
1436	/*
1437	 * No more entries to fill, cancel the message and
1438	 * return 0 to mark end of dumpit.
1439	 */
1440	if (!filled)
1441		goto err;
1442
1443	ib_device_put(device);
1444	return skb->len;
1445
1446res_err:
1447	nla_nest_cancel(skb, table_attr);
1448
1449err:
1450	nlmsg_cancel(skb, nlh);
1451
1452err_index:
1453	ib_device_put(device);
1454	return ret;
1455}
1456
1457#define RES_GET_FUNCS(name, type)                                              \
1458	static int nldev_res_get_##name##_dumpit(struct sk_buff *skb,          \
1459						 struct netlink_callback *cb)  \
1460	{                                                                      \
1461		return res_get_common_dumpit(skb, cb, type,                    \
1462					     fill_res_##name##_entry);         \
1463	}                                                                      \
1464	static int nldev_res_get_##name##_doit(struct sk_buff *skb,            \
1465					       struct nlmsghdr *nlh,           \
1466					       struct netlink_ext_ack *extack) \
1467	{                                                                      \
1468		return res_get_common_doit(skb, nlh, extack, type,             \
1469					   fill_res_##name##_entry);           \
1470	}
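
/*
 * For example, RES_GET_FUNCS(qp, RDMA_RESTRACK_QP) below expands to
 * nldev_res_get_qp_dumpit() and nldev_res_get_qp_doit(), which dispatch to
 * res_get_common_dumpit()/res_get_common_doit() with fill_res_qp_entry() as
 * the fill function.
 */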
1471
1472RES_GET_FUNCS(qp, RDMA_RESTRACK_QP);
1473RES_GET_FUNCS(qp_raw, RDMA_RESTRACK_QP);
1474RES_GET_FUNCS(cm_id, RDMA_RESTRACK_CM_ID);
1475RES_GET_FUNCS(cq, RDMA_RESTRACK_CQ);
1476RES_GET_FUNCS(cq_raw, RDMA_RESTRACK_CQ);
1477RES_GET_FUNCS(pd, RDMA_RESTRACK_PD);
1478RES_GET_FUNCS(mr, RDMA_RESTRACK_MR);
1479RES_GET_FUNCS(mr_raw, RDMA_RESTRACK_MR);
1480RES_GET_FUNCS(counter, RDMA_RESTRACK_COUNTER);
1481
1482static LIST_HEAD(link_ops);
1483static DECLARE_RWSEM(link_ops_rwsem);
1484
1485static const struct rdma_link_ops *link_ops_get(const char *type)
1486{
1487	const struct rdma_link_ops *ops;
1488
1489	list_for_each_entry(ops, &link_ops, list) {
1490		if (!strcmp(ops->type, type))
1491			goto out;
1492	}
1493	ops = NULL;
1494out:
1495	return ops;
1496}
1497
1498void rdma_link_register(struct rdma_link_ops *ops)
1499{
1500	down_write(&link_ops_rwsem);
1501	if (WARN_ON_ONCE(link_ops_get(ops->type)))
1502		goto out;
1503	list_add(&ops->list, &link_ops);
1504out:
1505	up_write(&link_ops_rwsem);
1506}
1507EXPORT_SYMBOL(rdma_link_register);
1508
1509void rdma_link_unregister(struct rdma_link_ops *ops)
1510{
1511	down_write(&link_ops_rwsem);
1512	list_del(&ops->list);
1513	up_write(&link_ops_rwsem);
1514}
1515EXPORT_SYMBOL(rdma_link_unregister);
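
/*
 * Illustrative sketch (hypothetical driver): a link provider registers its
 * rdma_link_ops so that "rdma link add NAME type TYPE netdev NETDEV" can
 * reach it:
 *
 *	static struct rdma_link_ops hypothetical_link_ops = {
 *		.type = "hypothetical",
 *		.newlink = hypothetical_newlink,
 *	};
 *
 *	rdma_link_register(&hypothetical_link_ops);
 *
 * Declaring MODULE_ALIAS_RDMA_LINK("hypothetical") lets the
 * request_module("rdma-link-hypothetical") call in nldev_newlink() autoload
 * the module.
 */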
1516
1517static int nldev_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
1518			  struct netlink_ext_ack *extack)
1519{
1520	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1521	char ibdev_name[IB_DEVICE_NAME_MAX];
1522	const struct rdma_link_ops *ops;
1523	char ndev_name[IFNAMSIZ];
1524	struct net_device *ndev;
1525	char type[IFNAMSIZ];
1526	int err;
1527
1528	err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1529				     nldev_policy, extack);
1530	if (err || !tb[RDMA_NLDEV_ATTR_DEV_NAME] ||
1531	    !tb[RDMA_NLDEV_ATTR_LINK_TYPE] || !tb[RDMA_NLDEV_ATTR_NDEV_NAME])
1532		return -EINVAL;
1533
1534	nla_strlcpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
1535		    sizeof(ibdev_name));
1536	if (strchr(ibdev_name, '%') || strlen(ibdev_name) == 0)
1537		return -EINVAL;
1538
1539	nla_strlcpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type));
1540	nla_strlcpy(ndev_name, tb[RDMA_NLDEV_ATTR_NDEV_NAME],
1541		    sizeof(ndev_name));
1542
1543	ndev = dev_get_by_name(sock_net(skb->sk), ndev_name);
1544	if (!ndev)
1545		return -ENODEV;
1546
1547	down_read(&link_ops_rwsem);
1548	ops = link_ops_get(type);
1549#ifdef CONFIG_MODULES
1550	if (!ops) {
1551		up_read(&link_ops_rwsem);
1552		request_module("rdma-link-%s", type);
1553		down_read(&link_ops_rwsem);
1554		ops = link_ops_get(type);
1555	}
1556#endif
1557	err = ops ? ops->newlink(ibdev_name, ndev) : -EINVAL;
1558	up_read(&link_ops_rwsem);
1559	dev_put(ndev);
1560
1561	return err;
1562}
1563
1564static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
1565			  struct netlink_ext_ack *extack)
1566{
1567	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1568	struct ib_device *device;
1569	u32 index;
1570	int err;
1571
1572	err = nlmsg_parse_deprecated(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1573				     nldev_policy, extack);
1574	if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
1575		return -EINVAL;
1576
1577	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1578	device = ib_device_get_by_index(sock_net(skb->sk), index);
1579	if (!device)
1580		return -EINVAL;
1581
1582	if (!(device->attrs.device_cap_flags & IB_DEVICE_ALLOW_USER_UNREG)) {
1583		ib_device_put(device);
1584		return -EINVAL;
1585	}
1586
1587	ib_unregister_device_and_put(device);
1588	return 0;
1589}
1590
1591static int nldev_get_chardev(struct sk_buff *skb, struct nlmsghdr *nlh,
1592			     struct netlink_ext_ack *extack)
1593{
1594	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1595	char client_name[RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE];
1596	struct ib_client_nl_info data = {};
1597	struct ib_device *ibdev = NULL;
1598	struct sk_buff *msg;
1599	u32 index;
1600	int err;
1601
1602	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy,
1603			  extack);
1604	if (err || !tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE])
1605		return -EINVAL;
1606
1607	nla_strlcpy(client_name, tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE],
1608		    sizeof(client_name));
1609
1610	if (tb[RDMA_NLDEV_ATTR_DEV_INDEX]) {
1611		index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1612		ibdev = ib_device_get_by_index(sock_net(skb->sk), index);
1613		if (!ibdev)
1614			return -EINVAL;
1615
1616		if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
1617			data.port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1618			if (!rdma_is_port_valid(ibdev, data.port)) {
1619				err = -EINVAL;
1620				goto out_put;
1621			}
1622		} else {
1623			data.port = -1;
1624		}
1625	} else if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
1626		return -EINVAL;
1627	}
1628
1629	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1630	if (!msg) {
1631		err = -ENOMEM;
1632		goto out_put;
1633	}
1634	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1635			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1636					 RDMA_NLDEV_CMD_GET_CHARDEV),
1637			0, 0);
1638
1639	data.nl_msg = msg;
1640	err = ib_get_client_nl_info(ibdev, client_name, &data);
1641	if (err)
1642		goto out_nlmsg;
1643
1644	err = nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CHARDEV,
1645				huge_encode_dev(data.cdev->devt),
1646				RDMA_NLDEV_ATTR_PAD);
1647	if (err)
1648		goto out_data;
1649	err = nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CHARDEV_ABI, data.abi,
1650				RDMA_NLDEV_ATTR_PAD);
1651	if (err)
1652		goto out_data;
1653	if (nla_put_string(msg, RDMA_NLDEV_ATTR_CHARDEV_NAME,
1654			   dev_name(data.cdev))) {
1655		err = -EMSGSIZE;
1656		goto out_data;
1657	}
1658
1659	nlmsg_end(msg, nlh);
1660	put_device(data.cdev);
1661	if (ibdev)
1662		ib_device_put(ibdev);
1663	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1664
1665out_data:
1666	put_device(data.cdev);
1667out_nlmsg:
1668	nlmsg_free(msg);
1669out_put:
1670	if (ibdev)
1671		ib_device_put(ibdev);
1672	return err;
1673}
1674
1675static int nldev_sys_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1676			      struct netlink_ext_ack *extack)
1677{
1678	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1679	struct sk_buff *msg;
1680	int err;
1681
1682	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1683			  nldev_policy, extack);
1684	if (err)
1685		return err;
1686
1687	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1688	if (!msg)
1689		return -ENOMEM;
1690
1691	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1692			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1693					 RDMA_NLDEV_CMD_SYS_GET),
1694			0, 0);
1695
1696	err = nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_NETNS_MODE,
1697			 (u8)ib_devices_shared_netns);
1698	if (err) {
1699		nlmsg_free(msg);
1700		return err;
1701	}
1702	nlmsg_end(msg, nlh);
1703	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1704}
1705
1706static int nldev_set_sys_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1707				  struct netlink_ext_ack *extack)
1708{
1709	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1710	u8 enable;
1711	int err;
1712
1713	err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1714			  nldev_policy, extack);
1715	if (err || !tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE])
1716		return -EINVAL;
1717
1718	enable = nla_get_u8(tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE]);
1719	/* Only 0 and 1 are supported */
1720	if (enable > 1)
1721		return -EINVAL;
1722
1723	err = rdma_compatdev_set(enable);
1724	return err;
1725}
1726
1727static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
1728			       struct netlink_ext_ack *extack)
1729{
1730	u32 index, port, mode, mask = 0, qpn, cntn = 0;
1731	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
1732	struct ib_device *device;
1733	struct sk_buff *msg;
1734	int ret;
1735
1736	ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
1737			  nldev_policy, extack);
	/* Currently only a QP counter is supported */
1739	if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES] ||
1740	    !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
1741	    !tb[RDMA_NLDEV_ATTR_PORT_INDEX] || !tb[RDMA_NLDEV_ATTR_STAT_MODE])
1742		return -EINVAL;
1743
1744	if (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP)
1745		return -EINVAL;
1746
1747	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
1748	device = ib_device_get_by_index(sock_net(skb->sk), index);
1749	if (!device)
1750		return -EINVAL;
1751
1752	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
1753	if (!rdma_is_port_valid(device, port)) {
1754		ret = -EINVAL;
1755		goto err;
1756	}
1757
1758	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
1759	if (!msg) {
1760		ret = -ENOMEM;
1761		goto err;
1762	}
1763	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
1764			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
1765					 RDMA_NLDEV_CMD_STAT_SET),
1766			0, 0);
1767
1768	mode = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_MODE]);
1769	if (mode == RDMA_COUNTER_MODE_AUTO) {
1770		if (tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK])
1771			mask = nla_get_u32(
1772				tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]);
1773
1774		ret = rdma_counter_set_auto_mode(device, port,
1775						 mask ? true : false, mask);
1776		if (ret)
1777			goto err_msg;
1778	} else {
		if (!tb[RDMA_NLDEV_ATTR_RES_LQPN]) {
			ret = -EINVAL;
			goto err_msg;
		}
1781		qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]);
1782		if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]) {
1783			cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]);
1784			ret = rdma_counter_bind_qpn(device, port, qpn, cntn);
1785		} else {
1786			ret = rdma_counter_bind_qpn_alloc(device, port,
1787							  qpn, &cntn);
1788		}
1789		if (ret)
1790			goto err_msg;
1791
1792		if (fill_nldev_handle(msg, device) ||
1793		    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
1794		    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) ||
1795		    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) {
1796			ret = -EMSGSIZE;
1797			goto err_fill;
1798		}
1799	}
1800
1801	nlmsg_end(msg, nlh);
1802	ib_device_put(device);
1803	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
1804
1805err_fill:
1806	rdma_counter_unbind_qpn(device, port, qpn, cntn);
1807err_msg:
1808	nlmsg_free(msg);
1809err:
1810	ib_device_put(device);
1811	return ret;
1812}
1813
static int nldev_stat_del_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
			       struct netlink_ext_ack *extack)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	struct ib_device *device;
	struct sk_buff *msg;
	u32 index, port, qpn, cntn;
	int ret;

	ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
			  nldev_policy, extack);
	if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES] ||
	    !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX] ||
	    !tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID] ||
	    !tb[RDMA_NLDEV_ATTR_RES_LQPN])
		return -EINVAL;

	if (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP)
		return -EINVAL;

	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
	device = ib_device_get_by_index(sock_net(skb->sk), index);
	if (!device)
		return -EINVAL;

	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
	if (!rdma_is_port_valid(device, port)) {
		ret = -EINVAL;
		goto err;
	}

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg) {
		ret = -ENOMEM;
		goto err;
	}
	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
					 RDMA_NLDEV_CMD_STAT_SET),
			0, 0);

	cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]);
	qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]);
	if (fill_nldev_handle(msg, device) ||
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) ||
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) {
		ret = -EMSGSIZE;
		goto err_fill;
	}

	ret = rdma_counter_unbind_qpn(device, port, qpn, cntn);
	if (ret)
		goto err_fill;

	nlmsg_end(msg, nlh);
	ib_device_put(device);
	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);

err_fill:
	nlmsg_free(msg);
err:
	ib_device_put(device);
	return ret;
}

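/*
 * Return the port's default hardware counters: the driver-provided hw_stats
 * values summed with the per-port values tracked by the counter code.
 */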
static int stat_get_doit_default_counter(struct sk_buff *skb,
					 struct nlmsghdr *nlh,
					 struct netlink_ext_ack *extack,
					 struct nlattr *tb[])
{
	struct rdma_hw_stats *stats;
	struct nlattr *table_attr;
	struct ib_device *device;
	int ret, num_cnts, i;
	struct sk_buff *msg;
	u32 index, port;
	u64 v;

	if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
		return -EINVAL;

	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
	device = ib_device_get_by_index(sock_net(skb->sk), index);
	if (!device)
		return -EINVAL;

	if (!device->ops.alloc_hw_stats || !device->ops.get_hw_stats) {
		ret = -EINVAL;
		goto err;
	}

	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
	if (!rdma_is_port_valid(device, port)) {
		ret = -EINVAL;
		goto err;
	}

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg) {
		ret = -ENOMEM;
		goto err;
	}

	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
					 RDMA_NLDEV_CMD_STAT_GET),
			0, 0);

	if (fill_nldev_handle(msg, device) ||
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) {
		ret = -EMSGSIZE;
		goto err_msg;
	}

	stats = device->port_data ? device->port_data[port].hw_stats : NULL;
	if (!stats) {
		ret = -EINVAL;
		goto err_msg;
	}
	mutex_lock(&stats->lock);

	num_cnts = device->ops.get_hw_stats(device, stats, port, 0);
	if (num_cnts < 0) {
		ret = -EINVAL;
		goto err_stats;
	}

	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
	if (!table_attr) {
		ret = -EMSGSIZE;
		goto err_stats;
	}
	for (i = 0; i < num_cnts; i++) {
		v = stats->value[i] +
			rdma_counter_get_hwstat_value(device, port, i);
		if (rdma_nl_stat_hwcounter_entry(msg, stats->names[i], v)) {
			ret = -EMSGSIZE;
			goto err_table;
		}
	}
	nla_nest_end(msg, table_attr);

	mutex_unlock(&stats->lock);
	nlmsg_end(msg, nlh);
	ib_device_put(device);
	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);

err_table:
	nla_nest_cancel(msg, table_attr);
err_stats:
	mutex_unlock(&stats->lock);
err_msg:
	nlmsg_free(msg);
err:
	ib_device_put(device);
	return ret;
}

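/*
 * Report the current QP counter mode (auto/manual) of a port, or delegate to
 * the counter resource handler when a specific counter ID is requested.
 */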
static int stat_get_doit_qp(struct sk_buff *skb, struct nlmsghdr *nlh,
			    struct netlink_ext_ack *extack, struct nlattr *tb[])
{
	enum rdma_nl_counter_mode mode;
	enum rdma_nl_counter_mask mask;
	struct ib_device *device;
	struct sk_buff *msg;
	u32 index, port;
	int ret;

	if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID])
		return nldev_res_get_counter_doit(skb, nlh, extack);

	if (!tb[RDMA_NLDEV_ATTR_STAT_MODE] ||
	    !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
		return -EINVAL;

	index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
	device = ib_device_get_by_index(sock_net(skb->sk), index);
	if (!device)
		return -EINVAL;

	port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
	if (!rdma_is_port_valid(device, port)) {
		ret = -EINVAL;
		goto err;
	}

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg) {
		ret = -ENOMEM;
		goto err;
	}

	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
					 RDMA_NLDEV_CMD_STAT_GET),
			0, 0);

	ret = rdma_counter_get_mode(device, port, &mode, &mask);
	if (ret)
		goto err_msg;

	if (fill_nldev_handle(msg, device) ||
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, mode)) {
		ret = -EMSGSIZE;
		goto err_msg;
	}

	if ((mode == RDMA_COUNTER_MODE_AUTO) &&
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK, mask)) {
		ret = -EMSGSIZE;
		goto err_msg;
	}

	nlmsg_end(msg, nlh);
	ib_device_put(device);
	return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);

err_msg:
	nlmsg_free(msg);
err:
	ib_device_put(device);
	return ret;
}

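/*
 * STAT_GET entry point: with no resource type it returns the default hardware
 * counters, otherwise it dispatches by resource type (QP or MR).
 */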
static int nldev_stat_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
			       struct netlink_ext_ack *extack)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	int ret;

	ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
			  nldev_policy, extack);
	if (ret)
		return -EINVAL;

	if (!tb[RDMA_NLDEV_ATTR_STAT_RES])
		return stat_get_doit_default_counter(skb, nlh, extack, tb);

	switch (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES])) {
	case RDMA_NLDEV_ATTR_RES_QP:
		ret = stat_get_doit_qp(skb, nlh, extack, tb);
		break;
	case RDMA_NLDEV_ATTR_RES_MR:
		ret = res_get_common_doit(skb, nlh, extack, RDMA_RESTRACK_MR,
					  fill_stat_mr_entry);
		break;
	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

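/*
 * Dump flavour of STAT_GET; only per-QP counters and MR statistics are
 * currently supported.
 */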
static int nldev_stat_get_dumpit(struct sk_buff *skb,
				 struct netlink_callback *cb)
{
	struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
	int ret;

	ret = nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
			  nldev_policy, NULL);
	if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES])
		return -EINVAL;

	switch (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES])) {
	case RDMA_NLDEV_ATTR_RES_QP:
		ret = nldev_res_get_counter_dumpit(skb, cb);
		break;
	case RDMA_NLDEV_ATTR_RES_MR:
		ret = res_get_common_dumpit(skb, cb, RDMA_RESTRACK_MR,
					    fill_stat_mr_entry);
		break;
	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

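/*
 * Operation table for the NLDEV netlink client. Entries marked with
 * RDMA_NL_ADMIN_PERM require CAP_NET_ADMIN.
 */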
static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
	[RDMA_NLDEV_CMD_GET] = {
		.doit = nldev_get_doit,
		.dump = nldev_get_dumpit,
	},
	[RDMA_NLDEV_CMD_GET_CHARDEV] = {
		.doit = nldev_get_chardev,
	},
	[RDMA_NLDEV_CMD_SET] = {
		.doit = nldev_set_doit,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NLDEV_CMD_NEWLINK] = {
		.doit = nldev_newlink,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NLDEV_CMD_DELLINK] = {
		.doit = nldev_dellink,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NLDEV_CMD_PORT_GET] = {
		.doit = nldev_port_get_doit,
		.dump = nldev_port_get_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_GET] = {
		.doit = nldev_res_get_doit,
		.dump = nldev_res_get_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_QP_GET] = {
		.doit = nldev_res_get_qp_doit,
		.dump = nldev_res_get_qp_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_CM_ID_GET] = {
		.doit = nldev_res_get_cm_id_doit,
		.dump = nldev_res_get_cm_id_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_CQ_GET] = {
		.doit = nldev_res_get_cq_doit,
		.dump = nldev_res_get_cq_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_MR_GET] = {
		.doit = nldev_res_get_mr_doit,
		.dump = nldev_res_get_mr_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_PD_GET] = {
		.doit = nldev_res_get_pd_doit,
		.dump = nldev_res_get_pd_dumpit,
	},
	[RDMA_NLDEV_CMD_SYS_GET] = {
		.doit = nldev_sys_get_doit,
	},
	[RDMA_NLDEV_CMD_SYS_SET] = {
		.doit = nldev_set_sys_set_doit,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NLDEV_CMD_STAT_SET] = {
		.doit = nldev_stat_set_doit,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NLDEV_CMD_STAT_GET] = {
		.doit = nldev_stat_get_doit,
		.dump = nldev_stat_get_dumpit,
	},
	[RDMA_NLDEV_CMD_STAT_DEL] = {
		.doit = nldev_stat_del_doit,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NLDEV_CMD_RES_QP_GET_RAW] = {
		.doit = nldev_res_get_qp_raw_doit,
		.dump = nldev_res_get_qp_raw_dumpit,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NLDEV_CMD_RES_CQ_GET_RAW] = {
		.doit = nldev_res_get_cq_raw_doit,
		.dump = nldev_res_get_cq_raw_dumpit,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NLDEV_CMD_RES_MR_GET_RAW] = {
		.doit = nldev_res_get_mr_raw_doit,
		.dump = nldev_res_get_mr_raw_dumpit,
		.flags = RDMA_NL_ADMIN_PERM,
	},
};

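/* Register/unregister the NLDEV command table with the RDMA netlink core. */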
void __init nldev_init(void)
{
	rdma_nl_register(RDMA_NL_NLDEV, nldev_cb_table);
}

void nldev_exit(void)
{
	rdma_nl_unregister(RDMA_NL_NLDEV);
}

MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_NLDEV, 5);