1// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2/*
3 * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved.
4 */
5
6#include "mlx5_ib.h"
7#include <linux/mlx5/eswitch.h>
8#include "counters.h"
9#include "ib_rep.h"
10#include "qp.h"
11
/* One entry of a counter table: a counter name and where to find its value. */
struct mlx5_ib_counter {
	const char *name;	/* counter name string */
	size_t offset;		/* byte offset of the value inside the output layout */
};
16
/*
 * Build a struct mlx5_ib_counter entry for field @_name of the
 * query_q_counter_out layout: the name is the stringified field name and
 * the offset is the field's byte offset in that layout.
 */
#define INIT_Q_COUNTER(_name)		\
	{ .name = #_name, .offset = MLX5_BYTE_OFF(query_q_counter_out, _name)}
19
/* Q counters that mlx5_ib_fill_counters() adds unconditionally. */
static const struct mlx5_ib_counter basic_q_cnts[] = {
	INIT_Q_COUNTER(rx_write_requests),
	INIT_Q_COUNTER(rx_read_requests),
	INIT_Q_COUNTER(rx_atomic_requests),
	INIT_Q_COUNTER(out_of_buffer),
};
26
/* Q counter added only when the device has the out_of_seq_cnt capability. */
static const struct mlx5_ib_counter out_of_seq_q_cnts[] = {
	INIT_Q_COUNTER(out_of_sequence),
};
30
/*
 * Q counters added only when the device has the retransmission_q_counters
 * capability.
 */
static const struct mlx5_ib_counter retrans_q_cnts[] = {
	INIT_Q_COUNTER(duplicate_request),
	INIT_Q_COUNTER(rnr_nak_retry_err),
	INIT_Q_COUNTER(packet_seq_err),
	INIT_Q_COUNTER(implied_nak_seq_err),
	INIT_Q_COUNTER(local_ack_timeout_err),
};
38
/*
 * Build a struct mlx5_ib_counter entry for a congestion statistic. The
 * offset is that of the <_name>_high field of the
 * query_cong_statistics_out layout.
 */
#define INIT_CONG_COUNTER(_name)		\
	{ .name = #_name, .offset =	\
		MLX5_BYTE_OFF(query_cong_statistics_out, _name ## _high)}
42
/*
 * Congestion counters, added only when the device has the cc_query_allowed
 * capability. They are placed after all Q counters.
 */
static const struct mlx5_ib_counter cong_cnts[] = {
	INIT_CONG_COUNTER(rp_cnp_ignored),
	INIT_CONG_COUNTER(rp_cnp_handled),
	INIT_CONG_COUNTER(np_ecn_marked_roce_packets),
	INIT_CONG_COUNTER(np_cnp_sent),
};
49
/*
 * Q counters added only when the device has the enhanced_error_q_counters
 * capability.
 */
static const struct mlx5_ib_counter extended_err_cnts[] = {
	INIT_Q_COUNTER(resp_local_length_error),
	INIT_Q_COUNTER(resp_cqe_error),
	INIT_Q_COUNTER(req_cqe_error),
	INIT_Q_COUNTER(req_remote_invalid_request),
	INIT_Q_COUNTER(req_remote_access_errors),
	INIT_Q_COUNTER(resp_remote_access_errors),
	INIT_Q_COUNTER(resp_cqe_flush_error),
	INIT_Q_COUNTER(req_cqe_flush_error),
};
60
/* Q counters added only when the device has the roce_accl capability. */
static const struct mlx5_ib_counter roce_accl_cnts[] = {
	INIT_Q_COUNTER(roce_adp_retrans),
	INIT_Q_COUNTER(roce_adp_retrans_to),
	INIT_Q_COUNTER(roce_slow_restart),
	INIT_Q_COUNTER(roce_slow_restart_cnps),
	INIT_Q_COUNTER(roce_slow_restart_trans),
};
68
/*
 * Build a struct mlx5_ib_counter entry for a counter read through the
 * PPCNT register. The offset is that of the <_name>_high field inside
 * counter_set.eth_extended_cntrs_grp_data_layout of the ppcnt_reg layout.
 */
#define INIT_EXT_PPCNT_COUNTER(_name)		\
	{ .name = #_name, .offset =	\
	MLX5_BYTE_OFF(ppcnt_reg, \
		      counter_set.eth_extended_cntrs_grp_data_layout._name##_high)}
73
/*
 * PPCNT counters, added only when the device reports the
 * rx_icrc_encapsulated_counter PCAM feature. They are placed last.
 */
static const struct mlx5_ib_counter ext_ppcnt_cnts[] = {
	INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated),
};
77
78static int mlx5_ib_read_counters(struct ib_counters *counters,
79				 struct ib_counters_read_attr *read_attr,
80				 struct uverbs_attr_bundle *attrs)
81{
82	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
83	struct mlx5_read_counters_attr mread_attr = {};
84	struct mlx5_ib_flow_counters_desc *desc;
85	int ret, i;
86
87	mutex_lock(&mcounters->mcntrs_mutex);
88	if (mcounters->cntrs_max_index > read_attr->ncounters) {
89		ret = -EINVAL;
90		goto err_bound;
91	}
92
93	mread_attr.out = kcalloc(mcounters->counters_num, sizeof(u64),
94				 GFP_KERNEL);
95	if (!mread_attr.out) {
96		ret = -ENOMEM;
97		goto err_bound;
98	}
99
100	mread_attr.hw_cntrs_hndl = mcounters->hw_cntrs_hndl;
101	mread_attr.flags = read_attr->flags;
102	ret = mcounters->read_counters(counters->device, &mread_attr);
103	if (ret)
104		goto err_read;
105
106	/* do the pass over the counters data array to assign according to the
107	 * descriptions and indexing pairs
108	 */
109	desc = mcounters->counters_data;
110	for (i = 0; i < mcounters->ncounters; i++)
111		read_attr->counters_buff[desc[i].index] += mread_attr.out[desc[i].description];
112
113err_read:
114	kfree(mread_attr.out);
115err_bound:
116	mutex_unlock(&mcounters->mcntrs_mutex);
117	return ret;
118}
119
120static int mlx5_ib_destroy_counters(struct ib_counters *counters)
121{
122	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
123
124	mlx5_ib_counters_clear_description(counters);
125	if (mcounters->hw_cntrs_hndl)
126		mlx5_fc_destroy(to_mdev(counters->device)->mdev,
127				mcounters->hw_cntrs_hndl);
128	return 0;
129}
130
/*
 * Initialise a newly created counters object. Only the mutex protecting the
 * mlx5 specific part of the object is set up here; @attrs is not used.
 * Always returns 0.
 */
static int mlx5_ib_create_counters(struct ib_counters *counters,
				   struct uverbs_attr_bundle *attrs)
{
	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);

	mutex_init(&mcounters->mcntrs_mutex);
	return 0;
}
139
140
141static bool is_mdev_switchdev_mode(const struct mlx5_core_dev *mdev)
142{
143	return MLX5_ESWITCH_MANAGER(mdev) &&
144	       mlx5_ib_eswitch_mode(mdev->priv.eswitch) ==
145		       MLX5_ESWITCH_OFFLOADS;
146}
147
148static const struct mlx5_ib_counters *get_counters(struct mlx5_ib_dev *dev,
149						   u8 port_num)
150{
151	return is_mdev_switchdev_mode(dev->mdev) ? &dev->port[0].cnts :
152						   &dev->port[port_num].cnts;
153}
154
155/**
156 * mlx5_ib_get_counters_id - Returns counters id to use for device+port
157 * @dev:	Pointer to mlx5 IB device
158 * @port_num:	Zero based port number
159 *
160 * mlx5_ib_get_counters_id() Returns counters set id to use for given
161 * device port combination in switchdev and non switchdev mode of the
162 * parent device.
163 */
164u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u8 port_num)
165{
166	const struct mlx5_ib_counters *cnts = get_counters(dev, port_num);
167
168	return cnts->set_id;
169}
170
171static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev,
172						    u8 port_num)
173{
174	struct mlx5_ib_dev *dev = to_mdev(ibdev);
175	const struct mlx5_ib_counters *cnts;
176	bool is_switchdev = is_mdev_switchdev_mode(dev->mdev);
177
178	if ((is_switchdev && port_num) || (!is_switchdev && !port_num))
179		return NULL;
180
181	cnts = get_counters(dev, port_num - 1);
182
183	return rdma_alloc_hw_stats_struct(cnts->names,
184					  cnts->num_q_counters +
185					  cnts->num_cong_counters +
186					  cnts->num_ext_ppcnt_counters,
187					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
188}
189
190static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev,
191				    const struct mlx5_ib_counters *cnts,
192				    struct rdma_hw_stats *stats,
193				    u16 set_id)
194{
195	u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {};
196	u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {};
197	__be32 val;
198	int ret, i;
199
200	MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER);
201	MLX5_SET(query_q_counter_in, in, counter_set_id, set_id);
202	ret = mlx5_cmd_exec_inout(mdev, query_q_counter, in, out);
203	if (ret)
204		return ret;
205
206	for (i = 0; i < cnts->num_q_counters; i++) {
207		val = *(__be32 *)((void *)out + cnts->offsets[i]);
208		stats->value[i] = (u64)be32_to_cpu(val);
209	}
210
211	return 0;
212}
213
214static int mlx5_ib_query_ext_ppcnt_counters(struct mlx5_ib_dev *dev,
215					    const struct mlx5_ib_counters *cnts,
216					    struct rdma_hw_stats *stats)
217{
218	int offset = cnts->num_q_counters + cnts->num_cong_counters;
219	u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {};
220	int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
221	int ret, i;
222	void *out;
223
224	out = kvzalloc(sz, GFP_KERNEL);
225	if (!out)
226		return -ENOMEM;
227
228	MLX5_SET(ppcnt_reg, in, local_port, 1);
229	MLX5_SET(ppcnt_reg, in, grp, MLX5_ETHERNET_EXTENDED_COUNTERS_GROUP);
230	ret = mlx5_core_access_reg(dev->mdev, in, sz, out, sz, MLX5_REG_PPCNT,
231				   0, 0);
232	if (ret)
233		goto free;
234
235	for (i = 0; i < cnts->num_ext_ppcnt_counters; i++)
236		stats->value[i + offset] =
237			be64_to_cpup((__be64 *)(out +
238				    cnts->offsets[i + offset]));
239free:
240	kvfree(out);
241	return ret;
242}
243
244static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
245				struct rdma_hw_stats *stats,
246				u8 port_num, int index)
247{
248	struct mlx5_ib_dev *dev = to_mdev(ibdev);
249	const struct mlx5_ib_counters *cnts = get_counters(dev, port_num - 1);
250	struct mlx5_core_dev *mdev;
251	int ret, num_counters;
252	u8 mdev_port_num;
253
254	if (!stats)
255		return -EINVAL;
256
257	num_counters = cnts->num_q_counters +
258		       cnts->num_cong_counters +
259		       cnts->num_ext_ppcnt_counters;
260
261	/* q_counters are per IB device, query the master mdev */
262	ret = mlx5_ib_query_q_counters(dev->mdev, cnts, stats, cnts->set_id);
263	if (ret)
264		return ret;
265
266	if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
267		ret =  mlx5_ib_query_ext_ppcnt_counters(dev, cnts, stats);
268		if (ret)
269			return ret;
270	}
271
272	if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
273		mdev = mlx5_ib_get_native_port_mdev(dev, port_num,
274						    &mdev_port_num);
275		if (!mdev) {
276			/* If port is not affiliated yet, its in down state
277			 * which doesn't have any counters yet, so it would be
278			 * zero. So no need to read from the HCA.
279			 */
280			goto done;
281		}
282		ret = mlx5_lag_query_cong_counters(dev->mdev,
283						   stats->value +
284						   cnts->num_q_counters,
285						   cnts->num_cong_counters,
286						   cnts->offsets +
287						   cnts->num_q_counters);
288
289		mlx5_ib_put_native_port_mdev(dev, port_num);
290		if (ret)
291			return ret;
292	}
293
294done:
295	return num_counters;
296}
297
298static struct rdma_hw_stats *
299mlx5_ib_counter_alloc_stats(struct rdma_counter *counter)
300{
301	struct mlx5_ib_dev *dev = to_mdev(counter->device);
302	const struct mlx5_ib_counters *cnts =
303		get_counters(dev, counter->port - 1);
304
305	return rdma_alloc_hw_stats_struct(cnts->names,
306					  cnts->num_q_counters +
307					  cnts->num_cong_counters +
308					  cnts->num_ext_ppcnt_counters,
309					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
310}
311
312static int mlx5_ib_counter_update_stats(struct rdma_counter *counter)
313{
314	struct mlx5_ib_dev *dev = to_mdev(counter->device);
315	const struct mlx5_ib_counters *cnts =
316		get_counters(dev, counter->port - 1);
317
318	return mlx5_ib_query_q_counters(dev->mdev, cnts,
319					counter->stats, counter->id);
320}
321
322static int mlx5_ib_counter_dealloc(struct rdma_counter *counter)
323{
324	struct mlx5_ib_dev *dev = to_mdev(counter->device);
325	u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
326
327	if (!counter->id)
328		return 0;
329
330	MLX5_SET(dealloc_q_counter_in, in, opcode,
331		 MLX5_CMD_OP_DEALLOC_Q_COUNTER);
332	MLX5_SET(dealloc_q_counter_in, in, counter_set_id, counter->id);
333	return mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in);
334}
335
336static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter,
337				   struct ib_qp *qp)
338{
339	struct mlx5_ib_dev *dev = to_mdev(qp->device);
340	int err;
341
342	if (!counter->id) {
343		u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
344		u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
345
346		MLX5_SET(alloc_q_counter_in, in, opcode,
347			 MLX5_CMD_OP_ALLOC_Q_COUNTER);
348		MLX5_SET(alloc_q_counter_in, in, uid, MLX5_SHARED_RESOURCE_UID);
349		err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out);
350		if (err)
351			return err;
352		counter->id =
353			MLX5_GET(alloc_q_counter_out, out, counter_set_id);
354	}
355
356	err = mlx5_ib_qp_set_counter(qp, counter);
357	if (err)
358		goto fail_set_counter;
359
360	return 0;
361
362fail_set_counter:
363	mlx5_ib_counter_dealloc(counter);
364	counter->id = 0;
365
366	return err;
367}
368
369static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp)
370{
371	return mlx5_ib_qp_set_counter(qp, NULL);
372}
373
374
375static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
376				  const char **names,
377				  size_t *offsets)
378{
379	int i;
380	int j = 0;
381
382	for (i = 0; i < ARRAY_SIZE(basic_q_cnts); i++, j++) {
383		names[j] = basic_q_cnts[i].name;
384		offsets[j] = basic_q_cnts[i].offset;
385	}
386
387	if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) {
388		for (i = 0; i < ARRAY_SIZE(out_of_seq_q_cnts); i++, j++) {
389			names[j] = out_of_seq_q_cnts[i].name;
390			offsets[j] = out_of_seq_q_cnts[i].offset;
391		}
392	}
393
394	if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
395		for (i = 0; i < ARRAY_SIZE(retrans_q_cnts); i++, j++) {
396			names[j] = retrans_q_cnts[i].name;
397			offsets[j] = retrans_q_cnts[i].offset;
398		}
399	}
400
401	if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) {
402		for (i = 0; i < ARRAY_SIZE(extended_err_cnts); i++, j++) {
403			names[j] = extended_err_cnts[i].name;
404			offsets[j] = extended_err_cnts[i].offset;
405		}
406	}
407
408	if (MLX5_CAP_GEN(dev->mdev, roce_accl)) {
409		for (i = 0; i < ARRAY_SIZE(roce_accl_cnts); i++, j++) {
410			names[j] = roce_accl_cnts[i].name;
411			offsets[j] = roce_accl_cnts[i].offset;
412		}
413	}
414
415	if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
416		for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) {
417			names[j] = cong_cnts[i].name;
418			offsets[j] = cong_cnts[i].offset;
419		}
420	}
421
422	if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
423		for (i = 0; i < ARRAY_SIZE(ext_ppcnt_cnts); i++, j++) {
424			names[j] = ext_ppcnt_cnts[i].name;
425			offsets[j] = ext_ppcnt_cnts[i].offset;
426		}
427	}
428}
429
430
431static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
432				    struct mlx5_ib_counters *cnts)
433{
434	u32 num_counters;
435
436	num_counters = ARRAY_SIZE(basic_q_cnts);
437
438	if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt))
439		num_counters += ARRAY_SIZE(out_of_seq_q_cnts);
440
441	if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
442		num_counters += ARRAY_SIZE(retrans_q_cnts);
443
444	if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters))
445		num_counters += ARRAY_SIZE(extended_err_cnts);
446
447	if (MLX5_CAP_GEN(dev->mdev, roce_accl))
448		num_counters += ARRAY_SIZE(roce_accl_cnts);
449
450	cnts->num_q_counters = num_counters;
451
452	if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
453		cnts->num_cong_counters = ARRAY_SIZE(cong_cnts);
454		num_counters += ARRAY_SIZE(cong_cnts);
455	}
456	if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
457		cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts);
458		num_counters += ARRAY_SIZE(ext_ppcnt_cnts);
459	}
460	cnts->names = kcalloc(num_counters, sizeof(*cnts->names), GFP_KERNEL);
461	if (!cnts->names)
462		return -ENOMEM;
463
464	cnts->offsets = kcalloc(num_counters,
465				sizeof(*cnts->offsets), GFP_KERNEL);
466	if (!cnts->offsets)
467		goto err_names;
468
469	return 0;
470
471err_names:
472	kfree(cnts->names);
473	cnts->names = NULL;
474	return -ENOMEM;
475}
476
477static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
478{
479	u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
480	int num_cnt_ports;
481	int i;
482
483	num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports;
484
485	MLX5_SET(dealloc_q_counter_in, in, opcode,
486		 MLX5_CMD_OP_DEALLOC_Q_COUNTER);
487
488	for (i = 0; i < num_cnt_ports; i++) {
489		if (dev->port[i].cnts.set_id) {
490			MLX5_SET(dealloc_q_counter_in, in, counter_set_id,
491				 dev->port[i].cnts.set_id);
492			mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in);
493		}
494		kfree(dev->port[i].cnts.names);
495		kfree(dev->port[i].cnts.offsets);
496	}
497}
498
499static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
500{
501	u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
502	u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
503	int num_cnt_ports;
504	int err = 0;
505	int i;
506	bool is_shared;
507
508	MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER);
509	is_shared = MLX5_CAP_GEN(dev->mdev, log_max_uctx) != 0;
510	num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports;
511
512	for (i = 0; i < num_cnt_ports; i++) {
513		err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts);
514		if (err)
515			goto err_alloc;
516
517		mlx5_ib_fill_counters(dev, dev->port[i].cnts.names,
518				      dev->port[i].cnts.offsets);
519
520		MLX5_SET(alloc_q_counter_in, in, uid,
521			 is_shared ? MLX5_SHARED_RESOURCE_UID : 0);
522
523		err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out);
524		if (err) {
525			mlx5_ib_warn(dev,
526				     "couldn't allocate queue counter for port %d, err %d\n",
527				     i + 1, err);
528			goto err_alloc;
529		}
530
531		dev->port[i].cnts.set_id =
532			MLX5_GET(alloc_q_counter_out, out, counter_set_id);
533	}
534	return 0;
535
536err_alloc:
537	mlx5_ib_dealloc_counters(dev);
538	return err;
539}
540
541static int read_flow_counters(struct ib_device *ibdev,
542			      struct mlx5_read_counters_attr *read_attr)
543{
544	struct mlx5_fc *fc = read_attr->hw_cntrs_hndl;
545	struct mlx5_ib_dev *dev = to_mdev(ibdev);
546
547	return mlx5_fc_query(dev->mdev, fc,
548			     &read_attr->out[IB_COUNTER_PACKETS],
549			     &read_attr->out[IB_COUNTER_BYTES]);
550}
551
552/* flow counters currently expose two counters packets and bytes */
553#define FLOW_COUNTERS_NUM 2
554static int counters_set_description(
555	struct ib_counters *counters, enum mlx5_ib_counters_type counters_type,
556	struct mlx5_ib_flow_counters_desc *desc_data, u32 ncounters)
557{
558	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
559	u32 cntrs_max_index = 0;
560	int i;
561
562	if (counters_type != MLX5_IB_COUNTERS_FLOW)
563		return -EINVAL;
564
565	/* init the fields for the object */
566	mcounters->type = counters_type;
567	mcounters->read_counters = read_flow_counters;
568	mcounters->counters_num = FLOW_COUNTERS_NUM;
569	mcounters->ncounters = ncounters;
570	/* each counter entry have both description and index pair */
571	for (i = 0; i < ncounters; i++) {
572		if (desc_data[i].description > IB_COUNTER_BYTES)
573			return -EINVAL;
574
575		if (cntrs_max_index <= desc_data[i].index)
576			cntrs_max_index = desc_data[i].index + 1;
577	}
578
579	mutex_lock(&mcounters->mcntrs_mutex);
580	mcounters->counters_data = desc_data;
581	mcounters->cntrs_max_index = cntrs_max_index;
582	mutex_unlock(&mcounters->mcntrs_mutex);
583
584	return 0;
585}
586
587#define MAX_COUNTERS_NUM (USHRT_MAX / (sizeof(u32) * 2))
588int mlx5_ib_flow_counters_set_data(struct ib_counters *ibcounters,
589				   struct mlx5_ib_create_flow *ucmd)
590{
591	struct mlx5_ib_mcounters *mcounters = to_mcounters(ibcounters);
592	struct mlx5_ib_flow_counters_data *cntrs_data = NULL;
593	struct mlx5_ib_flow_counters_desc *desc_data = NULL;
594	bool hw_hndl = false;
595	int ret = 0;
596
597	if (ucmd && ucmd->ncounters_data != 0) {
598		cntrs_data = ucmd->data;
599		if (cntrs_data->ncounters > MAX_COUNTERS_NUM)
600			return -EINVAL;
601
602		desc_data = kcalloc(cntrs_data->ncounters,
603				    sizeof(*desc_data),
604				    GFP_KERNEL);
605		if (!desc_data)
606			return  -ENOMEM;
607
608		if (copy_from_user(desc_data,
609				   u64_to_user_ptr(cntrs_data->counters_data),
610				   sizeof(*desc_data) * cntrs_data->ncounters)) {
611			ret = -EFAULT;
612			goto free;
613		}
614	}
615
616	if (!mcounters->hw_cntrs_hndl) {
617		mcounters->hw_cntrs_hndl = mlx5_fc_create(
618			to_mdev(ibcounters->device)->mdev, false);
619		if (IS_ERR(mcounters->hw_cntrs_hndl)) {
620			ret = PTR_ERR(mcounters->hw_cntrs_hndl);
621			goto free;
622		}
623		hw_hndl = true;
624	}
625
626	if (desc_data) {
627		/* counters already bound to at least one flow */
628		if (mcounters->cntrs_max_index) {
629			ret = -EINVAL;
630			goto free_hndl;
631		}
632
633		ret = counters_set_description(ibcounters,
634					       MLX5_IB_COUNTERS_FLOW,
635					       desc_data,
636					       cntrs_data->ncounters);
637		if (ret)
638			goto free_hndl;
639
640	} else if (!mcounters->cntrs_max_index) {
641		/* counters not bound yet, must have udata passed */
642		ret = -EINVAL;
643		goto free_hndl;
644	}
645
646	return 0;
647
648free_hndl:
649	if (hw_hndl) {
650		mlx5_fc_destroy(to_mdev(ibcounters->device)->mdev,
651				mcounters->hw_cntrs_hndl);
652		mcounters->hw_cntrs_hndl = NULL;
653	}
654free:
655	kfree(desc_data);
656	return ret;
657}
658
659void mlx5_ib_counters_clear_description(struct ib_counters *counters)
660{
661	struct mlx5_ib_mcounters *mcounters;
662
663	if (!counters || atomic_read(&counters->usecnt) != 1)
664		return;
665
666	mcounters = to_mcounters(counters);
667
668	mutex_lock(&mcounters->mcntrs_mutex);
669	kfree(mcounters->counters_data);
670	mcounters->counters_data = NULL;
671	mcounters->cntrs_max_index = 0;
672	mutex_unlock(&mcounters->mcntrs_mutex);
673}
674
/*
 * Device operations for hardware statistics and rdma counters. They are
 * registered by mlx5_ib_counters_init() only when the device reports a
 * non-zero max_qp_cnt capability.
 */
static const struct ib_device_ops hw_stats_ops = {
	.alloc_hw_stats = mlx5_ib_alloc_hw_stats,
	.get_hw_stats = mlx5_ib_get_hw_stats,
	.counter_bind_qp = mlx5_ib_counter_bind_qp,
	.counter_unbind_qp = mlx5_ib_counter_unbind_qp,
	.counter_dealloc = mlx5_ib_counter_dealloc,
	.counter_alloc_stats = mlx5_ib_counter_alloc_stats,
	.counter_update_stats = mlx5_ib_counter_update_stats,
};
684
/*
 * Device operations for ib_counters objects. They are always registered by
 * mlx5_ib_counters_init().
 */
static const struct ib_device_ops counters_ops = {
	.create_counters = mlx5_ib_create_counters,
	.destroy_counters = mlx5_ib_destroy_counters,
	.read_counters = mlx5_ib_read_counters,

	/* ib_counters objects are embedded in struct mlx5_ib_mcounters */
	INIT_RDMA_OBJ_SIZE(ib_counters, mlx5_ib_mcounters, ibcntrs),
};
692
693int mlx5_ib_counters_init(struct mlx5_ib_dev *dev)
694{
695	ib_set_device_ops(&dev->ib_dev, &counters_ops);
696
697	if (!MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
698		return 0;
699
700	ib_set_device_ops(&dev->ib_dev, &hw_stats_ops);
701	return mlx5_ib_alloc_counters(dev);
702}
703
704void mlx5_ib_counters_cleanup(struct mlx5_ib_dev *dev)
705{
706	if (!MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
707		return;
708
709	mlx5_ib_dealloc_counters(dev);
710}
711