1/*
2 * Copyright (c) 2004-2007 Voltaire, Inc. All rights reserved.
3 * Copyright (c) 2005 Intel Corporation.  All rights reserved.
4 * Copyright (c) 2005 Mellanox Technologies Ltd.  All rights reserved.
5 * Copyright (c) 2009 HNR Consulting. All rights reserved.
6 * Copyright (c) 2014,2018 Intel Corporation.  All rights reserved.
7 *
8 * This software is available to you under a choice of one of two
9 * licenses.  You may choose to be licensed under the terms of the GNU
10 * General Public License (GPL) Version 2, available from the file
11 * COPYING in the main directory of this source tree, or the
12 * OpenIB.org BSD license below:
13 *
14 *     Redistribution and use in source and binary forms, with or
15 *     without modification, are permitted provided that the following
16 *     conditions are met:
17 *
18 *      - Redistributions of source code must retain the above
19 *        copyright notice, this list of conditions and the following
20 *        disclaimer.
21 *
22 *      - Redistributions in binary form must reproduce the above
23 *        copyright notice, this list of conditions and the following
24 *        disclaimer in the documentation and/or other materials
25 *        provided with the distribution.
26 *
27 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
28 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
29 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
30 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
31 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
32 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
33 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 * SOFTWARE.
35 *
36 */
37
38#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
39
40#include <linux/dma-mapping.h>
41#include <linux/slab.h>
42#include <linux/module.h>
43#include <linux/security.h>
44#include <linux/xarray.h>
45#include <rdma/ib_cache.h>
46
47#include "mad_priv.h"
48#include "core_priv.h"
49#include "mad_rmpp.h"
50#include "smi.h"
51#include "opa_smi.h"
52#include "agent.h"
53
54#define CREATE_TRACE_POINTS
55#include <trace/events/ib_mad.h>
56
57#ifdef CONFIG_TRACEPOINTS
58static void create_mad_addr_info(struct ib_mad_send_wr_private *mad_send_wr,
59			  struct ib_mad_qp_info *qp_info,
60			  struct trace_event_raw_ib_mad_send_template *entry)
61{
62	u16 pkey;
63	struct ib_device *dev = qp_info->port_priv->device;
64	u8 pnum = qp_info->port_priv->port_num;
65	struct ib_ud_wr *wr = &mad_send_wr->send_wr;
66	struct rdma_ah_attr attr = {};
67
68	rdma_query_ah(wr->ah, &attr);
69
	/* Fields common to the MAD send trace events */
71	entry->sl = attr.sl;
72	ib_query_pkey(dev, pnum, wr->pkey_index, &pkey);
73	entry->pkey = pkey;
74	entry->rqpn = wr->remote_qpn;
75	entry->rqkey = wr->remote_qkey;
76	entry->dlid = rdma_ah_get_dlid(&attr);
77}
78#endif
79
80static int mad_sendq_size = IB_MAD_QP_SEND_SIZE;
81static int mad_recvq_size = IB_MAD_QP_RECV_SIZE;
82
83module_param_named(send_queue_size, mad_sendq_size, int, 0444);
84MODULE_PARM_DESC(send_queue_size, "Size of send queue in number of work requests");
85module_param_named(recv_queue_size, mad_recvq_size, int, 0444);
86MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests");
87
88static DEFINE_XARRAY_ALLOC1(ib_mad_clients);
89static u32 ib_mad_client_next;
90static struct list_head ib_mad_port_list;
91
92/* Port list lock */
93static DEFINE_SPINLOCK(ib_mad_port_list_lock);
94
95/* Forward declarations */
96static int method_in_use(struct ib_mad_mgmt_method_table **method,
97			 struct ib_mad_reg_req *mad_reg_req);
98static void remove_mad_reg_req(struct ib_mad_agent_private *priv);
99static struct ib_mad_agent_private *find_mad_agent(
100					struct ib_mad_port_private *port_priv,
101					const struct ib_mad_hdr *mad);
102static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
103				    struct ib_mad_private *mad);
104static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv);
105static void timeout_sends(struct work_struct *work);
106static void local_completions(struct work_struct *work);
107static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req,
108			      struct ib_mad_agent_private *agent_priv,
109			      u8 mgmt_class);
110static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
111			   struct ib_mad_agent_private *agent_priv);
112static bool ib_mad_send_error(struct ib_mad_port_private *port_priv,
113			      struct ib_wc *wc);
114static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc);
115
/*
 * Returns an ib_mad_port_private structure or NULL for a device/port.
 * Assumes ib_mad_port_list_lock is held.
 */
120static inline struct ib_mad_port_private *
121__ib_get_mad_port(struct ib_device *device, int port_num)
122{
123	struct ib_mad_port_private *entry;
124
125	list_for_each_entry(entry, &ib_mad_port_list, port_list) {
126		if (entry->device == device && entry->port_num == port_num)
127			return entry;
128	}
129	return NULL;
130}
131
/*
 * Wrapper function to return an ib_mad_port_private structure or NULL
 * for a device/port
 */
136static inline struct ib_mad_port_private *
137ib_get_mad_port(struct ib_device *device, int port_num)
138{
139	struct ib_mad_port_private *entry;
140	unsigned long flags;
141
142	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
143	entry = __ib_get_mad_port(device, port_num);
144	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
145
146	return entry;
147}
148
149static inline u8 convert_mgmt_class(u8 mgmt_class)
150{
151	/* Alias IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE to 0 */
152	return mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE ?
153		0 : mgmt_class;
154}
155
156static int get_spl_qp_index(enum ib_qp_type qp_type)
157{
	switch (qp_type) {
160	case IB_QPT_SMI:
161		return 0;
162	case IB_QPT_GSI:
163		return 1;
164	default:
165		return -1;
166	}
167}
168
169static int vendor_class_index(u8 mgmt_class)
170{
171	return mgmt_class - IB_MGMT_CLASS_VENDOR_RANGE2_START;
172}
173
174static int is_vendor_class(u8 mgmt_class)
175{
176	if ((mgmt_class < IB_MGMT_CLASS_VENDOR_RANGE2_START) ||
177	    (mgmt_class > IB_MGMT_CLASS_VENDOR_RANGE2_END))
178		return 0;
179	return 1;
180}
181
182static int is_vendor_oui(char *oui)
183{
184	if (oui[0] || oui[1] || oui[2])
185		return 1;
186	return 0;
187}
188
189static int is_vendor_method_in_use(
190		struct ib_mad_mgmt_vendor_class *vendor_class,
191		struct ib_mad_reg_req *mad_reg_req)
192{
193	struct ib_mad_mgmt_method_table *method;
194	int i;
195
196	for (i = 0; i < MAX_MGMT_OUI; i++) {
197		if (!memcmp(vendor_class->oui[i], mad_reg_req->oui, 3)) {
198			method = vendor_class->method_table[i];
199			if (method) {
200				if (method_in_use(&method, mad_reg_req))
201					return 1;
202				else
203					break;
204			}
205		}
206	}
207	return 0;
208}
209
210int ib_response_mad(const struct ib_mad_hdr *hdr)
211{
212	return ((hdr->method & IB_MGMT_METHOD_RESP) ||
213		(hdr->method == IB_MGMT_METHOD_TRAP_REPRESS) ||
214		((hdr->mgmt_class == IB_MGMT_CLASS_BM) &&
215		 (hdr->attr_mod & IB_BM_ATTR_MOD_RESP)));
216}
217EXPORT_SYMBOL(ib_response_mad);
218
219/*
220 * ib_register_mad_agent - Register to send/receive MADs
221 *
222 * Context: Process context.
223 */
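/*
 * A minimal usage sketch (illustrative only: the handler and context names
 * below are hypothetical, and error handling is omitted):
 *
 *	agent = ib_register_mad_agent(device, port_num, IB_QPT_GSI, NULL, 0,
 *				      my_send_handler, my_recv_handler,
 *				      my_context, 0);
 *	if (IS_ERR(agent))
 *		return PTR_ERR(agent);
 *	...
 *	ib_unregister_mad_agent(agent);
 */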
224struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
225					   u8 port_num,
226					   enum ib_qp_type qp_type,
227					   struct ib_mad_reg_req *mad_reg_req,
228					   u8 rmpp_version,
229					   ib_mad_send_handler send_handler,
230					   ib_mad_recv_handler recv_handler,
231					   void *context,
232					   u32 registration_flags)
233{
234	struct ib_mad_port_private *port_priv;
235	struct ib_mad_agent *ret = ERR_PTR(-EINVAL);
236	struct ib_mad_agent_private *mad_agent_priv;
237	struct ib_mad_reg_req *reg_req = NULL;
238	struct ib_mad_mgmt_class_table *class;
239	struct ib_mad_mgmt_vendor_class_table *vendor;
240	struct ib_mad_mgmt_vendor_class *vendor_class;
241	struct ib_mad_mgmt_method_table *method;
242	int ret2, qpn;
243	u8 mgmt_class, vclass;
244
245	if ((qp_type == IB_QPT_SMI && !rdma_cap_ib_smi(device, port_num)) ||
246	    (qp_type == IB_QPT_GSI && !rdma_cap_ib_cm(device, port_num)))
247		return ERR_PTR(-EPROTONOSUPPORT);
248
249	/* Validate parameters */
250	qpn = get_spl_qp_index(qp_type);
251	if (qpn == -1) {
252		dev_dbg_ratelimited(&device->dev, "%s: invalid QP Type %d\n",
253				    __func__, qp_type);
254		goto error1;
255	}
256
257	if (rmpp_version && rmpp_version != IB_MGMT_RMPP_VERSION) {
258		dev_dbg_ratelimited(&device->dev,
259				    "%s: invalid RMPP Version %u\n",
260				    __func__, rmpp_version);
261		goto error1;
262	}
263
264	/* Validate MAD registration request if supplied */
265	if (mad_reg_req) {
266		if (mad_reg_req->mgmt_class_version >= MAX_MGMT_VERSION) {
267			dev_dbg_ratelimited(&device->dev,
268					    "%s: invalid Class Version %u\n",
269					    __func__,
270					    mad_reg_req->mgmt_class_version);
271			goto error1;
272		}
273		if (!recv_handler) {
274			dev_dbg_ratelimited(&device->dev,
275					    "%s: no recv_handler\n", __func__);
276			goto error1;
277		}
278		if (mad_reg_req->mgmt_class >= MAX_MGMT_CLASS) {
279			/*
280			 * IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE is the only
281			 * one in this range currently allowed
282			 */
283			if (mad_reg_req->mgmt_class !=
284			    IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
285				dev_dbg_ratelimited(&device->dev,
286					"%s: Invalid Mgmt Class 0x%x\n",
287					__func__, mad_reg_req->mgmt_class);
288				goto error1;
289			}
290		} else if (mad_reg_req->mgmt_class == 0) {
291			/*
292			 * Class 0 is reserved in IBA and is used for
293			 * aliasing of IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
294			 */
295			dev_dbg_ratelimited(&device->dev,
296					    "%s: Invalid Mgmt Class 0\n",
297					    __func__);
298			goto error1;
299		} else if (is_vendor_class(mad_reg_req->mgmt_class)) {
300			/*
301			 * If class is in "new" vendor range,
302			 * ensure supplied OUI is not zero
303			 */
304			if (!is_vendor_oui(mad_reg_req->oui)) {
305				dev_dbg_ratelimited(&device->dev,
306					"%s: No OUI specified for class 0x%x\n",
307					__func__,
308					mad_reg_req->mgmt_class);
309				goto error1;
310			}
311		}
312		/* Make sure class supplied is consistent with RMPP */
313		if (!ib_is_mad_class_rmpp(mad_reg_req->mgmt_class)) {
314			if (rmpp_version) {
315				dev_dbg_ratelimited(&device->dev,
316					"%s: RMPP version for non-RMPP class 0x%x\n",
317					__func__, mad_reg_req->mgmt_class);
318				goto error1;
319			}
320		}
321
322		/* Make sure class supplied is consistent with QP type */
323		if (qp_type == IB_QPT_SMI) {
324			if ((mad_reg_req->mgmt_class !=
325					IB_MGMT_CLASS_SUBN_LID_ROUTED) &&
326			    (mad_reg_req->mgmt_class !=
327					IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) {
328				dev_dbg_ratelimited(&device->dev,
329					"%s: Invalid SM QP type: class 0x%x\n",
330					__func__, mad_reg_req->mgmt_class);
331				goto error1;
332			}
333		} else {
334			if ((mad_reg_req->mgmt_class ==
335					IB_MGMT_CLASS_SUBN_LID_ROUTED) ||
336			    (mad_reg_req->mgmt_class ==
337					IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) {
338				dev_dbg_ratelimited(&device->dev,
339					"%s: Invalid GS QP type: class 0x%x\n",
340					__func__, mad_reg_req->mgmt_class);
341				goto error1;
342			}
343		}
344	} else {
345		/* No registration request supplied */
346		if (!send_handler)
347			goto error1;
348		if (registration_flags & IB_MAD_USER_RMPP)
349			goto error1;
350	}
351
352	/* Validate device and port */
353	port_priv = ib_get_mad_port(device, port_num);
354	if (!port_priv) {
355		dev_dbg_ratelimited(&device->dev, "%s: Invalid port %d\n",
356				    __func__, port_num);
357		ret = ERR_PTR(-ENODEV);
358		goto error1;
359	}
360
361	/* Verify the QP requested is supported. For example, Ethernet devices
362	 * will not have QP0.
363	 */
364	if (!port_priv->qp_info[qpn].qp) {
365		dev_dbg_ratelimited(&device->dev, "%s: QP %d not supported\n",
366				    __func__, qpn);
367		ret = ERR_PTR(-EPROTONOSUPPORT);
368		goto error1;
369	}
370
371	/* Allocate structures */
372	mad_agent_priv = kzalloc(sizeof *mad_agent_priv, GFP_KERNEL);
373	if (!mad_agent_priv) {
374		ret = ERR_PTR(-ENOMEM);
375		goto error1;
376	}
377
378	if (mad_reg_req) {
379		reg_req = kmemdup(mad_reg_req, sizeof *reg_req, GFP_KERNEL);
380		if (!reg_req) {
381			ret = ERR_PTR(-ENOMEM);
382			goto error3;
383		}
384	}
385
386	/* Now, fill in the various structures */
387	mad_agent_priv->qp_info = &port_priv->qp_info[qpn];
388	mad_agent_priv->reg_req = reg_req;
389	mad_agent_priv->agent.rmpp_version = rmpp_version;
390	mad_agent_priv->agent.device = device;
391	mad_agent_priv->agent.recv_handler = recv_handler;
392	mad_agent_priv->agent.send_handler = send_handler;
393	mad_agent_priv->agent.context = context;
394	mad_agent_priv->agent.qp = port_priv->qp_info[qpn].qp;
395	mad_agent_priv->agent.port_num = port_num;
396	mad_agent_priv->agent.flags = registration_flags;
397	spin_lock_init(&mad_agent_priv->lock);
398	INIT_LIST_HEAD(&mad_agent_priv->send_list);
399	INIT_LIST_HEAD(&mad_agent_priv->wait_list);
400	INIT_LIST_HEAD(&mad_agent_priv->done_list);
401	INIT_LIST_HEAD(&mad_agent_priv->rmpp_list);
402	INIT_DELAYED_WORK(&mad_agent_priv->timed_work, timeout_sends);
403	INIT_LIST_HEAD(&mad_agent_priv->local_list);
404	INIT_WORK(&mad_agent_priv->local_work, local_completions);
405	refcount_set(&mad_agent_priv->refcount, 1);
406	init_completion(&mad_agent_priv->comp);
407
408	ret2 = ib_mad_agent_security_setup(&mad_agent_priv->agent, qp_type);
409	if (ret2) {
410		ret = ERR_PTR(ret2);
411		goto error4;
412	}
413
414	/*
415	 * The mlx4 driver uses the top byte to distinguish which virtual
416	 * function generated the MAD, so we must avoid using it.
417	 */
418	ret2 = xa_alloc_cyclic(&ib_mad_clients, &mad_agent_priv->agent.hi_tid,
419			mad_agent_priv, XA_LIMIT(0, (1 << 24) - 1),
420			&ib_mad_client_next, GFP_KERNEL);
421	if (ret2 < 0) {
422		ret = ERR_PTR(ret2);
423		goto error5;
424	}
425
	/*
	 * Make sure the MAD registration (if supplied)
	 * does not overlap with any existing ones
	 */
430	spin_lock_irq(&port_priv->reg_lock);
431	if (mad_reg_req) {
432		mgmt_class = convert_mgmt_class(mad_reg_req->mgmt_class);
433		if (!is_vendor_class(mgmt_class)) {
434			class = port_priv->version[mad_reg_req->
435						   mgmt_class_version].class;
436			if (class) {
437				method = class->method_table[mgmt_class];
438				if (method) {
439					if (method_in_use(&method,
440							   mad_reg_req))
441						goto error6;
442				}
443			}
444			ret2 = add_nonoui_reg_req(mad_reg_req, mad_agent_priv,
445						  mgmt_class);
446		} else {
447			/* "New" vendor class range */
448			vendor = port_priv->version[mad_reg_req->
449						    mgmt_class_version].vendor;
450			if (vendor) {
451				vclass = vendor_class_index(mgmt_class);
452				vendor_class = vendor->vendor_class[vclass];
453				if (vendor_class) {
454					if (is_vendor_method_in_use(
455							vendor_class,
456							mad_reg_req))
457						goto error6;
458				}
459			}
460			ret2 = add_oui_reg_req(mad_reg_req, mad_agent_priv);
461		}
462		if (ret2) {
463			ret = ERR_PTR(ret2);
464			goto error6;
465		}
466	}
467	spin_unlock_irq(&port_priv->reg_lock);
468
469	trace_ib_mad_create_agent(mad_agent_priv);
470	return &mad_agent_priv->agent;
471error6:
472	spin_unlock_irq(&port_priv->reg_lock);
473	xa_erase(&ib_mad_clients, mad_agent_priv->agent.hi_tid);
474error5:
475	ib_mad_agent_security_cleanup(&mad_agent_priv->agent);
476error4:
477	kfree(reg_req);
478error3:
479	kfree(mad_agent_priv);
480error1:
481	return ret;
482}
483EXPORT_SYMBOL(ib_register_mad_agent);
484
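/*
 * Drop a reference on the agent; the final reference completes ->comp so
 * that unregister_mad_agent() can finish tearing the agent down.
 */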
485static inline void deref_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
486{
487	if (refcount_dec_and_test(&mad_agent_priv->refcount))
488		complete(&mad_agent_priv->comp);
489}
490
491static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
492{
493	struct ib_mad_port_private *port_priv;
494
495	/* Note that we could still be handling received MADs */
496	trace_ib_mad_unregister_agent(mad_agent_priv);
497
498	/*
499	 * Canceling all sends results in dropping received response
500	 * MADs, preventing us from queuing additional work
501	 */
502	cancel_mads(mad_agent_priv);
503	port_priv = mad_agent_priv->qp_info->port_priv;
504	cancel_delayed_work(&mad_agent_priv->timed_work);
505
506	spin_lock_irq(&port_priv->reg_lock);
507	remove_mad_reg_req(mad_agent_priv);
508	spin_unlock_irq(&port_priv->reg_lock);
509	xa_erase(&ib_mad_clients, mad_agent_priv->agent.hi_tid);
510
511	flush_workqueue(port_priv->wq);
512
513	deref_mad_agent(mad_agent_priv);
514	wait_for_completion(&mad_agent_priv->comp);
515	ib_cancel_rmpp_recvs(mad_agent_priv);
516
517	ib_mad_agent_security_cleanup(&mad_agent_priv->agent);
518
519	kfree(mad_agent_priv->reg_req);
520	kfree_rcu(mad_agent_priv, rcu);
521}
522
523/*
524 * ib_unregister_mad_agent - Unregisters a client from using MAD services
525 *
526 * Context: Process context.
527 */
528void ib_unregister_mad_agent(struct ib_mad_agent *mad_agent)
529{
530	struct ib_mad_agent_private *mad_agent_priv;
531
532	mad_agent_priv = container_of(mad_agent,
533				      struct ib_mad_agent_private,
534				      agent);
535	unregister_mad_agent(mad_agent_priv);
536}
537EXPORT_SYMBOL(ib_unregister_mad_agent);
538
539static void dequeue_mad(struct ib_mad_list_head *mad_list)
540{
541	struct ib_mad_queue *mad_queue;
542	unsigned long flags;
543
544	mad_queue = mad_list->mad_queue;
545	spin_lock_irqsave(&mad_queue->lock, flags);
546	list_del(&mad_list->list);
547	mad_queue->count--;
548	spin_unlock_irqrestore(&mad_queue->lock, flags);
549}
550
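/*
 * Build a synthetic work completion for an SMP that is processed locally,
 * making it look like it was received on QP0 from the wire.
 */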
551static void build_smp_wc(struct ib_qp *qp, struct ib_cqe *cqe, u16 slid,
552		u16 pkey_index, u8 port_num, struct ib_wc *wc)
553{
554	memset(wc, 0, sizeof *wc);
555	wc->wr_cqe = cqe;
556	wc->status = IB_WC_SUCCESS;
557	wc->opcode = IB_WC_RECV;
558	wc->pkey_index = pkey_index;
559	wc->byte_len = sizeof(struct ib_mad) + sizeof(struct ib_grh);
560	wc->src_qp = IB_QP0;
561	wc->qp = qp;
562	wc->slid = slid;
563	wc->sl = 0;
564	wc->dlid_path_bits = 0;
565	wc->port_num = port_num;
566}
567
568static size_t mad_priv_size(const struct ib_mad_private *mp)
569{
570	return sizeof(struct ib_mad_private) + mp->mad_size;
571}
572
573static struct ib_mad_private *alloc_mad_private(size_t mad_size, gfp_t flags)
574{
575	size_t size = sizeof(struct ib_mad_private) + mad_size;
576	struct ib_mad_private *ret = kzalloc(size, flags);
577
578	if (ret)
579		ret->mad_size = mad_size;
580
581	return ret;
582}
583
584static size_t port_mad_size(const struct ib_mad_port_private *port_priv)
585{
586	return rdma_max_mad_size(port_priv->device, port_priv->port_num);
587}
588
589static size_t mad_priv_dma_size(const struct ib_mad_private *mp)
590{
591	return sizeof(struct ib_grh) + mp->mad_size;
592}
593
594/*
595 * Return 0 if SMP is to be sent
596 * Return 1 if SMP was consumed locally (whether or not solicited)
597 * Return < 0 if error
598 */
599static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
600				  struct ib_mad_send_wr_private *mad_send_wr)
601{
602	int ret = 0;
603	struct ib_smp *smp = mad_send_wr->send_buf.mad;
604	struct opa_smp *opa_smp = (struct opa_smp *)smp;
605	unsigned long flags;
606	struct ib_mad_local_private *local;
607	struct ib_mad_private *mad_priv;
608	struct ib_mad_port_private *port_priv;
609	struct ib_mad_agent_private *recv_mad_agent = NULL;
610	struct ib_device *device = mad_agent_priv->agent.device;
611	u8 port_num;
612	struct ib_wc mad_wc;
613	struct ib_ud_wr *send_wr = &mad_send_wr->send_wr;
614	size_t mad_size = port_mad_size(mad_agent_priv->qp_info->port_priv);
615	u16 out_mad_pkey_index = 0;
616	u16 drslid;
617	bool opa = rdma_cap_opa_mad(mad_agent_priv->qp_info->port_priv->device,
618				    mad_agent_priv->qp_info->port_priv->port_num);
619
620	if (rdma_cap_ib_switch(device) &&
621	    smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
622		port_num = send_wr->port_num;
623	else
624		port_num = mad_agent_priv->agent.port_num;
625
626	/*
627	 * Directed route handling starts if the initial LID routed part of
628	 * a request or the ending LID routed part of a response is empty.
629	 * If we are at the start of the LID routed part, don't update the
630	 * hop_ptr or hop_cnt.  See section 14.2.2, Vol 1 IB spec.
631	 */
632	if (opa && smp->class_version == OPA_SM_CLASS_VERSION) {
633		u32 opa_drslid;
634
635		trace_ib_mad_handle_out_opa_smi(opa_smp);
636
637		if ((opa_get_smp_direction(opa_smp)
638		     ? opa_smp->route.dr.dr_dlid : opa_smp->route.dr.dr_slid) ==
639		     OPA_LID_PERMISSIVE &&
640		     opa_smi_handle_dr_smp_send(opa_smp,
641						rdma_cap_ib_switch(device),
642						port_num) == IB_SMI_DISCARD) {
643			ret = -EINVAL;
644			dev_err(&device->dev, "OPA Invalid directed route\n");
645			goto out;
646		}
647		opa_drslid = be32_to_cpu(opa_smp->route.dr.dr_slid);
648		if (opa_drslid != be32_to_cpu(OPA_LID_PERMISSIVE) &&
649		    opa_drslid & 0xffff0000) {
650			ret = -EINVAL;
651			dev_err(&device->dev, "OPA Invalid dr_slid 0x%x\n",
652			       opa_drslid);
653			goto out;
654		}
655		drslid = (u16)(opa_drslid & 0x0000ffff);
656
657		/* Check to post send on QP or process locally */
658		if (opa_smi_check_local_smp(opa_smp, device) == IB_SMI_DISCARD &&
659		    opa_smi_check_local_returning_smp(opa_smp, device) == IB_SMI_DISCARD)
660			goto out;
661	} else {
662		trace_ib_mad_handle_out_ib_smi(smp);
663
664		if ((ib_get_smp_direction(smp) ? smp->dr_dlid : smp->dr_slid) ==
665		     IB_LID_PERMISSIVE &&
666		     smi_handle_dr_smp_send(smp, rdma_cap_ib_switch(device), port_num) ==
667		     IB_SMI_DISCARD) {
668			ret = -EINVAL;
669			dev_err(&device->dev, "Invalid directed route\n");
670			goto out;
671		}
672		drslid = be16_to_cpu(smp->dr_slid);
673
674		/* Check to post send on QP or process locally */
675		if (smi_check_local_smp(smp, device) == IB_SMI_DISCARD &&
676		    smi_check_local_returning_smp(smp, device) == IB_SMI_DISCARD)
677			goto out;
678	}
679
680	local = kmalloc(sizeof *local, GFP_ATOMIC);
681	if (!local) {
682		ret = -ENOMEM;
683		goto out;
684	}
685	local->mad_priv = NULL;
686	local->recv_mad_agent = NULL;
687	mad_priv = alloc_mad_private(mad_size, GFP_ATOMIC);
688	if (!mad_priv) {
689		ret = -ENOMEM;
690		kfree(local);
691		goto out;
692	}
693
694	build_smp_wc(mad_agent_priv->agent.qp,
695		     send_wr->wr.wr_cqe, drslid,
696		     send_wr->pkey_index,
697		     send_wr->port_num, &mad_wc);
698
699	if (opa && smp->base_version == OPA_MGMT_BASE_VERSION) {
700		mad_wc.byte_len = mad_send_wr->send_buf.hdr_len
701					+ mad_send_wr->send_buf.data_len
702					+ sizeof(struct ib_grh);
703	}
704
705	/* No GRH for DR SMP */
706	ret = device->ops.process_mad(device, 0, port_num, &mad_wc, NULL,
707				      (const struct ib_mad *)smp,
708				      (struct ib_mad *)mad_priv->mad, &mad_size,
709				      &out_mad_pkey_index);
	switch (ret) {
712	case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY:
713		if (ib_response_mad((const struct ib_mad_hdr *)mad_priv->mad) &&
714		    mad_agent_priv->agent.recv_handler) {
715			local->mad_priv = mad_priv;
716			local->recv_mad_agent = mad_agent_priv;
717			/*
718			 * Reference MAD agent until receive
719			 * side of local completion handled
720			 */
721			refcount_inc(&mad_agent_priv->refcount);
		} else {
			kfree(mad_priv);
		}
724		break;
725	case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED:
726		kfree(mad_priv);
727		break;
728	case IB_MAD_RESULT_SUCCESS:
729		/* Treat like an incoming receive MAD */
730		port_priv = ib_get_mad_port(mad_agent_priv->agent.device,
731					    mad_agent_priv->agent.port_num);
732		if (port_priv) {
733			memcpy(mad_priv->mad, smp, mad_priv->mad_size);
734			recv_mad_agent = find_mad_agent(port_priv,
735						        (const struct ib_mad_hdr *)mad_priv->mad);
736		}
737		if (!port_priv || !recv_mad_agent) {
738			/*
739			 * No receiving agent so drop packet and
740			 * generate send completion.
741			 */
742			kfree(mad_priv);
743			break;
744		}
745		local->mad_priv = mad_priv;
746		local->recv_mad_agent = recv_mad_agent;
747		break;
748	default:
749		kfree(mad_priv);
750		kfree(local);
751		ret = -EINVAL;
752		goto out;
753	}
754
755	local->mad_send_wr = mad_send_wr;
756	if (opa) {
757		local->mad_send_wr->send_wr.pkey_index = out_mad_pkey_index;
758		local->return_wc_byte_len = mad_size;
759	}
760	/* Reference MAD agent until send side of local completion handled */
761	refcount_inc(&mad_agent_priv->refcount);
762	/* Queue local completion to local list */
763	spin_lock_irqsave(&mad_agent_priv->lock, flags);
764	list_add_tail(&local->completion_list, &mad_agent_priv->local_list);
765	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
766	queue_work(mad_agent_priv->qp_info->port_priv->wq,
767		   &mad_agent_priv->local_work);
768	ret = 1;
769out:
770	return ret;
771}
772
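/*
 * Padding needed to fill out the last RMPP segment: seg_size is the payload
 * that fits in one MAD after the header, so e.g. a 300-byte payload with a
 * 200-byte seg_size needs 100 bytes of pad.  A zero data_len returns a full
 * seg_size of padding.
 */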
773static int get_pad_size(int hdr_len, int data_len, size_t mad_size)
774{
775	int seg_size, pad;
776
777	seg_size = mad_size - hdr_len;
778	if (data_len && seg_size) {
779		pad = seg_size - data_len % seg_size;
780		return pad == seg_size ? 0 : pad;
	} else {
		return seg_size;
	}
783}
784
785static void free_send_rmpp_list(struct ib_mad_send_wr_private *mad_send_wr)
786{
787	struct ib_rmpp_segment *s, *t;
788
789	list_for_each_entry_safe(s, t, &mad_send_wr->rmpp_list, list) {
790		list_del(&s->list);
791		kfree(s);
792	}
793}
794
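/*
 * Allocate one ib_rmpp_segment per seg_size chunk of payload (plus pad),
 * zero the padding in the final segment and initialize the RMPP header in
 * the send buffer.  The segments hang off send_wr->rmpp_list.
 */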
795static int alloc_send_rmpp_list(struct ib_mad_send_wr_private *send_wr,
796				size_t mad_size, gfp_t gfp_mask)
797{
798	struct ib_mad_send_buf *send_buf = &send_wr->send_buf;
799	struct ib_rmpp_mad *rmpp_mad = send_buf->mad;
800	struct ib_rmpp_segment *seg = NULL;
801	int left, seg_size, pad;
802
803	send_buf->seg_size = mad_size - send_buf->hdr_len;
804	send_buf->seg_rmpp_size = mad_size - IB_MGMT_RMPP_HDR;
805	seg_size = send_buf->seg_size;
806	pad = send_wr->pad;
807
808	/* Allocate data segments. */
809	for (left = send_buf->data_len + pad; left > 0; left -= seg_size) {
810		seg = kmalloc(sizeof (*seg) + seg_size, gfp_mask);
811		if (!seg) {
812			free_send_rmpp_list(send_wr);
813			return -ENOMEM;
814		}
815		seg->num = ++send_buf->seg_count;
816		list_add_tail(&seg->list, &send_wr->rmpp_list);
817	}
818
819	/* Zero any padding */
820	if (pad)
821		memset(seg->data + seg_size - pad, 0, pad);
822
	rmpp_mad->rmpp_hdr.rmpp_version =
		send_wr->mad_agent_priv->agent.rmpp_version;
825	rmpp_mad->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_DATA;
826	ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE);
827
828	send_wr->cur_seg = container_of(send_wr->rmpp_list.next,
829					struct ib_rmpp_segment, list);
830	send_wr->last_ack_seg = send_wr->cur_seg;
831	return 0;
832}
833
834int ib_mad_kernel_rmpp_agent(const struct ib_mad_agent *agent)
835{
836	return agent->rmpp_version && !(agent->flags & IB_MAD_USER_RMPP);
837}
838EXPORT_SYMBOL(ib_mad_kernel_rmpp_agent);
839
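/*
 * Allocate a MAD send buffer.  The MAD (or, for RMPP sends, just its header)
 * and the private ib_mad_send_wr_private are carved from a single allocation;
 * RMPP data segments are allocated separately by alloc_send_rmpp_list().
 * The buffer holds a reference on the agent until ib_free_send_mad().
 */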
struct ib_mad_send_buf *ib_create_send_mad(struct ib_mad_agent *mad_agent,
					   u32 remote_qpn, u16 pkey_index,
					   int rmpp_active,
					   int hdr_len, int data_len,
					   gfp_t gfp_mask,
					   u8 base_version)
846{
847	struct ib_mad_agent_private *mad_agent_priv;
848	struct ib_mad_send_wr_private *mad_send_wr;
849	int pad, message_size, ret, size;
850	void *buf;
851	size_t mad_size;
852	bool opa;
853
854	mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private,
855				      agent);
856
857	opa = rdma_cap_opa_mad(mad_agent->device, mad_agent->port_num);
858
859	if (opa && base_version == OPA_MGMT_BASE_VERSION)
860		mad_size = sizeof(struct opa_mad);
861	else
862		mad_size = sizeof(struct ib_mad);
863
864	pad = get_pad_size(hdr_len, data_len, mad_size);
865	message_size = hdr_len + data_len + pad;
866
867	if (ib_mad_kernel_rmpp_agent(mad_agent)) {
868		if (!rmpp_active && message_size > mad_size)
869			return ERR_PTR(-EINVAL);
	} else {
		if (rmpp_active || message_size > mad_size)
			return ERR_PTR(-EINVAL);
	}
873
874	size = rmpp_active ? hdr_len : mad_size;
875	buf = kzalloc(sizeof *mad_send_wr + size, gfp_mask);
876	if (!buf)
877		return ERR_PTR(-ENOMEM);
878
879	mad_send_wr = buf + size;
880	INIT_LIST_HEAD(&mad_send_wr->rmpp_list);
881	mad_send_wr->send_buf.mad = buf;
882	mad_send_wr->send_buf.hdr_len = hdr_len;
883	mad_send_wr->send_buf.data_len = data_len;
884	mad_send_wr->pad = pad;
885
886	mad_send_wr->mad_agent_priv = mad_agent_priv;
887	mad_send_wr->sg_list[0].length = hdr_len;
888	mad_send_wr->sg_list[0].lkey = mad_agent->qp->pd->local_dma_lkey;
889
890	/* OPA MADs don't have to be the full 2048 bytes */
891	if (opa && base_version == OPA_MGMT_BASE_VERSION &&
892	    data_len < mad_size - hdr_len)
893		mad_send_wr->sg_list[1].length = data_len;
894	else
895		mad_send_wr->sg_list[1].length = mad_size - hdr_len;
896
897	mad_send_wr->sg_list[1].lkey = mad_agent->qp->pd->local_dma_lkey;
898
899	mad_send_wr->mad_list.cqe.done = ib_mad_send_done;
900
901	mad_send_wr->send_wr.wr.wr_cqe = &mad_send_wr->mad_list.cqe;
902	mad_send_wr->send_wr.wr.sg_list = mad_send_wr->sg_list;
903	mad_send_wr->send_wr.wr.num_sge = 2;
904	mad_send_wr->send_wr.wr.opcode = IB_WR_SEND;
905	mad_send_wr->send_wr.wr.send_flags = IB_SEND_SIGNALED;
906	mad_send_wr->send_wr.remote_qpn = remote_qpn;
907	mad_send_wr->send_wr.remote_qkey = IB_QP_SET_QKEY;
908	mad_send_wr->send_wr.pkey_index = pkey_index;
909
910	if (rmpp_active) {
911		ret = alloc_send_rmpp_list(mad_send_wr, mad_size, gfp_mask);
912		if (ret) {
913			kfree(buf);
914			return ERR_PTR(ret);
915		}
916	}
917
918	mad_send_wr->send_buf.mad_agent = mad_agent;
919	refcount_inc(&mad_agent_priv->refcount);
920	return &mad_send_wr->send_buf;
921}
922EXPORT_SYMBOL(ib_create_send_mad);
923
924int ib_get_mad_data_offset(u8 mgmt_class)
925{
926	if (mgmt_class == IB_MGMT_CLASS_SUBN_ADM)
927		return IB_MGMT_SA_HDR;
928	else if ((mgmt_class == IB_MGMT_CLASS_DEVICE_MGMT) ||
929		 (mgmt_class == IB_MGMT_CLASS_DEVICE_ADM) ||
930		 (mgmt_class == IB_MGMT_CLASS_BIS))
931		return IB_MGMT_DEVICE_HDR;
932	else if ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) &&
933		 (mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END))
934		return IB_MGMT_VENDOR_HDR;
935	else
936		return IB_MGMT_MAD_HDR;
937}
938EXPORT_SYMBOL(ib_get_mad_data_offset);
939
940int ib_is_mad_class_rmpp(u8 mgmt_class)
941{
942	if ((mgmt_class == IB_MGMT_CLASS_SUBN_ADM) ||
943	    (mgmt_class == IB_MGMT_CLASS_DEVICE_MGMT) ||
944	    (mgmt_class == IB_MGMT_CLASS_DEVICE_ADM) ||
945	    (mgmt_class == IB_MGMT_CLASS_BIS) ||
946	    ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) &&
947	     (mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END)))
948		return 1;
949	return 0;
950}
951EXPORT_SYMBOL(ib_is_mad_class_rmpp);
952
953void *ib_get_rmpp_segment(struct ib_mad_send_buf *send_buf, int seg_num)
954{
955	struct ib_mad_send_wr_private *mad_send_wr;
956	struct list_head *list;
957
958	mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private,
959				   send_buf);
960	list = &mad_send_wr->cur_seg->list;
961
962	if (mad_send_wr->cur_seg->num < seg_num) {
963		list_for_each_entry(mad_send_wr->cur_seg, list, list)
964			if (mad_send_wr->cur_seg->num == seg_num)
965				break;
966	} else if (mad_send_wr->cur_seg->num > seg_num) {
967		list_for_each_entry_reverse(mad_send_wr->cur_seg, list, list)
968			if (mad_send_wr->cur_seg->num == seg_num)
969				break;
970	}
971	return mad_send_wr->cur_seg->data;
972}
973EXPORT_SYMBOL(ib_get_rmpp_segment);
974
975static inline void *ib_get_payload(struct ib_mad_send_wr_private *mad_send_wr)
976{
977	if (mad_send_wr->send_buf.seg_count)
978		return ib_get_rmpp_segment(&mad_send_wr->send_buf,
979					   mad_send_wr->seg_num);
980	else
981		return mad_send_wr->send_buf.mad +
982		       mad_send_wr->send_buf.hdr_len;
983}
984
985void ib_free_send_mad(struct ib_mad_send_buf *send_buf)
986{
987	struct ib_mad_agent_private *mad_agent_priv;
988	struct ib_mad_send_wr_private *mad_send_wr;
989
990	mad_agent_priv = container_of(send_buf->mad_agent,
991				      struct ib_mad_agent_private, agent);
992	mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private,
993				   send_buf);
994
995	free_send_rmpp_list(mad_send_wr);
996	kfree(send_buf->mad);
997	deref_mad_agent(mad_agent_priv);
998}
999EXPORT_SYMBOL(ib_free_send_mad);
1000
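/*
 * DMA-map the MAD header and payload and post the send WR.  If the send
 * queue is already at max_active, the request is parked on the QP's
 * overflow list to be posted later.
 */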
1001int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
1002{
1003	struct ib_mad_qp_info *qp_info;
1004	struct list_head *list;
1005	struct ib_mad_agent *mad_agent;
1006	struct ib_sge *sge;
1007	unsigned long flags;
1008	int ret;
1009
	/* Set up the CQE so that mad_send_wr can be found upon completion */
1011	qp_info = mad_send_wr->mad_agent_priv->qp_info;
1012	mad_send_wr->mad_list.mad_queue = &qp_info->send_queue;
1013	mad_send_wr->mad_list.cqe.done = ib_mad_send_done;
1014	mad_send_wr->send_wr.wr.wr_cqe = &mad_send_wr->mad_list.cqe;
1015
1016	mad_agent = mad_send_wr->send_buf.mad_agent;
1017	sge = mad_send_wr->sg_list;
1018	sge[0].addr = ib_dma_map_single(mad_agent->device,
1019					mad_send_wr->send_buf.mad,
1020					sge[0].length,
1021					DMA_TO_DEVICE);
1022	if (unlikely(ib_dma_mapping_error(mad_agent->device, sge[0].addr)))
1023		return -ENOMEM;
1024
1025	mad_send_wr->header_mapping = sge[0].addr;
1026
1027	sge[1].addr = ib_dma_map_single(mad_agent->device,
1028					ib_get_payload(mad_send_wr),
1029					sge[1].length,
1030					DMA_TO_DEVICE);
1031	if (unlikely(ib_dma_mapping_error(mad_agent->device, sge[1].addr))) {
1032		ib_dma_unmap_single(mad_agent->device,
1033				    mad_send_wr->header_mapping,
1034				    sge[0].length, DMA_TO_DEVICE);
1035		return -ENOMEM;
1036	}
1037	mad_send_wr->payload_mapping = sge[1].addr;
1038
1039	spin_lock_irqsave(&qp_info->send_queue.lock, flags);
1040	if (qp_info->send_queue.count < qp_info->send_queue.max_active) {
1041		trace_ib_mad_ib_send_mad(mad_send_wr, qp_info);
1042		ret = ib_post_send(mad_agent->qp, &mad_send_wr->send_wr.wr,
1043				   NULL);
1044		list = &qp_info->send_queue.list;
1045	} else {
1046		ret = 0;
1047		list = &qp_info->overflow_list;
1048	}
1049
1050	if (!ret) {
1051		qp_info->send_queue.count++;
1052		list_add_tail(&mad_send_wr->mad_list.list, list);
1053	}
1054	spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
1055	if (ret) {
1056		ib_dma_unmap_single(mad_agent->device,
1057				    mad_send_wr->header_mapping,
1058				    sge[0].length, DMA_TO_DEVICE);
1059		ib_dma_unmap_single(mad_agent->device,
1060				    mad_send_wr->payload_mapping,
1061				    sge[1].length, DMA_TO_DEVICE);
1062	}
1063	return ret;
1064}
1065
1066/*
1067 * ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated
1068 *  with the registered client
1069 */
1070int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
1071		     struct ib_mad_send_buf **bad_send_buf)
1072{
1073	struct ib_mad_agent_private *mad_agent_priv;
1074	struct ib_mad_send_buf *next_send_buf;
1075	struct ib_mad_send_wr_private *mad_send_wr;
1076	unsigned long flags;
1077	int ret = -EINVAL;
1078
1079	/* Walk list of send WRs and post each on send list */
1080	for (; send_buf; send_buf = next_send_buf) {
1081		mad_send_wr = container_of(send_buf,
1082					   struct ib_mad_send_wr_private,
1083					   send_buf);
1084		mad_agent_priv = mad_send_wr->mad_agent_priv;
1085
1086		ret = ib_mad_enforce_security(mad_agent_priv,
1087					      mad_send_wr->send_wr.pkey_index);
1088		if (ret)
1089			goto error;
1090
1091		if (!send_buf->mad_agent->send_handler ||
1092		    (send_buf->timeout_ms &&
1093		     !send_buf->mad_agent->recv_handler)) {
1094			ret = -EINVAL;
1095			goto error;
1096		}
1097
1098		if (!ib_is_mad_class_rmpp(((struct ib_mad_hdr *) send_buf->mad)->mgmt_class)) {
1099			if (mad_agent_priv->agent.rmpp_version) {
1100				ret = -EINVAL;
1101				goto error;
1102			}
1103		}
1104
1105		/*
1106		 * Save pointer to next work request to post in case the
1107		 * current one completes, and the user modifies the work
1108		 * request associated with the completion
1109		 */
1110		next_send_buf = send_buf->next;
1111		mad_send_wr->send_wr.ah = send_buf->ah;
1112
1113		if (((struct ib_mad_hdr *) send_buf->mad)->mgmt_class ==
1114		    IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
1115			ret = handle_outgoing_dr_smp(mad_agent_priv,
1116						     mad_send_wr);
1117			if (ret < 0)		/* error */
1118				goto error;
1119			else if (ret == 1)	/* locally consumed */
1120				continue;
1121		}
1122
1123		mad_send_wr->tid = ((struct ib_mad_hdr *) send_buf->mad)->tid;
1124		/* Timeout will be updated after send completes */
1125		mad_send_wr->timeout = msecs_to_jiffies(send_buf->timeout_ms);
1126		mad_send_wr->max_retries = send_buf->retries;
1127		mad_send_wr->retries_left = send_buf->retries;
1128		send_buf->retries = 0;
		/*
		 * One reference for the posted work request, plus one more
		 * if a response is expected
		 */
1130		mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0);
1131		mad_send_wr->status = IB_WC_SUCCESS;
1132
1133		/* Reference MAD agent until send completes */
1134		refcount_inc(&mad_agent_priv->refcount);
1135		spin_lock_irqsave(&mad_agent_priv->lock, flags);
1136		list_add_tail(&mad_send_wr->agent_list,
1137			      &mad_agent_priv->send_list);
1138		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
1139
1140		if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) {
1141			ret = ib_send_rmpp_mad(mad_send_wr);
1142			if (ret >= 0 && ret != IB_RMPP_RESULT_CONSUMED)
1143				ret = ib_send_mad(mad_send_wr);
1144		} else
1145			ret = ib_send_mad(mad_send_wr);
1146		if (ret < 0) {
1147			/* Fail send request */
1148			spin_lock_irqsave(&mad_agent_priv->lock, flags);
1149			list_del(&mad_send_wr->agent_list);
1150			spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
1151			deref_mad_agent(mad_agent_priv);
1152			goto error;
1153		}
1154	}
1155	return 0;
1156error:
1157	if (bad_send_buf)
1158		*bad_send_buf = send_buf;
1159	return ret;
1160}
1161EXPORT_SYMBOL(ib_post_send_mad);
1162
/*
 * ib_free_recv_mad - Releases the data buffers used to receive
 *  a MAD back to the access layer
 */
1167void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc)
1168{
1169	struct ib_mad_recv_buf *mad_recv_buf, *temp_recv_buf;
1170	struct ib_mad_private_header *mad_priv_hdr;
1171	struct ib_mad_private *priv;
1172	struct list_head free_list;
1173
1174	INIT_LIST_HEAD(&free_list);
1175	list_splice_init(&mad_recv_wc->rmpp_list, &free_list);
1176
1177	list_for_each_entry_safe(mad_recv_buf, temp_recv_buf,
1178					&free_list, list) {
1179		mad_recv_wc = container_of(mad_recv_buf, struct ib_mad_recv_wc,
1180					   recv_buf);
1181		mad_priv_hdr = container_of(mad_recv_wc,
1182					    struct ib_mad_private_header,
1183					    recv_wc);
1184		priv = container_of(mad_priv_hdr, struct ib_mad_private,
1185				    header);
1186		kfree(priv);
1187	}
1188}
1189EXPORT_SYMBOL(ib_free_recv_mad);
1190
1191static int method_in_use(struct ib_mad_mgmt_method_table **method,
1192			 struct ib_mad_reg_req *mad_reg_req)
1193{
1194	int i;
1195
1196	for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS) {
1197		if ((*method)->agent[i]) {
1198			pr_err("Method %d already in use\n", i);
1199			return -EINVAL;
1200		}
1201	}
1202	return 0;
1203}
1204
1205static int allocate_method_table(struct ib_mad_mgmt_method_table **method)
1206{
1207	/* Allocate management method table */
1208	*method = kzalloc(sizeof **method, GFP_ATOMIC);
1209	return (*method) ? 0 : (-ENOMEM);
1210}
1211
1212/*
1213 * Check to see if there are any methods still in use
1214 */
1215static int check_method_table(struct ib_mad_mgmt_method_table *method)
1216{
1217	int i;
1218
1219	for (i = 0; i < IB_MGMT_MAX_METHODS; i++)
1220		if (method->agent[i])
1221			return 1;
1222	return 0;
1223}
1224
1225/*
1226 * Check to see if there are any method tables for this class still in use
1227 */
1228static int check_class_table(struct ib_mad_mgmt_class_table *class)
1229{
1230	int i;
1231
1232	for (i = 0; i < MAX_MGMT_CLASS; i++)
1233		if (class->method_table[i])
1234			return 1;
1235	return 0;
1236}
1237
1238static int check_vendor_class(struct ib_mad_mgmt_vendor_class *vendor_class)
1239{
1240	int i;
1241
1242	for (i = 0; i < MAX_MGMT_OUI; i++)
1243		if (vendor_class->method_table[i])
1244			return 1;
1245	return 0;
1246}
1247
1248static int find_vendor_oui(struct ib_mad_mgmt_vendor_class *vendor_class,
1249			   const char *oui)
1250{
1251	int i;
1252
1253	for (i = 0; i < MAX_MGMT_OUI; i++)
		/* Is there a matching OUI for this vendor class? */
1255		if (!memcmp(vendor_class->oui[i], oui, 3))
1256			return i;
1257
1258	return -1;
1259}
1260
1261static int check_vendor_table(struct ib_mad_mgmt_vendor_class_table *vendor)
1262{
1263	int i;
1264
1265	for (i = 0; i < MAX_MGMT_VENDOR_RANGE2; i++)
1266		if (vendor->vendor_class[i])
1267			return 1;
1268
1269	return 0;
1270}
1271
1272static void remove_methods_mad_agent(struct ib_mad_mgmt_method_table *method,
1273				     struct ib_mad_agent_private *agent)
1274{
1275	int i;
1276
1277	/* Remove any methods for this mad agent */
1278	for (i = 0; i < IB_MGMT_MAX_METHODS; i++) {
1279		if (method->agent[i] == agent) {
1280			method->agent[i] = NULL;
1281		}
1282	}
1283}
1284
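/*
 * Record the methods from mad_reg_req in the per-port class/method tables
 * for a non-vendor management class, allocating the tables on demand.
 * Fails with -EINVAL if any requested method is already registered.
 */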
1285static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req,
1286			      struct ib_mad_agent_private *agent_priv,
1287			      u8 mgmt_class)
1288{
1289	struct ib_mad_port_private *port_priv;
1290	struct ib_mad_mgmt_class_table **class;
1291	struct ib_mad_mgmt_method_table **method;
1292	int i, ret;
1293
1294	port_priv = agent_priv->qp_info->port_priv;
1295	class = &port_priv->version[mad_reg_req->mgmt_class_version].class;
1296	if (!*class) {
1297		/* Allocate management class table for "new" class version */
1298		*class = kzalloc(sizeof **class, GFP_ATOMIC);
1299		if (!*class) {
1300			ret = -ENOMEM;
1301			goto error1;
1302		}
1303
1304		/* Allocate method table for this management class */
1305		method = &(*class)->method_table[mgmt_class];
1306		if ((ret = allocate_method_table(method)))
1307			goto error2;
1308	} else {
1309		method = &(*class)->method_table[mgmt_class];
1310		if (!*method) {
1311			/* Allocate method table for this management class */
1312			if ((ret = allocate_method_table(method)))
1313				goto error1;
1314		}
1315	}
1316
1317	/* Now, make sure methods are not already in use */
1318	if (method_in_use(method, mad_reg_req))
1319		goto error3;
1320
1321	/* Finally, add in methods being registered */
1322	for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS)
1323		(*method)->agent[i] = agent_priv;
1324
1325	return 0;
1326
1327error3:
1328	/* Remove any methods for this mad agent */
1329	remove_methods_mad_agent(*method, agent_priv);
1330	/* Now, check to see if there are any methods in use */
1331	if (!check_method_table(*method)) {
1332		/* If not, release management method table */
1333		kfree(*method);
1334		*method = NULL;
1335	}
1336	ret = -EINVAL;
1337	goto error1;
1338error2:
1339	kfree(*class);
1340	*class = NULL;
1341error1:
1342	return ret;
1343}
1344
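/*
 * As add_nonoui_reg_req(), but for the "new" vendor classes that carry an
 * OUI: find (or claim) the OUI slot in the vendor class table, then record
 * the requested methods for this agent.
 */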
1345static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
1346			   struct ib_mad_agent_private *agent_priv)
1347{
1348	struct ib_mad_port_private *port_priv;
1349	struct ib_mad_mgmt_vendor_class_table **vendor_table;
1350	struct ib_mad_mgmt_vendor_class_table *vendor = NULL;
1351	struct ib_mad_mgmt_vendor_class *vendor_class = NULL;
1352	struct ib_mad_mgmt_method_table **method;
1353	int i, ret = -ENOMEM;
1354	u8 vclass;
1355
1356	/* "New" vendor (with OUI) class */
1357	vclass = vendor_class_index(mad_reg_req->mgmt_class);
1358	port_priv = agent_priv->qp_info->port_priv;
1359	vendor_table = &port_priv->version[
1360				mad_reg_req->mgmt_class_version].vendor;
1361	if (!*vendor_table) {
1362		/* Allocate mgmt vendor class table for "new" class version */
1363		vendor = kzalloc(sizeof *vendor, GFP_ATOMIC);
1364		if (!vendor)
1365			goto error1;
1366
1367		*vendor_table = vendor;
1368	}
1369	if (!(*vendor_table)->vendor_class[vclass]) {
1370		/* Allocate table for this management vendor class */
1371		vendor_class = kzalloc(sizeof *vendor_class, GFP_ATOMIC);
1372		if (!vendor_class)
1373			goto error2;
1374
1375		(*vendor_table)->vendor_class[vclass] = vendor_class;
1376	}
1377	for (i = 0; i < MAX_MGMT_OUI; i++) {
		/* Is there a matching OUI for this vendor class? */
1379		if (!memcmp((*vendor_table)->vendor_class[vclass]->oui[i],
1380			    mad_reg_req->oui, 3)) {
1381			method = &(*vendor_table)->vendor_class[
1382						vclass]->method_table[i];
1383			if (!*method)
1384				goto error3;
1385			goto check_in_use;
1386		}
1387	}
1388	for (i = 0; i < MAX_MGMT_OUI; i++) {
		/* Is an OUI slot available? */
1390		if (!is_vendor_oui((*vendor_table)->vendor_class[
1391				vclass]->oui[i])) {
1392			method = &(*vendor_table)->vendor_class[
1393				vclass]->method_table[i];
1394			/* Allocate method table for this OUI */
1395			if (!*method) {
1396				ret = allocate_method_table(method);
1397				if (ret)
1398					goto error3;
1399			}
1400			memcpy((*vendor_table)->vendor_class[vclass]->oui[i],
1401			       mad_reg_req->oui, 3);
1402			goto check_in_use;
1403		}
1404	}
1405	dev_err(&agent_priv->agent.device->dev, "All OUI slots in use\n");
1406	goto error3;
1407
1408check_in_use:
1409	/* Now, make sure methods are not already in use */
1410	if (method_in_use(method, mad_reg_req))
1411		goto error4;
1412
1413	/* Finally, add in methods being registered */
1414	for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS)
1415		(*method)->agent[i] = agent_priv;
1416
1417	return 0;
1418
1419error4:
1420	/* Remove any methods for this mad agent */
1421	remove_methods_mad_agent(*method, agent_priv);
1422	/* Now, check to see if there are any methods in use */
1423	if (!check_method_table(*method)) {
1424		/* If not, release management method table */
1425		kfree(*method);
1426		*method = NULL;
1427	}
1428	ret = -EINVAL;
1429error3:
1430	if (vendor_class) {
1431		(*vendor_table)->vendor_class[vclass] = NULL;
1432		kfree(vendor_class);
1433	}
1434error2:
1435	if (vendor) {
1436		*vendor_table = NULL;
1437		kfree(vendor);
1438	}
1439error1:
1440	return ret;
1441}
1442
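/*
 * Undo a registration: remove the agent's methods from the class or vendor
 * tables and free any method/class/vendor tables that become empty.
 */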
1443static void remove_mad_reg_req(struct ib_mad_agent_private *agent_priv)
1444{
1445	struct ib_mad_port_private *port_priv;
1446	struct ib_mad_mgmt_class_table *class;
1447	struct ib_mad_mgmt_method_table *method;
1448	struct ib_mad_mgmt_vendor_class_table *vendor;
1449	struct ib_mad_mgmt_vendor_class *vendor_class;
1450	int index;
1451	u8 mgmt_class;
1452
	/*
	 * Was a MAD registration request supplied
	 * with the original registration?
	 */
	if (!agent_priv->reg_req)
		goto out;
1460
1461	port_priv = agent_priv->qp_info->port_priv;
1462	mgmt_class = convert_mgmt_class(agent_priv->reg_req->mgmt_class);
1463	class = port_priv->version[
1464			agent_priv->reg_req->mgmt_class_version].class;
1465	if (!class)
1466		goto vendor_check;
1467
1468	method = class->method_table[mgmt_class];
1469	if (method) {
1470		/* Remove any methods for this mad agent */
1471		remove_methods_mad_agent(method, agent_priv);
1472		/* Now, check to see if there are any methods still in use */
1473		if (!check_method_table(method)) {
1474			/* If not, release management method table */
1475			kfree(method);
1476			class->method_table[mgmt_class] = NULL;
			/* Any management classes left? */
1478			if (!check_class_table(class)) {
1479				/* If not, release management class table */
1480				kfree(class);
1481				port_priv->version[
1482					agent_priv->reg_req->
1483					mgmt_class_version].class = NULL;
1484			}
1485		}
1486	}
1487
1488vendor_check:
1489	if (!is_vendor_class(mgmt_class))
1490		goto out;
1491
1492	/* normalize mgmt_class to vendor range 2 */
1493	mgmt_class = vendor_class_index(agent_priv->reg_req->mgmt_class);
1494	vendor = port_priv->version[
1495			agent_priv->reg_req->mgmt_class_version].vendor;
1496
1497	if (!vendor)
1498		goto out;
1499
1500	vendor_class = vendor->vendor_class[mgmt_class];
1501	if (vendor_class) {
1502		index = find_vendor_oui(vendor_class, agent_priv->reg_req->oui);
1503		if (index < 0)
1504			goto out;
1505		method = vendor_class->method_table[index];
1506		if (method) {
1507			/* Remove any methods for this mad agent */
1508			remove_methods_mad_agent(method, agent_priv);
1509			/*
1510			 * Now, check to see if there are
1511			 * any methods still in use
1512			 */
1513			if (!check_method_table(method)) {
1514				/* If not, release management method table */
1515				kfree(method);
1516				vendor_class->method_table[index] = NULL;
1517				memset(vendor_class->oui[index], 0, 3);
				/* Any OUIs left? */
1519				if (!check_vendor_class(vendor_class)) {
1520					/* If not, release vendor class table */
1521					kfree(vendor_class);
1522					vendor->vendor_class[mgmt_class] = NULL;
					/* Any other vendor classes left? */
1524					if (!check_vendor_table(vendor)) {
1525						kfree(vendor);
1526						port_priv->version[
1527							agent_priv->reg_req->
1528							mgmt_class_version].
1529							vendor = NULL;
1530					}
1531				}
1532			}
1533		}
1534	}
1535
1536out:
1537	return;
1538}
1539
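/*
 * Find the agent that should receive an incoming MAD.  Responses are routed
 * by the high 32 bits of the TID (the hi_tid handed out at registration),
 * requests by class version, management class, method and, for vendor
 * classes, OUI.  A reference is taken on the returned agent.
 */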
1540static struct ib_mad_agent_private *
1541find_mad_agent(struct ib_mad_port_private *port_priv,
1542	       const struct ib_mad_hdr *mad_hdr)
1543{
1544	struct ib_mad_agent_private *mad_agent = NULL;
1545	unsigned long flags;
1546
1547	if (ib_response_mad(mad_hdr)) {
1548		u32 hi_tid;
1549
		/*
		 * Routing is based on the high 32 bits of the MAD's
		 * transaction ID.
		 */
1554		hi_tid = be64_to_cpu(mad_hdr->tid) >> 32;
1555		rcu_read_lock();
1556		mad_agent = xa_load(&ib_mad_clients, hi_tid);
1557		if (mad_agent && !refcount_inc_not_zero(&mad_agent->refcount))
1558			mad_agent = NULL;
1559		rcu_read_unlock();
1560	} else {
1561		struct ib_mad_mgmt_class_table *class;
1562		struct ib_mad_mgmt_method_table *method;
1563		struct ib_mad_mgmt_vendor_class_table *vendor;
1564		struct ib_mad_mgmt_vendor_class *vendor_class;
1565		const struct ib_vendor_mad *vendor_mad;
1566		int index;
1567
1568		spin_lock_irqsave(&port_priv->reg_lock, flags);
1569		/*
1570		 * Routing is based on version, class, and method
1571		 * For "newer" vendor MADs, also based on OUI
1572		 */
1573		if (mad_hdr->class_version >= MAX_MGMT_VERSION)
1574			goto out;
1575		if (!is_vendor_class(mad_hdr->mgmt_class)) {
1576			class = port_priv->version[
1577					mad_hdr->class_version].class;
1578			if (!class)
1579				goto out;
1580			if (convert_mgmt_class(mad_hdr->mgmt_class) >=
1581			    ARRAY_SIZE(class->method_table))
1582				goto out;
1583			method = class->method_table[convert_mgmt_class(
1584							mad_hdr->mgmt_class)];
1585			if (method)
1586				mad_agent = method->agent[mad_hdr->method &
1587							  ~IB_MGMT_METHOD_RESP];
1588		} else {
1589			vendor = port_priv->version[
1590					mad_hdr->class_version].vendor;
1591			if (!vendor)
1592				goto out;
1593			vendor_class = vendor->vendor_class[vendor_class_index(
1594						mad_hdr->mgmt_class)];
1595			if (!vendor_class)
1596				goto out;
1597			/* Find matching OUI */
1598			vendor_mad = (const struct ib_vendor_mad *)mad_hdr;
1599			index = find_vendor_oui(vendor_class, vendor_mad->oui);
1600			if (index == -1)
1601				goto out;
1602			method = vendor_class->method_table[index];
1603			if (method) {
1604				mad_agent = method->agent[mad_hdr->method &
1605							  ~IB_MGMT_METHOD_RESP];
1606			}
1607		}
1608		if (mad_agent)
1609			refcount_inc(&mad_agent->refcount);
1610out:
1611		spin_unlock_irqrestore(&port_priv->reg_lock, flags);
1612	}
1613
1614	if (mad_agent && !mad_agent->agent.recv_handler) {
1615		dev_notice(&port_priv->device->dev,
1616			   "No receive handler for client %p on port %d\n",
1617			   &mad_agent->agent, port_priv->port_num);
1618		deref_mad_agent(mad_agent);
1619		mad_agent = NULL;
1620	}
1621
1622	return mad_agent;
1623}
1624
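/*
 * Basic sanity checks on an incoming MAD: supported base version, SM class
 * traffic only on QP0, everything else only on QP1, and CM attributes other
 * than ClassPortInfo only via the Send method.
 */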
1625static int validate_mad(const struct ib_mad_hdr *mad_hdr,
1626			const struct ib_mad_qp_info *qp_info,
1627			bool opa)
1628{
1629	int valid = 0;
1630	u32 qp_num = qp_info->qp->qp_num;
1631
1632	/* Make sure MAD base version is understood */
1633	if (mad_hdr->base_version != IB_MGMT_BASE_VERSION &&
1634	    (!opa || mad_hdr->base_version != OPA_MGMT_BASE_VERSION)) {
1635		pr_err("MAD received with unsupported base version %d %s\n",
1636		       mad_hdr->base_version, opa ? "(opa)" : "");
1637		goto out;
1638	}
1639
1640	/* Filter SMI packets sent to other than QP0 */
1641	if ((mad_hdr->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED) ||
1642	    (mad_hdr->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) {
1643		if (qp_num == 0)
1644			valid = 1;
1645	} else {
1646		/* CM attributes other than ClassPortInfo only use Send method */
1647		if ((mad_hdr->mgmt_class == IB_MGMT_CLASS_CM) &&
1648		    (mad_hdr->attr_id != IB_MGMT_CLASSPORTINFO_ATTR_ID) &&
1649		    (mad_hdr->method != IB_MGMT_METHOD_SEND))
1650			goto out;
1651		/* Filter GSI packets sent to QP0 */
1652		if (qp_num != 0)
1653			valid = 1;
1654	}
1655
1656out:
1657	return valid;
1658}
1659
1660static int is_rmpp_data_mad(const struct ib_mad_agent_private *mad_agent_priv,
1661			    const struct ib_mad_hdr *mad_hdr)
1662{
1663	struct ib_rmpp_mad *rmpp_mad;
1664
1665	rmpp_mad = (struct ib_rmpp_mad *)mad_hdr;
1666	return !mad_agent_priv->agent.rmpp_version ||
1667		!ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent) ||
1668		!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
1669				    IB_MGMT_RMPP_FLAG_ACTIVE) ||
1670		(rmpp_mad->rmpp_hdr.rmpp_type == IB_MGMT_RMPP_TYPE_DATA);
1671}
1672
1673static inline int rcv_has_same_class(const struct ib_mad_send_wr_private *wr,
1674				     const struct ib_mad_recv_wc *rwc)
1675{
1676	return ((struct ib_mad_hdr *)(wr->send_buf.mad))->mgmt_class ==
1677		rwc->recv_buf.mad->mad_hdr.mgmt_class;
1678}
1679
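/*
 * Check that a received MAD and a sent WR form a request/response pair
 * addressed to/from the same peer: compare GIDs when a GRH is present,
 * otherwise LIDs and path bits.
 */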
1680static inline int rcv_has_same_gid(const struct ib_mad_agent_private *mad_agent_priv,
1681				   const struct ib_mad_send_wr_private *wr,
				   const struct ib_mad_recv_wc *rwc)
1683{
1684	struct rdma_ah_attr attr;
1685	u8 send_resp, rcv_resp;
1686	union ib_gid sgid;
1687	struct ib_device *device = mad_agent_priv->agent.device;
1688	u8 port_num = mad_agent_priv->agent.port_num;
1689	u8 lmc;
1690	bool has_grh;
1691
1692	send_resp = ib_response_mad((struct ib_mad_hdr *)wr->send_buf.mad);
1693	rcv_resp = ib_response_mad(&rwc->recv_buf.mad->mad_hdr);
1694
1695	if (send_resp == rcv_resp)
1696		/* both requests, or both responses. GIDs different */
1697		return 0;
1698
1699	if (rdma_query_ah(wr->send_buf.ah, &attr))
1700		/* Assume not equal, to avoid false positives. */
1701		return 0;
1702
1703	has_grh = !!(rdma_ah_get_ah_flags(&attr) & IB_AH_GRH);
1704	if (has_grh != !!(rwc->wc->wc_flags & IB_WC_GRH))
		/* one has a GRH, the other does not; assume different */
1706		return 0;
1707
1708	if (!send_resp && rcv_resp) {
		/*
		 * Sent a request, received a response: match the
		 * response against the source address we sent from.
		 */
1710		if (!has_grh) {
1711			if (ib_get_cached_lmc(device, port_num, &lmc))
1712				return 0;
1713			return (!lmc || !((rdma_ah_get_path_bits(&attr) ^
1714					   rwc->wc->dlid_path_bits) &
1715					  ((1 << lmc) - 1)));
1716		} else {
1717			const struct ib_global_route *grh =
1718					rdma_ah_read_grh(&attr);
1719
1720			if (rdma_query_gid(device, port_num,
1721					   grh->sgid_index, &sgid))
1722				return 0;
1723			return !memcmp(sgid.raw, rwc->recv_buf.grh->dgid.raw,
1724				       16);
1725		}
1726	}
1727
1728	if (!has_grh)
1729		return rdma_ah_get_dlid(&attr) == rwc->wc->slid;
1730	else
1731		return !memcmp(rdma_ah_read_grh(&attr)->dgid.raw,
1732			       rwc->recv_buf.grh->sgid.raw,
1733			       16);
1734}
1735
1736static inline int is_direct(u8 class)
1737{
1738	return (class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE);
1739}
1740
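/*
 * Find the outstanding send that a received response answers: match on TID,
 * management class and (for LID-routed MADs) the sender's address, checking
 * the wait list first and then sends that have not yet completed.  Returns
 * NULL if no match is found or the request was canceled.
 */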
1741struct ib_mad_send_wr_private*
1742ib_find_send_mad(const struct ib_mad_agent_private *mad_agent_priv,
1743		 const struct ib_mad_recv_wc *wc)
1744{
1745	struct ib_mad_send_wr_private *wr;
1746	const struct ib_mad_hdr *mad_hdr;
1747
1748	mad_hdr = &wc->recv_buf.mad->mad_hdr;
1749
1750	list_for_each_entry(wr, &mad_agent_priv->wait_list, agent_list) {
1751		if ((wr->tid == mad_hdr->tid) &&
1752		    rcv_has_same_class(wr, wc) &&
1753		    /*
1754		     * Don't check GID for direct routed MADs.
1755		     * These might have permissive LIDs.
1756		     */
1757		    (is_direct(mad_hdr->mgmt_class) ||
1758		     rcv_has_same_gid(mad_agent_priv, wr, wc)))
1759			return (wr->status == IB_WC_SUCCESS) ? wr : NULL;
1760	}
1761
1762	/*
1763	 * It's possible to receive the response before we've
1764	 * been notified that the send has completed
1765	 */
1766	list_for_each_entry(wr, &mad_agent_priv->send_list, agent_list) {
1767		if (is_rmpp_data_mad(mad_agent_priv, wr->send_buf.mad) &&
1768		    wr->tid == mad_hdr->tid &&
1769		    wr->timeout &&
1770		    rcv_has_same_class(wr, wc) &&
1771		    /*
1772		     * Don't check GID for direct routed MADs.
1773		     * These might have permissive LIDs.
1774		     */
1775		    (is_direct(mad_hdr->mgmt_class) ||
1776		     rcv_has_same_gid(mad_agent_priv, wr, wc)))
1777			/* Verify request has not been canceled */
1778			return (wr->status == IB_WC_SUCCESS) ? wr : NULL;
1779	}
1780	return NULL;
1781}
1782
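/*
 * A response has been matched to this send: clear its timeout and, if
 * only a single reference remains, move it to the agent's done list.
 */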
1783void ib_mark_mad_done(struct ib_mad_send_wr_private *mad_send_wr)
1784{
1785	mad_send_wr->timeout = 0;
1786	if (mad_send_wr->refcount == 1)
1787		list_move_tail(&mad_send_wr->agent_list,
1788			      &mad_send_wr->mad_agent_priv->done_list);
1789}
1790
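/*
 * Deliver a received MAD to its agent.  Enforces the P_Key security
 * check, performs RMPP reassembly for kernel RMPP agents, and matches
 * response MADs to their outstanding request so the corresponding send
 * can be completed after the receive handler has run.
 */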
1791static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
1792				 struct ib_mad_recv_wc *mad_recv_wc)
1793{
1794	struct ib_mad_send_wr_private *mad_send_wr;
1795	struct ib_mad_send_wc mad_send_wc;
1796	unsigned long flags;
1797	int ret;
1798
1799	INIT_LIST_HEAD(&mad_recv_wc->rmpp_list);
1800	ret = ib_mad_enforce_security(mad_agent_priv,
1801				      mad_recv_wc->wc->pkey_index);
1802	if (ret) {
1803		ib_free_recv_mad(mad_recv_wc);
1804		deref_mad_agent(mad_agent_priv);
1805		return;
1806	}
1807
1808	list_add(&mad_recv_wc->recv_buf.list, &mad_recv_wc->rmpp_list);
1809	if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) {
1810		mad_recv_wc = ib_process_rmpp_recv_wc(mad_agent_priv,
1811						      mad_recv_wc);
1812		if (!mad_recv_wc) {
1813			deref_mad_agent(mad_agent_priv);
1814			return;
1815		}
1816	}
1817
1818	/* Complete corresponding request */
1819	if (ib_response_mad(&mad_recv_wc->recv_buf.mad->mad_hdr)) {
1820		spin_lock_irqsave(&mad_agent_priv->lock, flags);
1821		mad_send_wr = ib_find_send_mad(mad_agent_priv, mad_recv_wc);
1822		if (!mad_send_wr) {
1823			spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
1824			if (!ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)
1825			   && ib_is_mad_class_rmpp(mad_recv_wc->recv_buf.mad->mad_hdr.mgmt_class)
1826			   && (ib_get_rmpp_flags(&((struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad)->rmpp_hdr)
1827					& IB_MGMT_RMPP_FLAG_ACTIVE)) {
1828				/* user rmpp is in effect
1829				 * and this is an active RMPP MAD
1830				 */
1831				mad_agent_priv->agent.recv_handler(
1832						&mad_agent_priv->agent, NULL,
1833						mad_recv_wc);
1834				deref_mad_agent(mad_agent_priv);
1835			} else {
1836				/* not user rmpp, revert to normal behavior and
1837				 * drop the mad */
1838				ib_free_recv_mad(mad_recv_wc);
1839				deref_mad_agent(mad_agent_priv);
1840				return;
1841			}
1842		} else {
1843			ib_mark_mad_done(mad_send_wr);
1844			spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
1845
1846			/* Defined behavior is to complete response before request */
1847			mad_agent_priv->agent.recv_handler(
1848					&mad_agent_priv->agent,
1849					&mad_send_wr->send_buf,
1850					mad_recv_wc);
1851			deref_mad_agent(mad_agent_priv);
1852
1853			mad_send_wc.status = IB_WC_SUCCESS;
1854			mad_send_wc.vendor_err = 0;
1855			mad_send_wc.send_buf = &mad_send_wr->send_buf;
1856			ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
1857		}
1858	} else {
1859		mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent, NULL,
1860						   mad_recv_wc);
1861		deref_mad_agent(mad_agent_priv);
1862	}
1865}
1866
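/*
 * Process an incoming directed-route SMP.  The SMP is handled locally,
 * discarded, or, on a switch, forwarded out the appropriate port; in the
 * forwarding case IB_SMI_DISCARD is returned so the caller does not also
 * deliver it locally.
 */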
1867static enum smi_action handle_ib_smi(const struct ib_mad_port_private *port_priv,
1868				     const struct ib_mad_qp_info *qp_info,
1869				     const struct ib_wc *wc,
1870				     int port_num,
1871				     struct ib_mad_private *recv,
1872				     struct ib_mad_private *response)
1873{
1874	enum smi_forward_action retsmi;
1875	struct ib_smp *smp = (struct ib_smp *)recv->mad;
1876
1877	trace_ib_mad_handle_ib_smi(smp);
1878
1879	if (smi_handle_dr_smp_recv(smp,
1880				   rdma_cap_ib_switch(port_priv->device),
1881				   port_num,
1882				   port_priv->device->phys_port_cnt) ==
1883				   IB_SMI_DISCARD)
1884		return IB_SMI_DISCARD;
1885
1886	retsmi = smi_check_forward_dr_smp(smp);
1887	if (retsmi == IB_SMI_LOCAL)
1888		return IB_SMI_HANDLE;
1889
1890	if (retsmi == IB_SMI_SEND) { /* don't forward */
1891		if (smi_handle_dr_smp_send(smp,
1892					   rdma_cap_ib_switch(port_priv->device),
1893					   port_num) == IB_SMI_DISCARD)
1894			return IB_SMI_DISCARD;
1895
1896		if (smi_check_local_smp(smp, port_priv->device) == IB_SMI_DISCARD)
1897			return IB_SMI_DISCARD;
1898	} else if (rdma_cap_ib_switch(port_priv->device)) {
1899		/* forward case for switches */
1900		memcpy(response, recv, mad_priv_size(response));
1901		response->header.recv_wc.wc = &response->header.wc;
1902		response->header.recv_wc.recv_buf.mad = (struct ib_mad *)response->mad;
1903		response->header.recv_wc.recv_buf.grh = &response->grh;
1904
1905		agent_send_response((const struct ib_mad_hdr *)response->mad,
1906				    &response->grh, wc,
1907				    port_priv->device,
1908				    smi_get_fwd_port(smp),
1909				    qp_info->qp->qp_num,
1910				    response->mad_size,
1911				    false);
1912
1913		return IB_SMI_DISCARD;
1914	}
1915	return IB_SMI_HANDLE;
1916}
1917
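/*
 * Build a response with "unsupported method/attribute" status for a
 * Get/Set request that no agent claimed.  Returns true if a response was
 * generated (adjusting *resp_len for OPA MADs), false for other methods.
 */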
1918static bool generate_unmatched_resp(const struct ib_mad_private *recv,
1919				    struct ib_mad_private *response,
1920				    size_t *resp_len, bool opa)
1921{
1922	const struct ib_mad_hdr *recv_hdr = (const struct ib_mad_hdr *)recv->mad;
1923	struct ib_mad_hdr *resp_hdr = (struct ib_mad_hdr *)response->mad;
1924
1925	if (recv_hdr->method == IB_MGMT_METHOD_GET ||
1926	    recv_hdr->method == IB_MGMT_METHOD_SET) {
1927		memcpy(response, recv, mad_priv_size(response));
1928		response->header.recv_wc.wc = &response->header.wc;
1929		response->header.recv_wc.recv_buf.mad = (struct ib_mad *)response->mad;
1930		response->header.recv_wc.recv_buf.grh = &response->grh;
1931		resp_hdr->method = IB_MGMT_METHOD_GET_RESP;
1932		resp_hdr->status = cpu_to_be16(IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB);
1933		if (recv_hdr->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
1934			resp_hdr->status |= IB_SMP_DIRECTION;
1935
1936		if (opa && recv_hdr->base_version == OPA_MGMT_BASE_VERSION) {
1937			if (recv_hdr->mgmt_class ==
1938			    IB_MGMT_CLASS_SUBN_LID_ROUTED ||
1939			    recv_hdr->mgmt_class ==
1940			    IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
1941				*resp_len = opa_get_smp_header_size(
1942							(struct opa_smp *)recv->mad);
1943			else
1944				*resp_len = sizeof(struct ib_mad_hdr);
1945		}
1946
1947		return true;
1948	} else {
1949		return false;
1950	}
1951}
1952
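/*
 * OPA variant of handle_ib_smi(): process an incoming OPA directed-route
 * SMP, handling it locally, discarding it, or forwarding it on a switch.
 */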
1953static enum smi_action
1954handle_opa_smi(struct ib_mad_port_private *port_priv,
1955	       struct ib_mad_qp_info *qp_info,
1956	       struct ib_wc *wc,
1957	       int port_num,
1958	       struct ib_mad_private *recv,
1959	       struct ib_mad_private *response)
1960{
1961	enum smi_forward_action retsmi;
1962	struct opa_smp *smp = (struct opa_smp *)recv->mad;
1963
1964	trace_ib_mad_handle_opa_smi(smp);
1965
1966	if (opa_smi_handle_dr_smp_recv(smp,
1967				   rdma_cap_ib_switch(port_priv->device),
1968				   port_num,
1969				   port_priv->device->phys_port_cnt) ==
1970				   IB_SMI_DISCARD)
1971		return IB_SMI_DISCARD;
1972
1973	retsmi = opa_smi_check_forward_dr_smp(smp);
1974	if (retsmi == IB_SMI_LOCAL)
1975		return IB_SMI_HANDLE;
1976
1977	if (retsmi == IB_SMI_SEND) { /* don't forward */
1978		if (opa_smi_handle_dr_smp_send(smp,
1979					   rdma_cap_ib_switch(port_priv->device),
1980					   port_num) == IB_SMI_DISCARD)
1981			return IB_SMI_DISCARD;
1982
1983		if (opa_smi_check_local_smp(smp, port_priv->device) ==
1984		    IB_SMI_DISCARD)
1985			return IB_SMI_DISCARD;
1986
1987	} else if (rdma_cap_ib_switch(port_priv->device)) {
1988		/* forward case for switches */
1989		memcpy(response, recv, mad_priv_size(response));
1990		response->header.recv_wc.wc = &response->header.wc;
1991		response->header.recv_wc.recv_buf.opa_mad =
1992				(struct opa_mad *)response->mad;
1993		response->header.recv_wc.recv_buf.grh = &response->grh;
1994
1995		agent_send_response((const struct ib_mad_hdr *)response->mad,
1996				    &response->grh, wc,
1997				    port_priv->device,
1998				    opa_smi_get_fwd_port(smp),
1999				    qp_info->qp->qp_num,
2000				    recv->header.wc.byte_len,
2001				    true);
2002
2003		return IB_SMI_DISCARD;
2004	}
2005
2006	return IB_SMI_HANDLE;
2007}
2008
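/*
 * Dispatch an incoming SMP to the OPA or IB directed-route handler based
 * on its base and class versions.
 */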
2009static enum smi_action
2010handle_smi(struct ib_mad_port_private *port_priv,
2011	   struct ib_mad_qp_info *qp_info,
2012	   struct ib_wc *wc,
2013	   int port_num,
2014	   struct ib_mad_private *recv,
2015	   struct ib_mad_private *response,
2016	   bool opa)
2017{
2018	struct ib_mad_hdr *mad_hdr = (struct ib_mad_hdr *)recv->mad;
2019
2020	if (opa && mad_hdr->base_version == OPA_MGMT_BASE_VERSION &&
2021	    mad_hdr->class_version == OPA_SM_CLASS_VERSION)
2022		return handle_opa_smi(port_priv, qp_info, wc, port_num, recv,
2023				      response);
2024
2025	return handle_ib_smi(port_priv, qp_info, wc, port_num, recv, response);
2026}
2027
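/*
 * Receive completion handler.  Unmaps and validates the received MAD,
 * runs SMP processing for directed-route MADs, offers the MAD to the
 * device driver via process_mad() when one is provided, and then either
 * hands it to the matching agent or generates an unmatched response.
 * Finally, a buffer is reposted on the receive queue.
 */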
2028static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc)
2029{
2030	struct ib_mad_port_private *port_priv = cq->cq_context;
2031	struct ib_mad_list_head *mad_list =
2032		container_of(wc->wr_cqe, struct ib_mad_list_head, cqe);
2033	struct ib_mad_qp_info *qp_info;
2034	struct ib_mad_private_header *mad_priv_hdr;
2035	struct ib_mad_private *recv, *response = NULL;
2036	struct ib_mad_agent_private *mad_agent;
2037	int port_num;
2038	int ret = IB_MAD_RESULT_SUCCESS;
2039	size_t mad_size;
2040	u16 resp_mad_pkey_index = 0;
2041	bool opa;
2042
2043	if (list_empty_careful(&port_priv->port_list))
2044		return;
2045
2046	if (wc->status != IB_WC_SUCCESS) {
2047		/*
2048		 * Receive errors indicate that the QP has entered the error
2049		 * state - error handling/shutdown code will cleanup
2050		 */
2051		return;
2052	}
2053
2054	qp_info = mad_list->mad_queue->qp_info;
2055	dequeue_mad(mad_list);
2056
2057	opa = rdma_cap_opa_mad(qp_info->port_priv->device,
2058			       qp_info->port_priv->port_num);
2059
2060	mad_priv_hdr = container_of(mad_list, struct ib_mad_private_header,
2061				    mad_list);
2062	recv = container_of(mad_priv_hdr, struct ib_mad_private, header);
2063	ib_dma_unmap_single(port_priv->device,
2064			    recv->header.mapping,
2065			    mad_priv_dma_size(recv),
2066			    DMA_FROM_DEVICE);
2067
2068	/* Setup MAD receive work completion from "normal" work completion */
2069	recv->header.wc = *wc;
2070	recv->header.recv_wc.wc = &recv->header.wc;
2071
2072	if (opa && ((struct ib_mad_hdr *)(recv->mad))->base_version == OPA_MGMT_BASE_VERSION) {
2073		recv->header.recv_wc.mad_len = wc->byte_len - sizeof(struct ib_grh);
2074		recv->header.recv_wc.mad_seg_size = sizeof(struct opa_mad);
2075	} else {
2076		recv->header.recv_wc.mad_len = sizeof(struct ib_mad);
2077		recv->header.recv_wc.mad_seg_size = sizeof(struct ib_mad);
2078	}
2079
2080	recv->header.recv_wc.recv_buf.mad = (struct ib_mad *)recv->mad;
2081	recv->header.recv_wc.recv_buf.grh = &recv->grh;
2082
2083	/* Validate MAD */
2084	if (!validate_mad((const struct ib_mad_hdr *)recv->mad, qp_info, opa))
2085		goto out;
2086
2087	trace_ib_mad_recv_done_handler(qp_info, wc,
2088				       (struct ib_mad_hdr *)recv->mad);
2089
2090	mad_size = recv->mad_size;
2091	response = alloc_mad_private(mad_size, GFP_KERNEL);
2092	if (!response)
2093		goto out;
2094
2095	if (rdma_cap_ib_switch(port_priv->device))
2096		port_num = wc->port_num;
2097	else
2098		port_num = port_priv->port_num;
2099
2100	if (((struct ib_mad_hdr *)recv->mad)->mgmt_class ==
2101	    IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
2102		if (handle_smi(port_priv, qp_info, wc, port_num, recv,
2103			       response, opa)
2104		    == IB_SMI_DISCARD)
2105			goto out;
2106	}
2107
2108	/* Give driver "right of first refusal" on incoming MAD */
2109	if (port_priv->device->ops.process_mad) {
2110		ret = port_priv->device->ops.process_mad(
2111			port_priv->device, 0, port_priv->port_num, wc,
2112			&recv->grh, (const struct ib_mad *)recv->mad,
2113			(struct ib_mad *)response->mad, &mad_size,
2114			&resp_mad_pkey_index);
2115
2116		if (opa)
2117			wc->pkey_index = resp_mad_pkey_index;
2118
2119		if (ret & IB_MAD_RESULT_SUCCESS) {
2120			if (ret & IB_MAD_RESULT_CONSUMED)
2121				goto out;
2122			if (ret & IB_MAD_RESULT_REPLY) {
2123				agent_send_response((const struct ib_mad_hdr *)response->mad,
2124						    &recv->grh, wc,
2125						    port_priv->device,
2126						    port_num,
2127						    qp_info->qp->qp_num,
2128						    mad_size, opa);
2129				goto out;
2130			}
2131		}
2132	}
2133
2134	mad_agent = find_mad_agent(port_priv, (const struct ib_mad_hdr *)recv->mad);
2135	if (mad_agent) {
2136		trace_ib_mad_recv_done_agent(mad_agent);
2137		ib_mad_complete_recv(mad_agent, &recv->header.recv_wc);
2138		/*
2139		 * recv is freed inside ib_mad_complete_recv(), either in
2140		 * its error paths or by the agent's recv_handler
2141		 */
2142		recv = NULL;
2143	} else if ((ret & IB_MAD_RESULT_SUCCESS) &&
2144		   generate_unmatched_resp(recv, response, &mad_size, opa)) {
2145		agent_send_response((const struct ib_mad_hdr *)response->mad, &recv->grh, wc,
2146				    port_priv->device, port_num,
2147				    qp_info->qp->qp_num, mad_size, opa);
2148	}
2149
2150out:
2151	/* Post another receive request for this QP */
2152	if (response) {
2153		ib_mad_post_receive_mads(qp_info, response);
2154		kfree(recv);
2155	} else
2156		ib_mad_post_receive_mads(qp_info, recv);
2157}
2158
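/*
 * Cancel the agent's timeout work when the wait list is empty, or pull
 * the work forward when the earliest pending timeout is now sooner than
 * the one currently scheduled.
 */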
2159static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv)
2160{
2161	struct ib_mad_send_wr_private *mad_send_wr;
2162	unsigned long delay;
2163
2164	if (list_empty(&mad_agent_priv->wait_list)) {
2165		cancel_delayed_work(&mad_agent_priv->timed_work);
2166	} else {
2167		mad_send_wr = list_entry(mad_agent_priv->wait_list.next,
2168					 struct ib_mad_send_wr_private,
2169					 agent_list);
2170
2171		if (time_after(mad_agent_priv->timeout,
2172			       mad_send_wr->timeout)) {
2173			mad_agent_priv->timeout = mad_send_wr->timeout;
2174			delay = mad_send_wr->timeout - jiffies;
2175			if ((long)delay <= 0)
2176				delay = 1;
2177			mod_delayed_work(mad_agent_priv->qp_info->port_priv->wq,
2178					 &mad_agent_priv->timed_work, delay);
2179		}
2180	}
2181}
2182
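/*
 * Move a send that is awaiting a response onto the wait list, keeping
 * the list ordered by absolute timeout, and reschedule the timeout work
 * if this entry now expires first.
 */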
2183static void wait_for_response(struct ib_mad_send_wr_private *mad_send_wr)
2184{
2185	struct ib_mad_agent_private *mad_agent_priv;
2186	struct ib_mad_send_wr_private *temp_mad_send_wr;
2187	struct list_head *list_item;
2188	unsigned long delay;
2189
2190	mad_agent_priv = mad_send_wr->mad_agent_priv;
2191	list_del(&mad_send_wr->agent_list);
2192
2193	delay = mad_send_wr->timeout;
2194	mad_send_wr->timeout += jiffies;
2195
2196	if (delay) {
2197		list_for_each_prev(list_item, &mad_agent_priv->wait_list) {
2198			temp_mad_send_wr = list_entry(list_item,
2199						struct ib_mad_send_wr_private,
2200						agent_list);
2201			if (time_after(mad_send_wr->timeout,
2202				       temp_mad_send_wr->timeout))
2203				break;
2204		}
2205	} else
2207		list_item = &mad_agent_priv->wait_list;
2208	list_add(&mad_send_wr->agent_list, list_item);
2209
2210	/* Reschedule a work item if we have a shorter timeout */
2211	if (mad_agent_priv->wait_list.next == &mad_send_wr->agent_list)
2212		mod_delayed_work(mad_agent_priv->qp_info->port_priv->wq,
2213				 &mad_agent_priv->timed_work, delay);
2214}
2215
2216void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr,
2217			  unsigned long timeout_ms)
2218{
2219	mad_send_wr->timeout = msecs_to_jiffies(timeout_ms);
2220	wait_for_response(mad_send_wr);
2221}
2222
2223/*
2224 * Process a send work completion
2225 */
2226void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr,
2227			     struct ib_mad_send_wc *mad_send_wc)
2228{
2229	struct ib_mad_agent_private	*mad_agent_priv;
2230	unsigned long			flags;
2231	int				ret;
2232
2233	mad_agent_priv = mad_send_wr->mad_agent_priv;
2234	spin_lock_irqsave(&mad_agent_priv->lock, flags);
2235	if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) {
2236		ret = ib_process_rmpp_send_wc(mad_send_wr, mad_send_wc);
2237		if (ret == IB_RMPP_RESULT_CONSUMED)
2238			goto done;
2239	} else
2240		ret = IB_RMPP_RESULT_UNHANDLED;
2241
2242	if (mad_send_wc->status != IB_WC_SUCCESS &&
2243	    mad_send_wr->status == IB_WC_SUCCESS) {
2244		mad_send_wr->status = mad_send_wc->status;
2245		mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
2246	}
2247
2248	if (--mad_send_wr->refcount > 0) {
2249		if (mad_send_wr->refcount == 1 && mad_send_wr->timeout &&
2250		    mad_send_wr->status == IB_WC_SUCCESS) {
2251			wait_for_response(mad_send_wr);
2252		}
2253		goto done;
2254	}
2255
2256	/* Remove send from MAD agent and notify client of completion */
2257	list_del(&mad_send_wr->agent_list);
2258	adjust_timeout(mad_agent_priv);
2259	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2260
2261	if (mad_send_wr->status != IB_WC_SUCCESS)
2262		mad_send_wc->status = mad_send_wr->status;
2263	if (ret == IB_RMPP_RESULT_INTERNAL)
2264		ib_rmpp_send_handler(mad_send_wc);
2265	else
2266		mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
2267						   mad_send_wc);
2268
2269	/* Release reference on agent taken when sending */
2270	deref_mad_agent(mad_agent_priv);
2271	return;
2272done:
2273	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2274}
2275
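/*
 * Send completion handler.  Unmaps the send buffers, completes the work
 * request, and posts the next send from the overflow list if the send
 * queue had been full.
 */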
2276static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc)
2277{
2278	struct ib_mad_port_private *port_priv = cq->cq_context;
2279	struct ib_mad_list_head *mad_list =
2280		container_of(wc->wr_cqe, struct ib_mad_list_head, cqe);
2281	struct ib_mad_send_wr_private	*mad_send_wr, *queued_send_wr;
2282	struct ib_mad_qp_info		*qp_info;
2283	struct ib_mad_queue		*send_queue;
2284	struct ib_mad_send_wc		mad_send_wc;
2285	unsigned long flags;
2286	int ret;
2287
2288	if (list_empty_careful(&port_priv->port_list))
2289		return;
2290
2291	if (wc->status != IB_WC_SUCCESS) {
2292		if (!ib_mad_send_error(port_priv, wc))
2293			return;
2294	}
2295
2296	mad_send_wr = container_of(mad_list, struct ib_mad_send_wr_private,
2297				   mad_list);
2298	send_queue = mad_list->mad_queue;
2299	qp_info = send_queue->qp_info;
2300
2301	trace_ib_mad_send_done_agent(mad_send_wr->mad_agent_priv);
2302	trace_ib_mad_send_done_handler(mad_send_wr, wc);
2303
2304retry:
2305	ib_dma_unmap_single(mad_send_wr->send_buf.mad_agent->device,
2306			    mad_send_wr->header_mapping,
2307			    mad_send_wr->sg_list[0].length, DMA_TO_DEVICE);
2308	ib_dma_unmap_single(mad_send_wr->send_buf.mad_agent->device,
2309			    mad_send_wr->payload_mapping,
2310			    mad_send_wr->sg_list[1].length, DMA_TO_DEVICE);
2311	queued_send_wr = NULL;
2312	spin_lock_irqsave(&send_queue->lock, flags);
2313	list_del(&mad_list->list);
2314
2315	/* Move queued send to the send queue */
2316	if (send_queue->count-- > send_queue->max_active) {
2317		mad_list = container_of(qp_info->overflow_list.next,
2318					struct ib_mad_list_head, list);
2319		queued_send_wr = container_of(mad_list,
2320					struct ib_mad_send_wr_private,
2321					mad_list);
2322		list_move_tail(&mad_list->list, &send_queue->list);
2323	}
2324	spin_unlock_irqrestore(&send_queue->lock, flags);
2325
2326	mad_send_wc.send_buf = &mad_send_wr->send_buf;
2327	mad_send_wc.status = wc->status;
2328	mad_send_wc.vendor_err = wc->vendor_err;
2329	ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
2330
2331	if (queued_send_wr) {
2332		trace_ib_mad_send_done_resend(queued_send_wr, qp_info);
2333		ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr.wr,
2334				   NULL);
2335		if (ret) {
2336			dev_err(&port_priv->device->dev,
2337				"ib_post_send failed: %d\n", ret);
2338			mad_send_wr = queued_send_wr;
2339			wc->status = IB_WC_LOC_QP_OP_ERR;
2340			goto retry;
2341		}
2342	}
2343}
2344
2345static void mark_sends_for_retry(struct ib_mad_qp_info *qp_info)
2346{
2347	struct ib_mad_send_wr_private *mad_send_wr;
2348	struct ib_mad_list_head *mad_list;
2349	unsigned long flags;
2350
2351	spin_lock_irqsave(&qp_info->send_queue.lock, flags);
2352	list_for_each_entry(mad_list, &qp_info->send_queue.list, list) {
2353		mad_send_wr = container_of(mad_list,
2354					   struct ib_mad_send_wr_private,
2355					   mad_list);
2356		mad_send_wr->retry = 1;
2357	}
2358	spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
2359}
2360
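/*
 * Handle a failed send completion.  Flushed sends that were marked for
 * retry are simply reposted; other errors transition the QP from SQE
 * back to RTS and mark the remaining posted sends for retry.  Returns
 * true if the failure should still be reported to the client.
 */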
2361static bool ib_mad_send_error(struct ib_mad_port_private *port_priv,
2362		struct ib_wc *wc)
2363{
2364	struct ib_mad_list_head *mad_list =
2365		container_of(wc->wr_cqe, struct ib_mad_list_head, cqe);
2366	struct ib_mad_qp_info *qp_info = mad_list->mad_queue->qp_info;
2367	struct ib_mad_send_wr_private *mad_send_wr;
2368	int ret;
2369
2370	/*
2371	 * Send errors will transition the QP to SQE - move
2372	 * QP to RTS and repost flushed work requests
2373	 */
2374	mad_send_wr = container_of(mad_list, struct ib_mad_send_wr_private,
2375				   mad_list);
2376	if (wc->status == IB_WC_WR_FLUSH_ERR) {
2377		if (mad_send_wr->retry) {
2378			/* Repost send */
2379			mad_send_wr->retry = 0;
2380			trace_ib_mad_error_handler(mad_send_wr, qp_info);
2381			ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr.wr,
2382					   NULL);
2383			if (!ret)
2384				return false;
2385		}
2386	} else {
2387		struct ib_qp_attr *attr;
2388
2389		/* Transition QP to RTS and fail offending send */
2390		attr = kmalloc(sizeof *attr, GFP_KERNEL);
2391		if (attr) {
2392			attr->qp_state = IB_QPS_RTS;
2393			attr->cur_qp_state = IB_QPS_SQE;
2394			ret = ib_modify_qp(qp_info->qp, attr,
2395					   IB_QP_STATE | IB_QP_CUR_STATE);
2396			kfree(attr);
2397			if (ret)
2398				dev_err(&port_priv->device->dev,
2399					"%s - ib_modify_qp to RTS: %d\n",
2400					__func__, ret);
2401			else
2402				mark_sends_for_retry(qp_info);
2403		}
2404	}
2405
2406	return true;
2407}
2408
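/*
 * Cancel an agent's outstanding sends: active sends are marked with
 * IB_WC_WR_FLUSH_ERR so their completions report the failure, while
 * sends waiting for a response are removed from the wait list and
 * reported to the client directly.
 */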
2409static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv)
2410{
2411	unsigned long flags;
2412	struct ib_mad_send_wr_private *mad_send_wr, *temp_mad_send_wr;
2413	struct ib_mad_send_wc mad_send_wc;
2414	struct list_head cancel_list;
2415
2416	INIT_LIST_HEAD(&cancel_list);
2417
2418	spin_lock_irqsave(&mad_agent_priv->lock, flags);
2419	list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr,
2420				 &mad_agent_priv->send_list, agent_list) {
2421		if (mad_send_wr->status == IB_WC_SUCCESS) {
2422			mad_send_wr->status = IB_WC_WR_FLUSH_ERR;
2423			mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
2424		}
2425	}
2426
2427	/* Empty wait list to prevent receives from finding a request */
2428	list_splice_init(&mad_agent_priv->wait_list, &cancel_list);
2429	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2430
2431	/* Report all cancelled requests */
2432	mad_send_wc.status = IB_WC_WR_FLUSH_ERR;
2433	mad_send_wc.vendor_err = 0;
2434
2435	list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr,
2436				 &cancel_list, agent_list) {
2437		mad_send_wc.send_buf = &mad_send_wr->send_buf;
2438		list_del(&mad_send_wr->agent_list);
2439		mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
2440						   &mad_send_wc);
2441		deref_mad_agent(mad_agent_priv);
2442	}
2443}
2444
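/*
 * Locate the send WR, if any, that corresponds to a client's send
 * buffer, searching both the wait list and the active send list.
 */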
2445static struct ib_mad_send_wr_private*
2446find_send_wr(struct ib_mad_agent_private *mad_agent_priv,
2447	     struct ib_mad_send_buf *send_buf)
2448{
2449	struct ib_mad_send_wr_private *mad_send_wr;
2450
2451	list_for_each_entry(mad_send_wr, &mad_agent_priv->wait_list,
2452			    agent_list) {
2453		if (&mad_send_wr->send_buf == send_buf)
2454			return mad_send_wr;
2455	}
2456
2457	list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list,
2458			    agent_list) {
2459		if (is_rmpp_data_mad(mad_agent_priv,
2460				     mad_send_wr->send_buf.mad) &&
2461		    &mad_send_wr->send_buf == send_buf)
2462			return mad_send_wr;
2463	}
2464	return NULL;
2465}
2466
2467int ib_modify_mad(struct ib_mad_agent *mad_agent,
2468		  struct ib_mad_send_buf *send_buf, u32 timeout_ms)
2469{
2470	struct ib_mad_agent_private *mad_agent_priv;
2471	struct ib_mad_send_wr_private *mad_send_wr;
2472	unsigned long flags;
2473	int active;
2474
2475	mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private,
2476				      agent);
2477	spin_lock_irqsave(&mad_agent_priv->lock, flags);
2478	mad_send_wr = find_send_wr(mad_agent_priv, send_buf);
2479	if (!mad_send_wr || mad_send_wr->status != IB_WC_SUCCESS) {
2480		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2481		return -EINVAL;
2482	}
2483
2484	active = (!mad_send_wr->timeout || mad_send_wr->refcount > 1);
2485	if (!timeout_ms) {
2486		mad_send_wr->status = IB_WC_WR_FLUSH_ERR;
2487		mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
2488	}
2489
2490	mad_send_wr->send_buf.timeout_ms = timeout_ms;
2491	if (active)
2492		mad_send_wr->timeout = msecs_to_jiffies(timeout_ms);
2493	else
2494		ib_reset_mad_timeout(mad_send_wr, timeout_ms);
2495
2496	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2497	return 0;
2498}
2499EXPORT_SYMBOL(ib_modify_mad);
2500
2501void ib_cancel_mad(struct ib_mad_agent *mad_agent,
2502		   struct ib_mad_send_buf *send_buf)
2503{
2504	ib_modify_mad(mad_agent, send_buf, 0);
2505}
2506EXPORT_SYMBOL(ib_cancel_mad);
2507
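/*
 * Work handler for locally processed (loopback) MADs.  For each queued
 * local completion, deliver the generated MAD to the receiving agent,
 * if there is one, and then report the send completion to the sender.
 */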
2508static void local_completions(struct work_struct *work)
2509{
2510	struct ib_mad_agent_private *mad_agent_priv;
2511	struct ib_mad_local_private *local;
2512	struct ib_mad_agent_private *recv_mad_agent;
2513	unsigned long flags;
2514	int free_mad;
2515	struct ib_wc wc;
2516	struct ib_mad_send_wc mad_send_wc;
2517	bool opa;
2518
2519	mad_agent_priv =
2520		container_of(work, struct ib_mad_agent_private, local_work);
2521
2522	opa = rdma_cap_opa_mad(mad_agent_priv->qp_info->port_priv->device,
2523			       mad_agent_priv->qp_info->port_priv->port_num);
2524
2525	spin_lock_irqsave(&mad_agent_priv->lock, flags);
2526	while (!list_empty(&mad_agent_priv->local_list)) {
2527		local = list_entry(mad_agent_priv->local_list.next,
2528				   struct ib_mad_local_private,
2529				   completion_list);
2530		list_del(&local->completion_list);
2531		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2532		free_mad = 0;
2533		if (local->mad_priv) {
2534			u8 base_version;
2535			recv_mad_agent = local->recv_mad_agent;
2536			if (!recv_mad_agent) {
2537				dev_err(&mad_agent_priv->agent.device->dev,
2538					"No receive MAD agent for local completion\n");
2539				free_mad = 1;
2540				goto local_send_completion;
2541			}
2542
2543			/*
2544			 * Defined behavior is to complete response
2545			 * before request
2546			 */
2547			build_smp_wc(recv_mad_agent->agent.qp,
2548				     local->mad_send_wr->send_wr.wr.wr_cqe,
2549				     be16_to_cpu(IB_LID_PERMISSIVE),
2550				     local->mad_send_wr->send_wr.pkey_index,
2551				     recv_mad_agent->agent.port_num, &wc);
2552
2553			local->mad_priv->header.recv_wc.wc = &wc;
2554
2555			base_version = ((struct ib_mad_hdr *)(local->mad_priv->mad))->base_version;
2556			if (opa && base_version == OPA_MGMT_BASE_VERSION) {
2557				local->mad_priv->header.recv_wc.mad_len = local->return_wc_byte_len;
2558				local->mad_priv->header.recv_wc.mad_seg_size = sizeof(struct opa_mad);
2559			} else {
2560				local->mad_priv->header.recv_wc.mad_len = sizeof(struct ib_mad);
2561				local->mad_priv->header.recv_wc.mad_seg_size = sizeof(struct ib_mad);
2562			}
2563
2564			INIT_LIST_HEAD(&local->mad_priv->header.recv_wc.rmpp_list);
2565			list_add(&local->mad_priv->header.recv_wc.recv_buf.list,
2566				 &local->mad_priv->header.recv_wc.rmpp_list);
2567			local->mad_priv->header.recv_wc.recv_buf.grh = NULL;
2568			local->mad_priv->header.recv_wc.recv_buf.mad =
2569						(struct ib_mad *)local->mad_priv->mad;
2570			recv_mad_agent->agent.recv_handler(
2571						&recv_mad_agent->agent,
2572						&local->mad_send_wr->send_buf,
2573						&local->mad_priv->header.recv_wc);
2574			spin_lock_irqsave(&recv_mad_agent->lock, flags);
2575			deref_mad_agent(recv_mad_agent);
2576			spin_unlock_irqrestore(&recv_mad_agent->lock, flags);
2577		}
2578
2579local_send_completion:
2580		/* Complete send */
2581		mad_send_wc.status = IB_WC_SUCCESS;
2582		mad_send_wc.vendor_err = 0;
2583		mad_send_wc.send_buf = &local->mad_send_wr->send_buf;
2584		mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
2585						   &mad_send_wc);
2586
2587		spin_lock_irqsave(&mad_agent_priv->lock, flags);
2588		deref_mad_agent(mad_agent_priv);
2589		if (free_mad)
2590			kfree(local->mad_priv);
2591		kfree(local);
2592	}
2593	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2594}
2595
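/*
 * Retransmit a timed-out send if it has retries left.  Returns 0 and
 * places the send back on the send list on success, -ETIMEDOUT when no
 * retries remain, or another negative errno on failure.
 */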
2596static int retry_send(struct ib_mad_send_wr_private *mad_send_wr)
2597{
2598	int ret;
2599
2600	if (!mad_send_wr->retries_left)
2601		return -ETIMEDOUT;
2602
2603	mad_send_wr->retries_left--;
2604	mad_send_wr->send_buf.retries++;
2605
2606	mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms);
2607
2608	if (ib_mad_kernel_rmpp_agent(&mad_send_wr->mad_agent_priv->agent)) {
2609		ret = ib_retry_rmpp(mad_send_wr);
2610		switch (ret) {
2611		case IB_RMPP_RESULT_UNHANDLED:
2612			ret = ib_send_mad(mad_send_wr);
2613			break;
2614		case IB_RMPP_RESULT_CONSUMED:
2615			ret = 0;
2616			break;
2617		default:
2618			ret = -ECOMM;
2619			break;
2620		}
2621	} else
2622		ret = ib_send_mad(mad_send_wr);
2623
2624	if (!ret) {
2625		mad_send_wr->refcount++;
2626		list_add_tail(&mad_send_wr->agent_list,
2627			      &mad_send_wr->mad_agent_priv->send_list);
2628	}
2629	return ret;
2630}
2631
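/*
 * Delayed work handler that expires entries on the wait list.  Sends
 * that cannot be retried are completed with IB_WC_RESP_TIMEOUT_ERR (or
 * their previously recorded error status), and the work is rescheduled
 * for the next pending timeout.
 */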
2632static void timeout_sends(struct work_struct *work)
2633{
2634	struct ib_mad_agent_private *mad_agent_priv;
2635	struct ib_mad_send_wr_private *mad_send_wr;
2636	struct ib_mad_send_wc mad_send_wc;
2637	unsigned long flags, delay;
2638
2639	mad_agent_priv = container_of(work, struct ib_mad_agent_private,
2640				      timed_work.work);
2641	mad_send_wc.vendor_err = 0;
2642
2643	spin_lock_irqsave(&mad_agent_priv->lock, flags);
2644	while (!list_empty(&mad_agent_priv->wait_list)) {
2645		mad_send_wr = list_entry(mad_agent_priv->wait_list.next,
2646					 struct ib_mad_send_wr_private,
2647					 agent_list);
2648
2649		if (time_after(mad_send_wr->timeout, jiffies)) {
2650			delay = mad_send_wr->timeout - jiffies;
2651			if ((long)delay <= 0)
2652				delay = 1;
2653			queue_delayed_work(mad_agent_priv->qp_info->
2654					   port_priv->wq,
2655					   &mad_agent_priv->timed_work, delay);
2656			break;
2657		}
2658
2659		list_del(&mad_send_wr->agent_list);
2660		if (mad_send_wr->status == IB_WC_SUCCESS &&
2661		    !retry_send(mad_send_wr))
2662			continue;
2663
2664		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2665
2666		if (mad_send_wr->status == IB_WC_SUCCESS)
2667			mad_send_wc.status = IB_WC_RESP_TIMEOUT_ERR;
2668		else
2669			mad_send_wc.status = mad_send_wr->status;
2670		mad_send_wc.send_buf = &mad_send_wr->send_buf;
2671		mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
2672						   &mad_send_wc);
2673
2674		deref_mad_agent(mad_agent_priv);
2675		spin_lock_irqsave(&mad_agent_priv->lock, flags);
2676	}
2677	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2678}
2679
2680/*
2681 * Allocate receive MADs and post receive WRs for them
2682 */
2683static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
2684				    struct ib_mad_private *mad)
2685{
2686	unsigned long flags;
2687	int post, ret;
2688	struct ib_mad_private *mad_priv;
2689	struct ib_sge sg_list;
2690	struct ib_recv_wr recv_wr;
2691	struct ib_mad_queue *recv_queue = &qp_info->recv_queue;
2692
2693	/* Initialize common scatter list fields */
2694	sg_list.lkey = qp_info->port_priv->pd->local_dma_lkey;
2695
2696	/* Initialize common receive WR fields */
2697	recv_wr.next = NULL;
2698	recv_wr.sg_list = &sg_list;
2699	recv_wr.num_sge = 1;
2700
2701	do {
2702		/* Allocate and map receive buffer */
2703		if (mad) {
2704			mad_priv = mad;
2705			mad = NULL;
2706		} else {
2707			mad_priv = alloc_mad_private(port_mad_size(qp_info->port_priv),
2708						     GFP_ATOMIC);
2709			if (!mad_priv) {
2710				ret = -ENOMEM;
2711				break;
2712			}
2713		}
2714		sg_list.length = mad_priv_dma_size(mad_priv);
2715		sg_list.addr = ib_dma_map_single(qp_info->port_priv->device,
2716						 &mad_priv->grh,
2717						 mad_priv_dma_size(mad_priv),
2718						 DMA_FROM_DEVICE);
2719		if (unlikely(ib_dma_mapping_error(qp_info->port_priv->device,
2720						  sg_list.addr))) {
2721			kfree(mad_priv);
2722			ret = -ENOMEM;
2723			break;
2724		}
2725		mad_priv->header.mapping = sg_list.addr;
2726		mad_priv->header.mad_list.mad_queue = recv_queue;
2727		mad_priv->header.mad_list.cqe.done = ib_mad_recv_done;
2728		recv_wr.wr_cqe = &mad_priv->header.mad_list.cqe;
2729
2730		/* Post receive WR */
2731		spin_lock_irqsave(&recv_queue->lock, flags);
2732		post = (++recv_queue->count < recv_queue->max_active);
2733		list_add_tail(&mad_priv->header.mad_list.list, &recv_queue->list);
2734		spin_unlock_irqrestore(&recv_queue->lock, flags);
2735		ret = ib_post_recv(qp_info->qp, &recv_wr, NULL);
2736		if (ret) {
2737			spin_lock_irqsave(&recv_queue->lock, flags);
2738			list_del(&mad_priv->header.mad_list.list);
2739			recv_queue->count--;
2740			spin_unlock_irqrestore(&recv_queue->lock, flags);
2741			ib_dma_unmap_single(qp_info->port_priv->device,
2742					    mad_priv->header.mapping,
2743					    mad_priv_dma_size(mad_priv),
2744					    DMA_FROM_DEVICE);
2745			kfree(mad_priv);
2746			dev_err(&qp_info->port_priv->device->dev,
2747				"ib_post_recv failed: %d\n", ret);
2748			break;
2749		}
2750	} while (post);
2751
2752	return ret;
2753}
2754
2755/*
2756 * Return all the posted receive MADs
2757 */
2758static void cleanup_recv_queue(struct ib_mad_qp_info *qp_info)
2759{
2760	struct ib_mad_private_header *mad_priv_hdr;
2761	struct ib_mad_private *recv;
2762	struct ib_mad_list_head *mad_list;
2763
2764	if (!qp_info->qp)
2765		return;
2766
2767	while (!list_empty(&qp_info->recv_queue.list)) {
2769		mad_list = list_entry(qp_info->recv_queue.list.next,
2770				      struct ib_mad_list_head, list);
2771		mad_priv_hdr = container_of(mad_list,
2772					    struct ib_mad_private_header,
2773					    mad_list);
2774		recv = container_of(mad_priv_hdr, struct ib_mad_private,
2775				    header);
2776
2777		/* Remove from posted receive MAD list */
2778		list_del(&mad_list->list);
2779
2780		ib_dma_unmap_single(qp_info->port_priv->device,
2781				    recv->header.mapping,
2782				    mad_priv_dma_size(recv),
2783				    DMA_FROM_DEVICE);
2784		kfree(recv);
2785	}
2786
2787	qp_info->recv_queue.count = 0;
2788}
2789
2790/*
2791 * Start the port
2792 */
2793static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
2794{
2795	int ret, i;
2796	struct ib_qp_attr *attr;
2797	struct ib_qp *qp;
2798	u16 pkey_index;
2799
2800	attr = kmalloc(sizeof *attr, GFP_KERNEL);
2801	if (!attr)
2802		return -ENOMEM;
2803
2804	ret = ib_find_pkey(port_priv->device, port_priv->port_num,
2805			   IB_DEFAULT_PKEY_FULL, &pkey_index);
2806	if (ret)
2807		pkey_index = 0;
2808
2809	for (i = 0; i < IB_MAD_QPS_CORE; i++) {
2810		qp = port_priv->qp_info[i].qp;
2811		if (!qp)
2812			continue;
2813
2814		/*
2815		 * PKey index for QP1 is irrelevant but
2816		 * one is needed for the Reset to Init transition
2817		 */
2818		attr->qp_state = IB_QPS_INIT;
2819		attr->pkey_index = pkey_index;
2820		attr->qkey = (qp->qp_num == 0) ? 0 : IB_QP1_QKEY;
2821		ret = ib_modify_qp(qp, attr, IB_QP_STATE |
2822					     IB_QP_PKEY_INDEX | IB_QP_QKEY);
2823		if (ret) {
2824			dev_err(&port_priv->device->dev,
2825				"Couldn't change QP%d state to INIT: %d\n",
2826				i, ret);
2827			goto out;
2828		}
2829
2830		attr->qp_state = IB_QPS_RTR;
2831		ret = ib_modify_qp(qp, attr, IB_QP_STATE);
2832		if (ret) {
2833			dev_err(&port_priv->device->dev,
2834				"Couldn't change QP%d state to RTR: %d\n",
2835				i, ret);
2836			goto out;
2837		}
2838
2839		attr->qp_state = IB_QPS_RTS;
2840		attr->sq_psn = IB_MAD_SEND_Q_PSN;
2841		ret = ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_SQ_PSN);
2842		if (ret) {
2843			dev_err(&port_priv->device->dev,
2844				"Couldn't change QP%d state to RTS: %d\n",
2845				i, ret);
2846			goto out;
2847		}
2848	}
2849
2850	ret = ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP);
2851	if (ret) {
2852		dev_err(&port_priv->device->dev,
2853			"Failed to request completion notification: %d\n",
2854			ret);
2855		goto out;
2856	}
2857
2858	for (i = 0; i < IB_MAD_QPS_CORE; i++) {
2859		if (!port_priv->qp_info[i].qp)
2860			continue;
2861
2862		ret = ib_mad_post_receive_mads(&port_priv->qp_info[i], NULL);
2863		if (ret) {
2864			dev_err(&port_priv->device->dev,
2865				"Couldn't post receive WRs\n");
2866			goto out;
2867		}
2868	}
2869out:
2870	kfree(attr);
2871	return ret;
2872}
2873
2874static void qp_event_handler(struct ib_event *event, void *qp_context)
2875{
2876	struct ib_mad_qp_info	*qp_info = qp_context;
2877
2878	/* It's worse than that! He's dead, Jim! */
2879	dev_err(&qp_info->port_priv->device->dev,
2880		"Fatal error (%d) on MAD QP (%d)\n",
2881		event->event, qp_info->qp->qp_num);
2882}
2883
2884static void init_mad_queue(struct ib_mad_qp_info *qp_info,
2885			   struct ib_mad_queue *mad_queue)
2886{
2887	mad_queue->qp_info = qp_info;
2888	mad_queue->count = 0;
2889	spin_lock_init(&mad_queue->lock);
2890	INIT_LIST_HEAD(&mad_queue->list);
2891}
2892
2893static void init_mad_qp(struct ib_mad_port_private *port_priv,
2894			struct ib_mad_qp_info *qp_info)
2895{
2896	qp_info->port_priv = port_priv;
2897	init_mad_queue(qp_info, &qp_info->send_queue);
2898	init_mad_queue(qp_info, &qp_info->recv_queue);
2899	INIT_LIST_HEAD(&qp_info->overflow_list);
2900}
2901
2902static int create_mad_qp(struct ib_mad_qp_info *qp_info,
2903			 enum ib_qp_type qp_type)
2904{
2905	struct ib_qp_init_attr	qp_init_attr;
2906	int ret;
2907
2908	memset(&qp_init_attr, 0, sizeof qp_init_attr);
2909	qp_init_attr.send_cq = qp_info->port_priv->cq;
2910	qp_init_attr.recv_cq = qp_info->port_priv->cq;
2911	qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
2912	qp_init_attr.cap.max_send_wr = mad_sendq_size;
2913	qp_init_attr.cap.max_recv_wr = mad_recvq_size;
2914	qp_init_attr.cap.max_send_sge = IB_MAD_SEND_REQ_MAX_SG;
2915	qp_init_attr.cap.max_recv_sge = IB_MAD_RECV_REQ_MAX_SG;
2916	qp_init_attr.qp_type = qp_type;
2917	qp_init_attr.port_num = qp_info->port_priv->port_num;
2918	qp_init_attr.qp_context = qp_info;
2919	qp_init_attr.event_handler = qp_event_handler;
2920	qp_info->qp = ib_create_qp(qp_info->port_priv->pd, &qp_init_attr);
2921	if (IS_ERR(qp_info->qp)) {
2922		dev_err(&qp_info->port_priv->device->dev,
2923			"Couldn't create ib_mad QP%d\n",
2924			get_spl_qp_index(qp_type));
2925		ret = PTR_ERR(qp_info->qp);
2926		goto error;
2927	}
2928	/* Use minimum queue sizes unless the CQ is resized */
2929	qp_info->send_queue.max_active = mad_sendq_size;
2930	qp_info->recv_queue.max_active = mad_recvq_size;
2931	return 0;
2932
2933error:
2934	return ret;
2935}
2936
2937static void destroy_mad_qp(struct ib_mad_qp_info *qp_info)
2938{
2939	if (!qp_info->qp)
2940		return;
2941
2942	ib_destroy_qp(qp_info->qp);
2943}
2944
2945/*
2946 * Open the port
2947 * Create the QPs, PD, CQ, and workqueue if needed
2948 */
2949static int ib_mad_port_open(struct ib_device *device,
2950			    int port_num)
2951{
2952	int ret, cq_size;
2953	struct ib_mad_port_private *port_priv;
2954	unsigned long flags;
2955	char name[sizeof "ib_mad123"];
2956	int has_smi;
2957
2958	if (WARN_ON(rdma_max_mad_size(device, port_num) < IB_MGMT_MAD_SIZE))
2959		return -EFAULT;
2960
2961	if (WARN_ON(rdma_cap_opa_mad(device, port_num) &&
2962		    rdma_max_mad_size(device, port_num) < OPA_MGMT_MAD_SIZE))
2963		return -EFAULT;
2964
2965	/* Create new device info */
2966	port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL);
2967	if (!port_priv)
2968		return -ENOMEM;
2969
2970	port_priv->device = device;
2971	port_priv->port_num = port_num;
2972	spin_lock_init(&port_priv->reg_lock);
2973	init_mad_qp(port_priv, &port_priv->qp_info[0]);
2974	init_mad_qp(port_priv, &port_priv->qp_info[1]);
2975
2976	cq_size = mad_sendq_size + mad_recvq_size;
2977	has_smi = rdma_cap_ib_smi(device, port_num);
2978	if (has_smi)
2979		cq_size *= 2;
2980
2981	port_priv->pd = ib_alloc_pd(device, 0);
2982	if (IS_ERR(port_priv->pd)) {
2983		dev_err(&device->dev, "Couldn't create ib_mad PD\n");
2984		ret = PTR_ERR(port_priv->pd);
2985		goto error3;
2986	}
2987
2988	port_priv->cq = ib_alloc_cq(port_priv->device, port_priv, cq_size, 0,
2989			IB_POLL_UNBOUND_WORKQUEUE);
2990	if (IS_ERR(port_priv->cq)) {
2991		dev_err(&device->dev, "Couldn't create ib_mad CQ\n");
2992		ret = PTR_ERR(port_priv->cq);
2993		goto error4;
2994	}
2995
2996	if (has_smi) {
2997		ret = create_mad_qp(&port_priv->qp_info[0], IB_QPT_SMI);
2998		if (ret)
2999			goto error6;
3000	}
3001	ret = create_mad_qp(&port_priv->qp_info[1], IB_QPT_GSI);
3002	if (ret)
3003		goto error7;
3004
3005	snprintf(name, sizeof name, "ib_mad%d", port_num);
3006	port_priv->wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM);
3007	if (!port_priv->wq) {
3008		ret = -ENOMEM;
3009		goto error8;
3010	}
3011
3012	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
3013	list_add_tail(&port_priv->port_list, &ib_mad_port_list);
3014	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
3015
3016	ret = ib_mad_port_start(port_priv);
3017	if (ret) {
3018		dev_err(&device->dev, "Couldn't start port\n");
3019		goto error9;
3020	}
3021
3022	return 0;
3023
3024error9:
3025	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
3026	list_del_init(&port_priv->port_list);
3027	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
3028
3029	destroy_workqueue(port_priv->wq);
3030error8:
3031	destroy_mad_qp(&port_priv->qp_info[1]);
3032error7:
3033	destroy_mad_qp(&port_priv->qp_info[0]);
3034error6:
3035	ib_free_cq(port_priv->cq);
3036	cleanup_recv_queue(&port_priv->qp_info[1]);
3037	cleanup_recv_queue(&port_priv->qp_info[0]);
3038error4:
3039	ib_dealloc_pd(port_priv->pd);
3040error3:
3041	kfree(port_priv);
3042
3043	return ret;
3044}
3045
3046/*
3047 * Close the port
3048 * If there are no classes using the port, free the port
3049 * resources (workqueue, QP, CQ, PD) and remove the port's info structure
3050 */
3051static int ib_mad_port_close(struct ib_device *device, int port_num)
3052{
3053	struct ib_mad_port_private *port_priv;
3054	unsigned long flags;
3055
3056	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
3057	port_priv = __ib_get_mad_port(device, port_num);
3058	if (!port_priv) {
3059		spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
3060		dev_err(&device->dev, "Port %d not found\n", port_num);
3061		return -ENODEV;
3062	}
3063	list_del_init(&port_priv->port_list);
3064	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
3065
3066	destroy_workqueue(port_priv->wq);
3067	destroy_mad_qp(&port_priv->qp_info[1]);
3068	destroy_mad_qp(&port_priv->qp_info[0]);
3069	ib_free_cq(port_priv->cq);
3070	ib_dealloc_pd(port_priv->pd);
3071	cleanup_recv_queue(&port_priv->qp_info[1]);
3072	cleanup_recv_queue(&port_priv->qp_info[0]);
3073	/* XXX: Handle deallocation of MAD registration tables */
3074
3075	kfree(port_priv);
3076
3077	return 0;
3078}
3079
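/*
 * Client "add" callback: open MAD and agent services on every port of
 * the device that supports IB management, undoing any partial setup on
 * failure.  Returns -EOPNOTSUPP if no port supports MADs.
 */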
3080static int ib_mad_init_device(struct ib_device *device)
3081{
3082	int start, i;
3083	unsigned int count = 0;
3084	int ret;
3085
3086	start = rdma_start_port(device);
3087
3088	for (i = start; i <= rdma_end_port(device); i++) {
3089		if (!rdma_cap_ib_mad(device, i))
3090			continue;
3091
3092		ret = ib_mad_port_open(device, i);
3093		if (ret) {
3094			dev_err(&device->dev, "Couldn't open port %d\n", i);
3095			goto error;
3096		}
3097		ret = ib_agent_port_open(device, i);
3098		if (ret) {
3099			dev_err(&device->dev,
3100				"Couldn't open port %d for agents\n", i);
3101			goto error_agent;
3102		}
3103		count++;
3104	}
3105	if (!count)
3106		return -EOPNOTSUPP;
3107
3108	return 0;
3109
3110error_agent:
3111	if (ib_mad_port_close(device, i))
3112		dev_err(&device->dev, "Couldn't close port %d\n", i);
3113
3114error:
3115	while (--i >= start) {
3116		if (!rdma_cap_ib_mad(device, i))
3117			continue;
3118
3119		if (ib_agent_port_close(device, i))
3120			dev_err(&device->dev,
3121				"Couldn't close port %d for agents\n", i);
3122		if (ib_mad_port_close(device, i))
3123			dev_err(&device->dev, "Couldn't close port %d\n", i);
3124	}
3125	return ret;
3126}
3127
3128static void ib_mad_remove_device(struct ib_device *device, void *client_data)
3129{
3130	unsigned int i;
3131
3132	rdma_for_each_port (device, i) {
3133		if (!rdma_cap_ib_mad(device, i))
3134			continue;
3135
3136		if (ib_agent_port_close(device, i))
3137			dev_err(&device->dev,
3138				"Couldn't close port %d for agents\n", i);
3139		if (ib_mad_port_close(device, i))
3140			dev_err(&device->dev, "Couldn't close port %d\n", i);
3141	}
3142}
3143
3144static struct ib_client mad_client = {
3145	.name   = "mad",
3146	.add = ib_mad_init_device,
3147	.remove = ib_mad_remove_device
3148};
3149
3150int ib_mad_init(void)
3151{
3152	mad_recvq_size = min(mad_recvq_size, IB_MAD_QP_MAX_SIZE);
3153	mad_recvq_size = max(mad_recvq_size, IB_MAD_QP_MIN_SIZE);
3154
3155	mad_sendq_size = min(mad_sendq_size, IB_MAD_QP_MAX_SIZE);
3156	mad_sendq_size = max(mad_sendq_size, IB_MAD_QP_MIN_SIZE);
3157
3158	INIT_LIST_HEAD(&ib_mad_port_list);
3159
3160	if (ib_register_client(&mad_client)) {
3161		pr_err("Couldn't register ib_mad client\n");
3162		return -EINVAL;
3163	}
3164
3165	return 0;
3166}
3167
3168void ib_mad_cleanup(void)
3169{
3170	ib_unregister_client(&mad_client);
3171}
3172