1/*
2 * Copyright (c) 2005 Cisco Systems.  All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses.  You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 *     Redistribution and use in source and binary forms, with or
11 *     without modification, are permitted provided that the following
12 *     conditions are met:
13 *
14 *      - Redistributions of source code must retain the above
15 *        copyright notice, this list of conditions and the following
16 *        disclaimer.
17 *
18 *      - Redistributions in binary form must reproduce the above
19 *        copyright notice, this list of conditions and the following
20 *        disclaimer in the documentation and/or other materials
21 *        provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32
33#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
34
35#include <linux/module.h>
36#include <linux/init.h>
37#include <linux/slab.h>
38#include <linux/err.h>
39#include <linux/string.h>
40#include <linux/parser.h>
41#include <linux/random.h>
42#include <linux/jiffies.h>
43#include <linux/lockdep.h>
44#include <linux/inet.h>
45#include <rdma/ib_cache.h>
46
47#include <linux/atomic.h>
48
49#include <scsi/scsi.h>
50#include <scsi/scsi_device.h>
51#include <scsi/scsi_dbg.h>
52#include <scsi/scsi_tcq.h>
53#include <scsi/srp.h>
54#include <scsi/scsi_transport_srp.h>
55
56#include "ib_srp.h"
57
58#define DRV_NAME	"ib_srp"
59#define PFX		DRV_NAME ": "
60
61MODULE_AUTHOR("Roland Dreier");
62MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator");
63MODULE_LICENSE("Dual BSD/GPL");
64
65#if !defined(CONFIG_DYNAMIC_DEBUG)
66#define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt)
67#define DYNAMIC_DEBUG_BRANCH(descriptor) false
68#endif
69
70static unsigned int srp_sg_tablesize;
71static unsigned int cmd_sg_entries;
72static unsigned int indirect_sg_entries;
73static bool allow_ext_sg;
74static bool register_always = true;
75static bool never_register;
76static int topspin_workarounds = 1;
77
78module_param(srp_sg_tablesize, uint, 0444);
79MODULE_PARM_DESC(srp_sg_tablesize, "Deprecated name for cmd_sg_entries");
80
81module_param(cmd_sg_entries, uint, 0444);
82MODULE_PARM_DESC(cmd_sg_entries,
83		 "Default number of gather/scatter entries in the SRP command (default is 12, max 255)");
84
85module_param(indirect_sg_entries, uint, 0444);
86MODULE_PARM_DESC(indirect_sg_entries,
87		 "Default max number of gather/scatter entries (default is 12, max is " __stringify(SG_MAX_SEGMENTS) ")");
88
89module_param(allow_ext_sg, bool, 0444);
90MODULE_PARM_DESC(allow_ext_sg,
91		  "Default behavior when there are more than cmd_sg_entries S/G entries after mapping; fails the request when false (default false)");
92
93module_param(topspin_workarounds, int, 0444);
94MODULE_PARM_DESC(topspin_workarounds,
95		 "Enable workarounds for Topspin/Cisco SRP target bugs if != 0");
96
97module_param(register_always, bool, 0444);
98MODULE_PARM_DESC(register_always,
99		 "Use memory registration even for contiguous memory regions");
100
101module_param(never_register, bool, 0444);
102MODULE_PARM_DESC(never_register, "Never register memory");
103
104static const struct kernel_param_ops srp_tmo_ops;
105
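/*
 * The three timeout parameters below share srp_tmo_ops so that "off" can be
 * written to disable a timeout and so that srp_tmo_set() can validate each
 * new value against the other two. With the module loaded, the values are
 * typically changed through sysfs, e.g.:
 *
 *   echo off > /sys/module/ib_srp/parameters/fast_io_fail_tmo
 */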
106static int srp_reconnect_delay = 10;
107module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay,
108		S_IRUGO | S_IWUSR);
109MODULE_PARM_DESC(reconnect_delay, "Time between successive reconnect attempts");
110
111static int srp_fast_io_fail_tmo = 15;
112module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo,
113		S_IRUGO | S_IWUSR);
114MODULE_PARM_DESC(fast_io_fail_tmo,
115		 "Number of seconds between the observation of a transport"
116		 " layer error and failing all I/O. \"off\" means that this"
117		 " functionality is disabled.");
118
119static int srp_dev_loss_tmo = 600;
120module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo,
121		S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(dev_loss_tmo,
		 "Maximum number of seconds that the SRP transport should"
		 " insulate the SCSI layer from transport layer errors."
		 " After this time has been exceeded the SCSI host is removed."
		 " Should be between 1 and "
		 __stringify(SCSI_DEVICE_BLOCK_MAX_TIMEOUT)
		 " if fast_io_fail_tmo has not been set. \"off\" means that"
		 " this functionality is disabled.");
129
130static bool srp_use_imm_data = true;
131module_param_named(use_imm_data, srp_use_imm_data, bool, 0644);
132MODULE_PARM_DESC(use_imm_data,
133		 "Whether or not to request permission to use immediate data during SRP login.");
134
135static unsigned int srp_max_imm_data = 8 * 1024;
136module_param_named(max_imm_data, srp_max_imm_data, uint, 0644);
137MODULE_PARM_DESC(max_imm_data, "Maximum immediate data size.");
138
139static unsigned ch_count;
140module_param(ch_count, uint, 0444);
141MODULE_PARM_DESC(ch_count,
142		 "Number of RDMA channels to use for communication with an SRP target. Using more than one channel improves performance if the HCA supports multiple completion vectors. The default value is the minimum of four times the number of online CPU sockets and the number of completion vectors supported by the HCA.");
143
144static int srp_add_one(struct ib_device *device);
145static void srp_remove_one(struct ib_device *device, void *client_data);
146static void srp_rename_dev(struct ib_device *device, void *client_data);
147static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc);
148static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
149		const char *opname);
150static int srp_ib_cm_handler(struct ib_cm_id *cm_id,
151			     const struct ib_cm_event *event);
152static int srp_rdma_cm_handler(struct rdma_cm_id *cm_id,
153			       struct rdma_cm_event *event);
154
155static struct scsi_transport_template *ib_srp_transport_template;
156static struct workqueue_struct *srp_remove_wq;
157
158static struct ib_client srp_client = {
159	.name   = "srp",
160	.add    = srp_add_one,
161	.remove = srp_remove_one,
162	.rename = srp_rename_dev
163};
164
165static struct ib_sa_client srp_sa_client;
166
167static int srp_tmo_get(char *buffer, const struct kernel_param *kp)
168{
169	int tmo = *(int *)kp->arg;
170
171	if (tmo >= 0)
172		return sprintf(buffer, "%d\n", tmo);
173	else
174		return sprintf(buffer, "off\n");
175}
176
177static int srp_tmo_set(const char *val, const struct kernel_param *kp)
178{
179	int tmo, res;
180
181	res = srp_parse_tmo(&tmo, val);
182	if (res)
183		goto out;
184
185	if (kp->arg == &srp_reconnect_delay)
186		res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo,
187				    srp_dev_loss_tmo);
188	else if (kp->arg == &srp_fast_io_fail_tmo)
189		res = srp_tmo_valid(srp_reconnect_delay, tmo, srp_dev_loss_tmo);
190	else
191		res = srp_tmo_valid(srp_reconnect_delay, srp_fast_io_fail_tmo,
192				    tmo);
193	if (res)
194		goto out;
195	*(int *)kp->arg = tmo;
196
197out:
198	return res;
199}
200
201static const struct kernel_param_ops srp_tmo_ops = {
202	.get = srp_tmo_get,
203	.set = srp_tmo_set,
204};
205
206static inline struct srp_target_port *host_to_target(struct Scsi_Host *host)
207{
208	return (struct srp_target_port *) host->hostdata;
209}
210
211static const char *srp_target_info(struct Scsi_Host *host)
212{
213	return host_to_target(host)->target_name;
214}
215
216static int srp_target_is_topspin(struct srp_target_port *target)
217{
218	static const u8 topspin_oui[3] = { 0x00, 0x05, 0xad };
219	static const u8 cisco_oui[3]   = { 0x00, 0x1b, 0x0d };
220
221	return topspin_workarounds &&
222		(!memcmp(&target->ioc_guid, topspin_oui, sizeof topspin_oui) ||
223		 !memcmp(&target->ioc_guid, cisco_oui, sizeof cisco_oui));
224}
225
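/*
 * Allocate an information unit (IU), allocate its data buffer and DMA-map
 * that buffer for @direction. Returns NULL on allocation or mapping failure.
 */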
226static struct srp_iu *srp_alloc_iu(struct srp_host *host, size_t size,
227				   gfp_t gfp_mask,
228				   enum dma_data_direction direction)
229{
230	struct srp_iu *iu;
231
232	iu = kmalloc(sizeof *iu, gfp_mask);
233	if (!iu)
234		goto out;
235
236	iu->buf = kzalloc(size, gfp_mask);
237	if (!iu->buf)
238		goto out_free_iu;
239
240	iu->dma = ib_dma_map_single(host->srp_dev->dev, iu->buf, size,
241				    direction);
242	if (ib_dma_mapping_error(host->srp_dev->dev, iu->dma))
243		goto out_free_buf;
244
245	iu->size      = size;
246	iu->direction = direction;
247
248	return iu;
249
250out_free_buf:
251	kfree(iu->buf);
252out_free_iu:
253	kfree(iu);
254out:
255	return NULL;
256}
257
258static void srp_free_iu(struct srp_host *host, struct srp_iu *iu)
259{
260	if (!iu)
261		return;
262
263	ib_dma_unmap_single(host->srp_dev->dev, iu->dma, iu->size,
264			    iu->direction);
265	kfree(iu->buf);
266	kfree(iu);
267}
268
269static void srp_qp_event(struct ib_event *event, void *context)
270{
271	pr_debug("QP event %s (%d)\n",
272		 ib_event_msg(event->event), event->event);
273}
274
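/*
 * Transition a newly created queue pair to the INIT state and configure its
 * P_Key index, remote access flags and port number.
 */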
275static int srp_init_ib_qp(struct srp_target_port *target,
276			  struct ib_qp *qp)
277{
278	struct ib_qp_attr *attr;
279	int ret;
280
281	attr = kmalloc(sizeof *attr, GFP_KERNEL);
282	if (!attr)
283		return -ENOMEM;
284
285	ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev,
286				  target->srp_host->port,
287				  be16_to_cpu(target->ib_cm.pkey),
288				  &attr->pkey_index);
289	if (ret)
290		goto out;
291
292	attr->qp_state        = IB_QPS_INIT;
293	attr->qp_access_flags = (IB_ACCESS_REMOTE_READ |
294				    IB_ACCESS_REMOTE_WRITE);
295	attr->port_num        = target->srp_host->port;
296
297	ret = ib_modify_qp(qp, attr,
298			   IB_QP_STATE		|
299			   IB_QP_PKEY_INDEX	|
300			   IB_QP_ACCESS_FLAGS	|
301			   IB_QP_PORT);
302
303out:
304	kfree(attr);
305	return ret;
306}
307
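/*
 * Allocate a new IB CM ID for @ch, replacing any previous one, and
 * initialize the path record used for the subsequent path query.
 */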
308static int srp_new_ib_cm_id(struct srp_rdma_ch *ch)
309{
310	struct srp_target_port *target = ch->target;
311	struct ib_cm_id *new_cm_id;
312
313	new_cm_id = ib_create_cm_id(target->srp_host->srp_dev->dev,
314				    srp_ib_cm_handler, ch);
315	if (IS_ERR(new_cm_id))
316		return PTR_ERR(new_cm_id);
317
318	if (ch->ib_cm.cm_id)
319		ib_destroy_cm_id(ch->ib_cm.cm_id);
320	ch->ib_cm.cm_id = new_cm_id;
321	if (rdma_cap_opa_ah(target->srp_host->srp_dev->dev,
322			    target->srp_host->port))
323		ch->ib_cm.path.rec_type = SA_PATH_REC_TYPE_OPA;
324	else
325		ch->ib_cm.path.rec_type = SA_PATH_REC_TYPE_IB;
326	ch->ib_cm.path.sgid = target->sgid;
327	ch->ib_cm.path.dgid = target->ib_cm.orig_dgid;
328	ch->ib_cm.path.pkey = target->ib_cm.pkey;
329	ch->ib_cm.path.service_id = target->ib_cm.service_id;
330
331	return 0;
332}
333
334static int srp_new_rdma_cm_id(struct srp_rdma_ch *ch)
335{
336	struct srp_target_port *target = ch->target;
337	struct rdma_cm_id *new_cm_id;
338	int ret;
339
340	new_cm_id = rdma_create_id(target->net, srp_rdma_cm_handler, ch,
341				   RDMA_PS_TCP, IB_QPT_RC);
342	if (IS_ERR(new_cm_id)) {
343		ret = PTR_ERR(new_cm_id);
344		new_cm_id = NULL;
345		goto out;
346	}
347
348	init_completion(&ch->done);
349	ret = rdma_resolve_addr(new_cm_id, target->rdma_cm.src_specified ?
350				&target->rdma_cm.src.sa : NULL,
351				&target->rdma_cm.dst.sa,
352				SRP_PATH_REC_TIMEOUT_MS);
353	if (ret) {
354		pr_err("No route available from %pISpsc to %pISpsc (%d)\n",
355		       &target->rdma_cm.src, &target->rdma_cm.dst, ret);
356		goto out;
357	}
358	ret = wait_for_completion_interruptible(&ch->done);
359	if (ret < 0)
360		goto out;
361
362	ret = ch->status;
363	if (ret) {
364		pr_err("Resolving address %pISpsc failed (%d)\n",
365		       &target->rdma_cm.dst, ret);
366		goto out;
367	}
368
369	swap(ch->rdma_cm.cm_id, new_cm_id);
370
371out:
372	if (new_cm_id)
373		rdma_destroy_id(new_cm_id);
374
375	return ret;
376}
377
378static int srp_new_cm_id(struct srp_rdma_ch *ch)
379{
380	struct srp_target_port *target = ch->target;
381
382	return target->using_rdma_cm ? srp_new_rdma_cm_id(ch) :
383		srp_new_ib_cm_id(ch);
384}
385
386/**
387 * srp_destroy_fr_pool() - free the resources owned by a pool
388 * @pool: Fast registration pool to be destroyed.
389 */
390static void srp_destroy_fr_pool(struct srp_fr_pool *pool)
391{
392	int i;
393	struct srp_fr_desc *d;
394
395	if (!pool)
396		return;
397
398	for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
399		if (d->mr)
400			ib_dereg_mr(d->mr);
401	}
402	kfree(pool);
403}
404
405/**
406 * srp_create_fr_pool() - allocate and initialize a pool for fast registration
407 * @device:            IB device to allocate fast registration descriptors for.
408 * @pd:                Protection domain associated with the FR descriptors.
409 * @pool_size:         Number of descriptors to allocate.
410 * @max_page_list_len: Maximum fast registration work request page list length.
411 */
412static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
413					      struct ib_pd *pd, int pool_size,
414					      int max_page_list_len)
415{
416	struct srp_fr_pool *pool;
417	struct srp_fr_desc *d;
418	struct ib_mr *mr;
419	int i, ret = -EINVAL;
420	enum ib_mr_type mr_type;
421
422	if (pool_size <= 0)
423		goto err;
424	ret = -ENOMEM;
425	pool = kzalloc(struct_size(pool, desc, pool_size), GFP_KERNEL);
426	if (!pool)
427		goto err;
428	pool->size = pool_size;
429	pool->max_page_list_len = max_page_list_len;
430	spin_lock_init(&pool->lock);
431	INIT_LIST_HEAD(&pool->free_list);
432
433	if (device->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)
434		mr_type = IB_MR_TYPE_SG_GAPS;
435	else
436		mr_type = IB_MR_TYPE_MEM_REG;
437
438	for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
439		mr = ib_alloc_mr(pd, mr_type, max_page_list_len);
440		if (IS_ERR(mr)) {
441			ret = PTR_ERR(mr);
442			if (ret == -ENOMEM)
443				pr_info("%s: ib_alloc_mr() failed. Try to reduce max_cmd_per_lun, max_sect or ch_count\n",
444					dev_name(&device->dev));
445			goto destroy_pool;
446		}
447		d->mr = mr;
448		list_add_tail(&d->entry, &pool->free_list);
449	}
450
451out:
452	return pool;
453
454destroy_pool:
455	srp_destroy_fr_pool(pool);
456
457err:
458	pool = ERR_PTR(ret);
459	goto out;
460}
461
462/**
463 * srp_fr_pool_get() - obtain a descriptor suitable for fast registration
464 * @pool: Pool to obtain descriptor from.
465 */
466static struct srp_fr_desc *srp_fr_pool_get(struct srp_fr_pool *pool)
467{
468	struct srp_fr_desc *d = NULL;
469	unsigned long flags;
470
471	spin_lock_irqsave(&pool->lock, flags);
472	if (!list_empty(&pool->free_list)) {
473		d = list_first_entry(&pool->free_list, typeof(*d), entry);
474		list_del(&d->entry);
475	}
476	spin_unlock_irqrestore(&pool->lock, flags);
477
478	return d;
479}
480
481/**
482 * srp_fr_pool_put() - put an FR descriptor back in the free list
483 * @pool: Pool the descriptor was allocated from.
484 * @desc: Pointer to an array of fast registration descriptor pointers.
485 * @n:    Number of descriptors to put back.
486 *
487 * Note: The caller must already have queued an invalidation request for
488 * desc->mr->rkey before calling this function.
489 */
490static void srp_fr_pool_put(struct srp_fr_pool *pool, struct srp_fr_desc **desc,
491			    int n)
492{
493	unsigned long flags;
494	int i;
495
496	spin_lock_irqsave(&pool->lock, flags);
497	for (i = 0; i < n; i++)
498		list_add(&desc[i]->entry, &pool->free_list);
499	spin_unlock_irqrestore(&pool->lock, flags);
500}
501
502static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
503{
504	struct srp_device *dev = target->srp_host->srp_dev;
505
506	return srp_create_fr_pool(dev->dev, dev->pd, target->mr_pool_size,
507				  dev->max_pages_per_mr);
508}
509
510/**
511 * srp_destroy_qp() - destroy an RDMA queue pair
512 * @ch: SRP RDMA channel.
513 *
 * Drain the QP before destroying it so that the receive completion handler
 * cannot access the queue pair while it is being destroyed.
517 */
518static void srp_destroy_qp(struct srp_rdma_ch *ch)
519{
520	spin_lock_irq(&ch->lock);
521	ib_process_cq_direct(ch->send_cq, -1);
522	spin_unlock_irq(&ch->lock);
523
524	ib_drain_qp(ch->qp);
525	ib_destroy_qp(ch->qp);
526}
527
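/*
 * Allocate new completion queues, a new queue pair and, if fast registration
 * is used, a new FR pool for @ch. The previous CQs, QP and FR pool, if any,
 * are freed only after their replacements have been created successfully.
 */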
528static int srp_create_ch_ib(struct srp_rdma_ch *ch)
529{
530	struct srp_target_port *target = ch->target;
531	struct srp_device *dev = target->srp_host->srp_dev;
532	const struct ib_device_attr *attr = &dev->dev->attrs;
533	struct ib_qp_init_attr *init_attr;
534	struct ib_cq *recv_cq, *send_cq;
535	struct ib_qp *qp;
536	struct srp_fr_pool *fr_pool = NULL;
537	const int m = 1 + dev->use_fast_reg * target->mr_per_cmd * 2;
538	int ret;
539
540	init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
541	if (!init_attr)
542		return -ENOMEM;
543
544	/* queue_size + 1 for ib_drain_rq() */
545	recv_cq = ib_alloc_cq(dev->dev, ch, target->queue_size + 1,
546				ch->comp_vector, IB_POLL_SOFTIRQ);
547	if (IS_ERR(recv_cq)) {
548		ret = PTR_ERR(recv_cq);
549		goto err;
550	}
551
552	send_cq = ib_alloc_cq(dev->dev, ch, m * target->queue_size,
553				ch->comp_vector, IB_POLL_DIRECT);
554	if (IS_ERR(send_cq)) {
555		ret = PTR_ERR(send_cq);
556		goto err_recv_cq;
557	}
558
559	init_attr->event_handler       = srp_qp_event;
560	init_attr->cap.max_send_wr     = m * target->queue_size;
561	init_attr->cap.max_recv_wr     = target->queue_size + 1;
562	init_attr->cap.max_recv_sge    = 1;
563	init_attr->cap.max_send_sge    = min(SRP_MAX_SGE, attr->max_send_sge);
564	init_attr->sq_sig_type         = IB_SIGNAL_REQ_WR;
565	init_attr->qp_type             = IB_QPT_RC;
566	init_attr->send_cq             = send_cq;
567	init_attr->recv_cq             = recv_cq;
568
569	ch->max_imm_sge = min(init_attr->cap.max_send_sge - 1U, 255U);
570
571	if (target->using_rdma_cm) {
572		ret = rdma_create_qp(ch->rdma_cm.cm_id, dev->pd, init_attr);
573		qp = ch->rdma_cm.cm_id->qp;
574	} else {
575		qp = ib_create_qp(dev->pd, init_attr);
576		if (!IS_ERR(qp)) {
577			ret = srp_init_ib_qp(target, qp);
578			if (ret)
579				ib_destroy_qp(qp);
580		} else {
581			ret = PTR_ERR(qp);
582		}
583	}
584	if (ret) {
585		pr_err("QP creation failed for dev %s: %d\n",
586		       dev_name(&dev->dev->dev), ret);
587		goto err_send_cq;
588	}
589
590	if (dev->use_fast_reg) {
591		fr_pool = srp_alloc_fr_pool(target);
592		if (IS_ERR(fr_pool)) {
593			ret = PTR_ERR(fr_pool);
594			shost_printk(KERN_WARNING, target->scsi_host, PFX
595				     "FR pool allocation failed (%d)\n", ret);
596			goto err_qp;
597		}
598	}
599
600	if (ch->qp)
601		srp_destroy_qp(ch);
602	if (ch->recv_cq)
603		ib_free_cq(ch->recv_cq);
604	if (ch->send_cq)
605		ib_free_cq(ch->send_cq);
606
607	ch->qp = qp;
608	ch->recv_cq = recv_cq;
609	ch->send_cq = send_cq;
610
611	if (dev->use_fast_reg) {
612		if (ch->fr_pool)
613			srp_destroy_fr_pool(ch->fr_pool);
614		ch->fr_pool = fr_pool;
615	}
616
617	kfree(init_attr);
618	return 0;
619
620err_qp:
621	if (target->using_rdma_cm)
622		rdma_destroy_qp(ch->rdma_cm.cm_id);
623	else
624		ib_destroy_qp(qp);
625
626err_send_cq:
627	ib_free_cq(send_cq);
628
629err_recv_cq:
630	ib_free_cq(recv_cq);
631
632err:
633	kfree(init_attr);
634	return ret;
635}
636
637/*
638 * Note: this function may be called without srp_alloc_iu_bufs() having been
639 * invoked. Hence the ch->[rt]x_ring checks.
640 */
641static void srp_free_ch_ib(struct srp_target_port *target,
642			   struct srp_rdma_ch *ch)
643{
644	struct srp_device *dev = target->srp_host->srp_dev;
645	int i;
646
647	if (!ch->target)
648		return;
649
650	if (target->using_rdma_cm) {
651		if (ch->rdma_cm.cm_id) {
652			rdma_destroy_id(ch->rdma_cm.cm_id);
653			ch->rdma_cm.cm_id = NULL;
654		}
655	} else {
656		if (ch->ib_cm.cm_id) {
657			ib_destroy_cm_id(ch->ib_cm.cm_id);
658			ch->ib_cm.cm_id = NULL;
659		}
660	}
661
	/* If srp_new_cm_id() succeeded but srp_create_ch_ib() did not, return. */
663	if (!ch->qp)
664		return;
665
666	if (dev->use_fast_reg) {
667		if (ch->fr_pool)
668			srp_destroy_fr_pool(ch->fr_pool);
669	}
670
671	srp_destroy_qp(ch);
672	ib_free_cq(ch->send_cq);
673	ib_free_cq(ch->recv_cq);
674
	/*
	 * Prevent the SCSI error handler from using this channel after it
	 * has been freed: the error handler may continue trying to perform
	 * recovery actions after scsi_remove_host() has returned.
	 */
681	ch->target = NULL;
682
683	ch->qp = NULL;
684	ch->send_cq = ch->recv_cq = NULL;
685
686	if (ch->rx_ring) {
687		for (i = 0; i < target->queue_size; ++i)
688			srp_free_iu(target->srp_host, ch->rx_ring[i]);
689		kfree(ch->rx_ring);
690		ch->rx_ring = NULL;
691	}
692	if (ch->tx_ring) {
693		for (i = 0; i < target->queue_size; ++i)
694			srp_free_iu(target->srp_host, ch->tx_ring[i]);
695		kfree(ch->tx_ring);
696		ch->tx_ring = NULL;
697	}
698}
699
700static void srp_path_rec_completion(int status,
701				    struct sa_path_rec *pathrec,
702				    void *ch_ptr)
703{
704	struct srp_rdma_ch *ch = ch_ptr;
705	struct srp_target_port *target = ch->target;
706
707	ch->status = status;
708	if (status)
709		shost_printk(KERN_ERR, target->scsi_host,
710			     PFX "Got failed path rec status %d\n", status);
711	else
712		ch->ib_cm.path = *pathrec;
713	complete(&ch->done);
714}
715
716static int srp_ib_lookup_path(struct srp_rdma_ch *ch)
717{
718	struct srp_target_port *target = ch->target;
719	int ret;
720
721	ch->ib_cm.path.numb_path = 1;
722
723	init_completion(&ch->done);
724
725	ch->ib_cm.path_query_id = ib_sa_path_rec_get(&srp_sa_client,
726					       target->srp_host->srp_dev->dev,
727					       target->srp_host->port,
728					       &ch->ib_cm.path,
729					       IB_SA_PATH_REC_SERVICE_ID |
730					       IB_SA_PATH_REC_DGID	 |
731					       IB_SA_PATH_REC_SGID	 |
732					       IB_SA_PATH_REC_NUMB_PATH	 |
733					       IB_SA_PATH_REC_PKEY,
734					       SRP_PATH_REC_TIMEOUT_MS,
735					       GFP_KERNEL,
736					       srp_path_rec_completion,
737					       ch, &ch->ib_cm.path_query);
738	if (ch->ib_cm.path_query_id < 0)
739		return ch->ib_cm.path_query_id;
740
741	ret = wait_for_completion_interruptible(&ch->done);
742	if (ret < 0)
743		return ret;
744
745	if (ch->status < 0)
746		shost_printk(KERN_WARNING, target->scsi_host,
747			     PFX "Path record query failed: sgid %pI6, dgid %pI6, pkey %#04x, service_id %#16llx\n",
748			     ch->ib_cm.path.sgid.raw, ch->ib_cm.path.dgid.raw,
749			     be16_to_cpu(target->ib_cm.pkey),
750			     be64_to_cpu(target->ib_cm.service_id));
751
752	return ch->status;
753}
754
755static int srp_rdma_lookup_path(struct srp_rdma_ch *ch)
756{
757	struct srp_target_port *target = ch->target;
758	int ret;
759
760	init_completion(&ch->done);
761
762	ret = rdma_resolve_route(ch->rdma_cm.cm_id, SRP_PATH_REC_TIMEOUT_MS);
763	if (ret)
764		return ret;
765
766	wait_for_completion_interruptible(&ch->done);
767
768	if (ch->status != 0)
769		shost_printk(KERN_WARNING, target->scsi_host,
770			     PFX "Path resolution failed\n");
771
772	return ch->status;
773}
774
775static int srp_lookup_path(struct srp_rdma_ch *ch)
776{
777	struct srp_target_port *target = ch->target;
778
779	return target->using_rdma_cm ? srp_rdma_lookup_path(ch) :
780		srp_ib_lookup_path(ch);
781}
782
783static u8 srp_get_subnet_timeout(struct srp_host *host)
784{
785	struct ib_port_attr attr;
786	int ret;
787	u8 subnet_timeout = 18;
788
789	ret = ib_query_port(host->srp_dev->dev, host->port, &attr);
790	if (ret == 0)
791		subnet_timeout = attr.subnet_timeout;
792
793	if (unlikely(subnet_timeout < 15))
794		pr_warn("%s: subnet timeout %d may cause SRP login to fail.\n",
795			dev_name(&host->srp_dev->dev->dev), subnet_timeout);
796
797	return subnet_timeout;
798}
799
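/*
 * Build an SRP_LOGIN_REQ and send it either through the RDMA CM or through
 * the IB CM, depending on how the target has been specified.
 */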
800static int srp_send_req(struct srp_rdma_ch *ch, uint32_t max_iu_len,
801			bool multich)
802{
803	struct srp_target_port *target = ch->target;
804	struct {
805		struct rdma_conn_param	  rdma_param;
806		struct srp_login_req_rdma rdma_req;
807		struct ib_cm_req_param	  ib_param;
808		struct srp_login_req	  ib_req;
809	} *req = NULL;
810	char *ipi, *tpi;
811	int status;
812
813	req = kzalloc(sizeof *req, GFP_KERNEL);
814	if (!req)
815		return -ENOMEM;
816
817	req->ib_param.flow_control = 1;
818	req->ib_param.retry_count = target->tl_retry_count;
819
820	/*
821	 * Pick some arbitrary defaults here; we could make these
822	 * module parameters if anyone cared about setting them.
823	 */
824	req->ib_param.responder_resources = 4;
825	req->ib_param.rnr_retry_count = 7;
826	req->ib_param.max_cm_retries = 15;
827
828	req->ib_req.opcode = SRP_LOGIN_REQ;
829	req->ib_req.tag = 0;
830	req->ib_req.req_it_iu_len = cpu_to_be32(max_iu_len);
831	req->ib_req.req_buf_fmt	= cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
832					      SRP_BUF_FORMAT_INDIRECT);
833	req->ib_req.req_flags = (multich ? SRP_MULTICHAN_MULTI :
834				 SRP_MULTICHAN_SINGLE);
835	if (srp_use_imm_data) {
836		req->ib_req.req_flags |= SRP_IMMED_REQUESTED;
837		req->ib_req.imm_data_offset = cpu_to_be16(SRP_IMM_DATA_OFFSET);
838	}
839
840	if (target->using_rdma_cm) {
841		req->rdma_param.flow_control = req->ib_param.flow_control;
842		req->rdma_param.responder_resources =
843			req->ib_param.responder_resources;
844		req->rdma_param.initiator_depth = req->ib_param.initiator_depth;
845		req->rdma_param.retry_count = req->ib_param.retry_count;
846		req->rdma_param.rnr_retry_count = req->ib_param.rnr_retry_count;
847		req->rdma_param.private_data = &req->rdma_req;
848		req->rdma_param.private_data_len = sizeof(req->rdma_req);
849
850		req->rdma_req.opcode = req->ib_req.opcode;
851		req->rdma_req.tag = req->ib_req.tag;
852		req->rdma_req.req_it_iu_len = req->ib_req.req_it_iu_len;
853		req->rdma_req.req_buf_fmt = req->ib_req.req_buf_fmt;
854		req->rdma_req.req_flags	= req->ib_req.req_flags;
855		req->rdma_req.imm_data_offset = req->ib_req.imm_data_offset;
856
857		ipi = req->rdma_req.initiator_port_id;
858		tpi = req->rdma_req.target_port_id;
859	} else {
860		u8 subnet_timeout;
861
862		subnet_timeout = srp_get_subnet_timeout(target->srp_host);
863
864		req->ib_param.primary_path = &ch->ib_cm.path;
865		req->ib_param.alternate_path = NULL;
866		req->ib_param.service_id = target->ib_cm.service_id;
867		get_random_bytes(&req->ib_param.starting_psn, 4);
868		req->ib_param.starting_psn &= 0xffffff;
869		req->ib_param.qp_num = ch->qp->qp_num;
870		req->ib_param.qp_type = ch->qp->qp_type;
871		req->ib_param.local_cm_response_timeout = subnet_timeout + 2;
872		req->ib_param.remote_cm_response_timeout = subnet_timeout + 2;
873		req->ib_param.private_data = &req->ib_req;
874		req->ib_param.private_data_len = sizeof(req->ib_req);
875
876		ipi = req->ib_req.initiator_port_id;
877		tpi = req->ib_req.target_port_id;
878	}
879
880	/*
881	 * In the published SRP specification (draft rev. 16a), the
882	 * port identifier format is 8 bytes of ID extension followed
883	 * by 8 bytes of GUID.  Older drafts put the two halves in the
884	 * opposite order, so that the GUID comes first.
885	 *
886	 * Targets conforming to these obsolete drafts can be
887	 * recognized by the I/O Class they report.
888	 */
889	if (target->io_class == SRP_REV10_IB_IO_CLASS) {
890		memcpy(ipi,     &target->sgid.global.interface_id, 8);
891		memcpy(ipi + 8, &target->initiator_ext, 8);
892		memcpy(tpi,     &target->ioc_guid, 8);
893		memcpy(tpi + 8, &target->id_ext, 8);
894	} else {
895		memcpy(ipi,     &target->initiator_ext, 8);
896		memcpy(ipi + 8, &target->sgid.global.interface_id, 8);
897		memcpy(tpi,     &target->id_ext, 8);
898		memcpy(tpi + 8, &target->ioc_guid, 8);
899	}
900
901	/*
902	 * Topspin/Cisco SRP targets will reject our login unless we
903	 * zero out the first 8 bytes of our initiator port ID and set
904	 * the second 8 bytes to the local node GUID.
905	 */
906	if (srp_target_is_topspin(target)) {
907		shost_printk(KERN_DEBUG, target->scsi_host,
908			     PFX "Topspin/Cisco initiator port ID workaround "
909			     "activated for target GUID %016llx\n",
910			     be64_to_cpu(target->ioc_guid));
911		memset(ipi, 0, 8);
912		memcpy(ipi + 8, &target->srp_host->srp_dev->dev->node_guid, 8);
913	}
914
915	if (target->using_rdma_cm)
916		status = rdma_connect(ch->rdma_cm.cm_id, &req->rdma_param);
917	else
918		status = ib_send_cm_req(ch->ib_cm.cm_id, &req->ib_param);
919
920	kfree(req);
921
922	return status;
923}
924
925static bool srp_queue_remove_work(struct srp_target_port *target)
926{
927	bool changed = false;
928
929	spin_lock_irq(&target->lock);
930	if (target->state != SRP_TARGET_REMOVED) {
931		target->state = SRP_TARGET_REMOVED;
932		changed = true;
933	}
934	spin_unlock_irq(&target->lock);
935
936	if (changed)
937		queue_work(srp_remove_wq, &target->remove_work);
938
939	return changed;
940}
941
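/*
 * Mark all channels of @target as disconnected and send a disconnect request
 * (RDMA CM) or a CM DREQ (IB CM) on each of them.
 */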
942static void srp_disconnect_target(struct srp_target_port *target)
943{
944	struct srp_rdma_ch *ch;
945	int i, ret;
946
947	/* XXX should send SRP_I_LOGOUT request */
948
949	for (i = 0; i < target->ch_count; i++) {
950		ch = &target->ch[i];
951		ch->connected = false;
952		ret = 0;
953		if (target->using_rdma_cm) {
954			if (ch->rdma_cm.cm_id)
955				rdma_disconnect(ch->rdma_cm.cm_id);
956		} else {
957			if (ch->ib_cm.cm_id)
958				ret = ib_send_cm_dreq(ch->ib_cm.cm_id,
959						      NULL, 0);
960		}
961		if (ret < 0) {
962			shost_printk(KERN_DEBUG, target->scsi_host,
963				     PFX "Sending CM DREQ failed\n");
964		}
965	}
966}
967
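/*
 * Free the per-command resources allocated by srp_init_cmd_priv(): the FR
 * descriptor list and the DMA-mapped indirect descriptor buffer.
 */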
968static int srp_exit_cmd_priv(struct Scsi_Host *shost, struct scsi_cmnd *cmd)
969{
970	struct srp_target_port *target = host_to_target(shost);
971	struct srp_device *dev = target->srp_host->srp_dev;
972	struct ib_device *ibdev = dev->dev;
973	struct srp_request *req = scsi_cmd_priv(cmd);
974
975	kfree(req->fr_list);
976	if (req->indirect_dma_addr) {
977		ib_dma_unmap_single(ibdev, req->indirect_dma_addr,
978				    target->indirect_size,
979				    DMA_TO_DEVICE);
980	}
981	kfree(req->indirect_desc);
982
983	return 0;
984}
985
986static int srp_init_cmd_priv(struct Scsi_Host *shost, struct scsi_cmnd *cmd)
987{
988	struct srp_target_port *target = host_to_target(shost);
989	struct srp_device *srp_dev = target->srp_host->srp_dev;
990	struct ib_device *ibdev = srp_dev->dev;
991	struct srp_request *req = scsi_cmd_priv(cmd);
992	dma_addr_t dma_addr;
993	int ret = -ENOMEM;
994
995	if (srp_dev->use_fast_reg) {
996		req->fr_list = kmalloc_array(target->mr_per_cmd, sizeof(void *),
997					GFP_KERNEL);
998		if (!req->fr_list)
999			goto out;
1000	}
1001	req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL);
1002	if (!req->indirect_desc)
1003		goto out;
1004
1005	dma_addr = ib_dma_map_single(ibdev, req->indirect_desc,
1006				     target->indirect_size,
1007				     DMA_TO_DEVICE);
1008	if (ib_dma_mapping_error(ibdev, dma_addr)) {
1009		srp_exit_cmd_priv(shost, cmd);
1010		goto out;
1011	}
1012
1013	req->indirect_dma_addr = dma_addr;
1014	ret = 0;
1015
1016out:
1017	return ret;
1018}
1019
1020/**
1021 * srp_del_scsi_host_attr() - Remove attributes defined in the host template.
1022 * @shost: SCSI host whose attributes to remove from sysfs.
1023 *
 * Note: any attributes that are defined in the host template but that did
 * not exist before this function is invoked are ignored.
1026 */
1027static void srp_del_scsi_host_attr(struct Scsi_Host *shost)
1028{
1029	struct device_attribute **attr;
1030
1031	for (attr = shost->hostt->shost_attrs; attr && *attr; ++attr)
1032		device_remove_file(&shost->shost_dev, *attr);
1033}
1034
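/*
 * Tear down a target port: remove its sysfs attributes and SCSI host,
 * disconnect and free all RDMA channels and unlink the target from its SRP
 * host. Must only be called after the state changed to SRP_TARGET_REMOVED.
 */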
1035static void srp_remove_target(struct srp_target_port *target)
1036{
1037	struct srp_rdma_ch *ch;
1038	int i;
1039
1040	WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
1041
1042	srp_del_scsi_host_attr(target->scsi_host);
1043	srp_rport_get(target->rport);
1044	srp_remove_host(target->scsi_host);
1045	scsi_remove_host(target->scsi_host);
1046	srp_stop_rport_timers(target->rport);
1047	srp_disconnect_target(target);
1048	kobj_ns_drop(KOBJ_NS_TYPE_NET, target->net);
1049	for (i = 0; i < target->ch_count; i++) {
1050		ch = &target->ch[i];
1051		srp_free_ch_ib(target, ch);
1052	}
1053	cancel_work_sync(&target->tl_err_work);
1054	srp_rport_put(target->rport);
1055	kfree(target->ch);
1056	target->ch = NULL;
1057
1058	spin_lock(&target->srp_host->target_lock);
1059	list_del(&target->list);
1060	spin_unlock(&target->srp_host->target_lock);
1061
1062	scsi_host_put(target->scsi_host);
1063}
1064
1065static void srp_remove_work(struct work_struct *work)
1066{
1067	struct srp_target_port *target =
1068		container_of(work, struct srp_target_port, remove_work);
1069
1070	WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);
1071
1072	srp_remove_target(target);
1073}
1074
1075static void srp_rport_delete(struct srp_rport *rport)
1076{
1077	struct srp_target_port *target = rport->lld_data;
1078
1079	srp_queue_remove_work(target);
1080}
1081
1082/**
1083 * srp_connected_ch() - number of connected channels
1084 * @target: SRP target port.
1085 */
1086static int srp_connected_ch(struct srp_target_port *target)
1087{
1088	int i, c = 0;
1089
1090	for (i = 0; i < target->ch_count; i++)
1091		c += target->ch[i].connected;
1092
1093	return c;
1094}
1095
1096static int srp_connect_ch(struct srp_rdma_ch *ch, uint32_t max_iu_len,
1097			  bool multich)
1098{
1099	struct srp_target_port *target = ch->target;
1100	int ret;
1101
1102	WARN_ON_ONCE(!multich && srp_connected_ch(target) > 0);
1103
1104	ret = srp_lookup_path(ch);
1105	if (ret)
1106		goto out;
1107
1108	while (1) {
1109		init_completion(&ch->done);
1110		ret = srp_send_req(ch, max_iu_len, multich);
1111		if (ret)
1112			goto out;
1113		ret = wait_for_completion_interruptible(&ch->done);
1114		if (ret < 0)
1115			goto out;
1116
1117		/*
1118		 * The CM event handling code will set status to
1119		 * SRP_PORT_REDIRECT if we get a port redirect REJ
1120		 * back, or SRP_DLID_REDIRECT if we get a lid/qp
1121		 * redirect REJ back.
1122		 */
1123		ret = ch->status;
1124		switch (ret) {
1125		case 0:
1126			ch->connected = true;
1127			goto out;
1128
1129		case SRP_PORT_REDIRECT:
1130			ret = srp_lookup_path(ch);
1131			if (ret)
1132				goto out;
1133			break;
1134
1135		case SRP_DLID_REDIRECT:
1136			break;
1137
1138		case SRP_STALE_CONN:
1139			shost_printk(KERN_ERR, target->scsi_host, PFX
1140				     "giving up on stale connection\n");
1141			ret = -ECONNRESET;
1142			goto out;
1143
1144		default:
1145			goto out;
1146		}
1147	}
1148
1149out:
1150	return ret <= 0 ? ret : -ENODEV;
1151}
1152
1153static void srp_inv_rkey_err_done(struct ib_cq *cq, struct ib_wc *wc)
1154{
1155	srp_handle_qp_err(cq, wc, "INV RKEY");
1156}
1157
1158static int srp_inv_rkey(struct srp_request *req, struct srp_rdma_ch *ch,
1159		u32 rkey)
1160{
1161	struct ib_send_wr wr = {
1162		.opcode		    = IB_WR_LOCAL_INV,
1163		.next		    = NULL,
1164		.num_sge	    = 0,
1165		.send_flags	    = 0,
1166		.ex.invalidate_rkey = rkey,
1167	};
1168
1169	wr.wr_cqe = &req->reg_cqe;
1170	req->reg_cqe.done = srp_inv_rkey_err_done;
1171	return ib_post_send(ch->qp, &wr, NULL);
1172}
1173
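/*
 * If fast registration was used, queue invalidation requests for the rkeys
 * of the descriptors used by @req and return those descriptors to the FR
 * pool. Finally, unmap the scatterlist of @scmnd.
 */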
1174static void srp_unmap_data(struct scsi_cmnd *scmnd,
1175			   struct srp_rdma_ch *ch,
1176			   struct srp_request *req)
1177{
1178	struct srp_target_port *target = ch->target;
1179	struct srp_device *dev = target->srp_host->srp_dev;
1180	struct ib_device *ibdev = dev->dev;
1181	int i, res;
1182
1183	if (!scsi_sglist(scmnd) ||
1184	    (scmnd->sc_data_direction != DMA_TO_DEVICE &&
1185	     scmnd->sc_data_direction != DMA_FROM_DEVICE))
1186		return;
1187
1188	if (dev->use_fast_reg) {
1189		struct srp_fr_desc **pfr;
1190
1191		for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) {
1192			res = srp_inv_rkey(req, ch, (*pfr)->mr->rkey);
1193			if (res < 0) {
1194				shost_printk(KERN_ERR, target->scsi_host, PFX
1195				  "Queueing INV WR for rkey %#x failed (%d)\n",
1196				  (*pfr)->mr->rkey, res);
1197				queue_work(system_long_wq,
1198					   &target->tl_err_work);
1199			}
1200		}
1201		if (req->nmdesc)
1202			srp_fr_pool_put(ch->fr_pool, req->fr_list,
1203					req->nmdesc);
1204	}
1205
1206	ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd),
1207			scmnd->sc_data_direction);
1208}
1209
1210/**
1211 * srp_claim_req - Take ownership of the scmnd associated with a request.
1212 * @ch: SRP RDMA channel.
1213 * @req: SRP request.
1214 * @sdev: If not NULL, only take ownership for this SCSI device.
1215 * @scmnd: If NULL, take ownership of @req->scmnd. If not NULL, only take
1216 *         ownership of @req->scmnd if it equals @scmnd.
1217 *
1218 * Return value:
1219 * Either NULL or a pointer to the SCSI command the caller became owner of.
1220 */
1221static struct scsi_cmnd *srp_claim_req(struct srp_rdma_ch *ch,
1222				       struct srp_request *req,
1223				       struct scsi_device *sdev,
1224				       struct scsi_cmnd *scmnd)
1225{
1226	unsigned long flags;
1227
1228	spin_lock_irqsave(&ch->lock, flags);
1229	if (req->scmnd &&
1230	    (!sdev || req->scmnd->device == sdev) &&
1231	    (!scmnd || req->scmnd == scmnd)) {
1232		scmnd = req->scmnd;
1233		req->scmnd = NULL;
1234	} else {
1235		scmnd = NULL;
1236	}
1237	spin_unlock_irqrestore(&ch->lock, flags);
1238
1239	return scmnd;
1240}
1241
1242/**
1243 * srp_free_req() - Unmap data and adjust ch->req_lim.
1244 * @ch:     SRP RDMA channel.
1245 * @req:    Request to be freed.
1246 * @scmnd:  SCSI command associated with @req.
 * @req_lim_delta: Amount to be added to @ch->req_lim.
1248 */
1249static void srp_free_req(struct srp_rdma_ch *ch, struct srp_request *req,
1250			 struct scsi_cmnd *scmnd, s32 req_lim_delta)
1251{
1252	unsigned long flags;
1253
1254	srp_unmap_data(scmnd, ch, req);
1255
1256	spin_lock_irqsave(&ch->lock, flags);
1257	ch->req_lim += req_lim_delta;
1258	spin_unlock_irqrestore(&ch->lock, flags);
1259}
1260
1261static void srp_finish_req(struct srp_rdma_ch *ch, struct srp_request *req,
1262			   struct scsi_device *sdev, int result)
1263{
1264	struct scsi_cmnd *scmnd = srp_claim_req(ch, req, sdev, NULL);
1265
1266	if (scmnd) {
1267		srp_free_req(ch, req, scmnd, 0);
1268		scmnd->result = result;
1269		scmnd->scsi_done(scmnd);
1270	}
1271}
1272
1273struct srp_terminate_context {
1274	struct srp_target_port *srp_target;
1275	int scsi_result;
1276};
1277
1278static bool srp_terminate_cmd(struct scsi_cmnd *scmnd, void *context_ptr,
1279			      bool reserved)
1280{
1281	struct srp_terminate_context *context = context_ptr;
1282	struct srp_target_port *target = context->srp_target;
1283	u32 tag = blk_mq_unique_tag(scmnd->request);
1284	struct srp_rdma_ch *ch = &target->ch[blk_mq_unique_tag_to_hwq(tag)];
1285	struct srp_request *req = scsi_cmd_priv(scmnd);
1286
1287	srp_finish_req(ch, req, NULL, context->scsi_result);
1288
1289	return true;
1290}
1291
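/* Fail all outstanding SCSI commands of @rport with DID_TRANSPORT_FAILFAST. */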
1292static void srp_terminate_io(struct srp_rport *rport)
1293{
1294	struct srp_target_port *target = rport->lld_data;
1295	struct srp_terminate_context context = { .srp_target = target,
1296		.scsi_result = DID_TRANSPORT_FAILFAST << 16 };
1297
1298	scsi_host_busy_iter(target->scsi_host, srp_terminate_cmd, &context);
1299}
1300
1301/* Calculate maximum initiator to target information unit length. */
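/*
 * The result is the larger of (a) the size of an SRP_CMD IU that carries an
 * indirect descriptor table with cmd_sg_cnt entries and, if immediate data
 * is enabled, (b) the size of an IU carrying up to srp_max_imm_data bytes of
 * immediate data. A non-zero max_it_iu_size caps the result.
 */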
1302static uint32_t srp_max_it_iu_len(int cmd_sg_cnt, bool use_imm_data,
1303				  uint32_t max_it_iu_size)
1304{
1305	uint32_t max_iu_len = sizeof(struct srp_cmd) + SRP_MAX_ADD_CDB_LEN +
1306		sizeof(struct srp_indirect_buf) +
1307		cmd_sg_cnt * sizeof(struct srp_direct_buf);
1308
1309	if (use_imm_data)
1310		max_iu_len = max(max_iu_len, SRP_IMM_DATA_OFFSET +
1311				 srp_max_imm_data);
1312
1313	if (max_it_iu_size)
1314		max_iu_len = min(max_iu_len, max_it_iu_size);
1315
1316	pr_debug("max_iu_len = %d\n", max_iu_len);
1317
1318	return max_iu_len;
1319}
1320
1321/*
1322 * It is up to the caller to ensure that srp_rport_reconnect() calls are
1323 * serialized and that no concurrent srp_queuecommand(), srp_abort(),
1324 * srp_reset_device() or srp_reset_host() calls will occur while this function
 * is in progress. One way to ensure this is to call srp_reconnect_rport()
 * instead of calling this function directly: srp_reconnect_rport()
 * serializes calls of this function via rport->mutex and also blocks
 * srp_queuecommand() calls before invoking this function.
1329 */
1330static int srp_rport_reconnect(struct srp_rport *rport)
1331{
1332	struct srp_target_port *target = rport->lld_data;
1333	struct srp_rdma_ch *ch;
1334	uint32_t max_iu_len = srp_max_it_iu_len(target->cmd_sg_cnt,
1335						srp_use_imm_data,
1336						target->max_it_iu_size);
1337	int i, j, ret = 0;
1338	bool multich = false;
1339
1340	srp_disconnect_target(target);
1341
1342	if (target->state == SRP_TARGET_SCANNING)
1343		return -ENODEV;
1344
1345	/*
1346	 * Now get a new local CM ID so that we avoid confusing the target in
1347	 * case things are really fouled up. Doing so also ensures that all CM
1348	 * callbacks will have finished before a new QP is allocated.
1349	 */
1350	for (i = 0; i < target->ch_count; i++) {
1351		ch = &target->ch[i];
1352		ret += srp_new_cm_id(ch);
1353	}
1354	{
1355		struct srp_terminate_context context = {
1356			.srp_target = target, .scsi_result = DID_RESET << 16};
1357
1358		scsi_host_busy_iter(target->scsi_host, srp_terminate_cmd,
1359				    &context);
1360	}
1361	for (i = 0; i < target->ch_count; i++) {
1362		ch = &target->ch[i];
1363		/*
1364		 * Whether or not creating a new CM ID succeeded, create a new
1365		 * QP. This guarantees that all completion callback function
1366		 * invocations have finished before request resetting starts.
1367		 */
1368		ret += srp_create_ch_ib(ch);
1369
1370		INIT_LIST_HEAD(&ch->free_tx);
1371		for (j = 0; j < target->queue_size; ++j)
1372			list_add(&ch->tx_ring[j]->list, &ch->free_tx);
1373	}
1374
1375	target->qp_in_error = false;
1376
1377	for (i = 0; i < target->ch_count; i++) {
1378		ch = &target->ch[i];
1379		if (ret)
1380			break;
1381		ret = srp_connect_ch(ch, max_iu_len, multich);
1382		multich = true;
1383	}
1384
1385	if (ret == 0)
1386		shost_printk(KERN_INFO, target->scsi_host,
1387			     PFX "reconnect succeeded\n");
1388
1389	return ret;
1390}
1391
1392static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr,
1393			 unsigned int dma_len, u32 rkey)
1394{
1395	struct srp_direct_buf *desc = state->desc;
1396
1397	WARN_ON_ONCE(!dma_len);
1398
1399	desc->va = cpu_to_be64(dma_addr);
1400	desc->key = cpu_to_be32(rkey);
1401	desc->len = cpu_to_be32(dma_len);
1402
1403	state->total_len += dma_len;
1404	state->desc++;
1405	state->ndesc++;
1406}
1407
1408static void srp_reg_mr_err_done(struct ib_cq *cq, struct ib_wc *wc)
1409{
1410	srp_handle_qp_err(cq, wc, "FAST REG");
1411}
1412
1413/*
 * Map up to sg_nents elements of state->sg, where *sg_offset_p is the offset
 * at which to start in the first element. If sg_offset_p != NULL then
1416 * *sg_offset_p is updated to the offset in state->sg[retval] of the first
1417 * byte that has not yet been mapped.
1418 */
1419static int srp_map_finish_fr(struct srp_map_state *state,
1420			     struct srp_request *req,
1421			     struct srp_rdma_ch *ch, int sg_nents,
1422			     unsigned int *sg_offset_p)
1423{
1424	struct srp_target_port *target = ch->target;
1425	struct srp_device *dev = target->srp_host->srp_dev;
1426	struct ib_reg_wr wr;
1427	struct srp_fr_desc *desc;
1428	u32 rkey;
1429	int n, err;
1430
1431	if (state->fr.next >= state->fr.end) {
1432		shost_printk(KERN_ERR, ch->target->scsi_host,
1433			     PFX "Out of MRs (mr_per_cmd = %d)\n",
1434			     ch->target->mr_per_cmd);
1435		return -ENOMEM;
1436	}
1437
1438	WARN_ON_ONCE(!dev->use_fast_reg);
1439
1440	if (sg_nents == 1 && target->global_rkey) {
1441		unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
1442
1443		srp_map_desc(state, sg_dma_address(state->sg) + sg_offset,
1444			     sg_dma_len(state->sg) - sg_offset,
1445			     target->global_rkey);
1446		if (sg_offset_p)
1447			*sg_offset_p = 0;
1448		return 1;
1449	}
1450
1451	desc = srp_fr_pool_get(ch->fr_pool);
1452	if (!desc)
1453		return -ENOMEM;
1454
1455	rkey = ib_inc_rkey(desc->mr->rkey);
1456	ib_update_fast_reg_key(desc->mr, rkey);
1457
1458	n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, sg_offset_p,
1459			 dev->mr_page_size);
1460	if (unlikely(n < 0)) {
1461		srp_fr_pool_put(ch->fr_pool, &desc, 1);
1462		pr_debug("%s: ib_map_mr_sg(%d, %d) returned %d.\n",
1463			 dev_name(&req->scmnd->device->sdev_gendev), sg_nents,
1464			 sg_offset_p ? *sg_offset_p : -1, n);
1465		return n;
1466	}
1467
1468	WARN_ON_ONCE(desc->mr->length == 0);
1469
1470	req->reg_cqe.done = srp_reg_mr_err_done;
1471
1472	wr.wr.next = NULL;
1473	wr.wr.opcode = IB_WR_REG_MR;
1474	wr.wr.wr_cqe = &req->reg_cqe;
1475	wr.wr.num_sge = 0;
1476	wr.wr.send_flags = 0;
1477	wr.mr = desc->mr;
1478	wr.key = desc->mr->rkey;
1479	wr.access = (IB_ACCESS_LOCAL_WRITE |
1480		     IB_ACCESS_REMOTE_READ |
1481		     IB_ACCESS_REMOTE_WRITE);
1482
1483	*state->fr.next++ = desc;
1484	state->nmdesc++;
1485
1486	srp_map_desc(state, desc->mr->iova,
1487		     desc->mr->length, desc->mr->rkey);
1488
1489	err = ib_post_send(ch->qp, &wr.wr, NULL);
1490	if (unlikely(err)) {
1491		WARN_ON_ONCE(err == -ENOMEM);
1492		return err;
1493	}
1494
1495	return n;
1496}
1497
1498static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch,
1499			 struct srp_request *req, struct scatterlist *scat,
1500			 int count)
1501{
1502	unsigned int sg_offset = 0;
1503
1504	state->fr.next = req->fr_list;
1505	state->fr.end = req->fr_list + ch->target->mr_per_cmd;
1506	state->sg = scat;
1507
1508	if (count == 0)
1509		return 0;
1510
1511	while (count) {
1512		int i, n;
1513
1514		n = srp_map_finish_fr(state, req, ch, count, &sg_offset);
1515		if (unlikely(n < 0))
1516			return n;
1517
1518		count -= n;
1519		for (i = 0; i < n; i++)
1520			state->sg = sg_next(state->sg);
1521	}
1522
1523	return 0;
1524}
1525
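/*
 * Describe each DMA-mapped scatterlist element with a direct descriptor that
 * uses the global rkey; no memory registration is performed.
 */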
1526static int srp_map_sg_dma(struct srp_map_state *state, struct srp_rdma_ch *ch,
1527			  struct srp_request *req, struct scatterlist *scat,
1528			  int count)
1529{
1530	struct srp_target_port *target = ch->target;
1531	struct scatterlist *sg;
1532	int i;
1533
1534	for_each_sg(scat, sg, count, i) {
1535		srp_map_desc(state, sg_dma_address(sg), sg_dma_len(sg),
1536			     target->global_rkey);
1537	}
1538
1539	return 0;
1540}
1541
1542/*
1543 * Register the indirect data buffer descriptor with the HCA.
1544 *
1545 * Note: since the indirect data buffer descriptor has been allocated with
1546 * kmalloc() it is guaranteed that this buffer is a physically contiguous
1547 * memory buffer.
1548 */
1549static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
1550		       void **next_mr, void **end_mr, u32 idb_len,
1551		       __be32 *idb_rkey)
1552{
1553	struct srp_target_port *target = ch->target;
1554	struct srp_device *dev = target->srp_host->srp_dev;
1555	struct srp_map_state state;
1556	struct srp_direct_buf idb_desc;
1557	struct scatterlist idb_sg[1];
1558	int ret;
1559
1560	memset(&state, 0, sizeof(state));
1561	memset(&idb_desc, 0, sizeof(idb_desc));
1562	state.gen.next = next_mr;
1563	state.gen.end = end_mr;
1564	state.desc = &idb_desc;
1565	state.base_dma_addr = req->indirect_dma_addr;
1566	state.dma_len = idb_len;
1567
1568	if (dev->use_fast_reg) {
1569		state.sg = idb_sg;
1570		sg_init_one(idb_sg, req->indirect_desc, idb_len);
1571		idb_sg->dma_address = req->indirect_dma_addr; /* hack! */
1572#ifdef CONFIG_NEED_SG_DMA_LENGTH
1573		idb_sg->dma_length = idb_sg->length;	      /* hack^2 */
1574#endif
1575		ret = srp_map_finish_fr(&state, req, ch, 1, NULL);
1576		if (ret < 0)
1577			return ret;
1578		WARN_ON_ONCE(ret < 1);
1579	} else {
1580		return -EINVAL;
1581	}
1582
1583	*idb_rkey = idb_desc.key;
1584
1585	return 0;
1586}
1587
1588static void srp_check_mapping(struct srp_map_state *state,
1589			      struct srp_rdma_ch *ch, struct srp_request *req,
1590			      struct scatterlist *scat, int count)
1591{
1592	struct srp_device *dev = ch->target->srp_host->srp_dev;
1593	struct srp_fr_desc **pfr;
1594	u64 desc_len = 0, mr_len = 0;
1595	int i;
1596
1597	for (i = 0; i < state->ndesc; i++)
1598		desc_len += be32_to_cpu(req->indirect_desc[i].len);
1599	if (dev->use_fast_reg)
1600		for (i = 0, pfr = req->fr_list; i < state->nmdesc; i++, pfr++)
1601			mr_len += (*pfr)->mr->length;
1602	if (desc_len != scsi_bufflen(req->scmnd) ||
1603	    mr_len > scsi_bufflen(req->scmnd))
1604		pr_err("Inconsistent: scsi len %d <> desc len %lld <> mr len %lld; ndesc %d; nmdesc = %d\n",
1605		       scsi_bufflen(req->scmnd), desc_len, mr_len,
1606		       state->ndesc, state->nmdesc);
1607}
1608
1609/**
1610 * srp_map_data() - map SCSI data buffer onto an SRP request
1611 * @scmnd: SCSI command to map
1612 * @ch: SRP RDMA channel
1613 * @req: SRP request
1614 *
1615 * Returns the length in bytes of the SRP_CMD IU or a negative value if
1616 * mapping failed. The size of any immediate data is not included in the
1617 * return value.
1618 */
1619static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
1620			struct srp_request *req)
1621{
1622	struct srp_target_port *target = ch->target;
1623	struct scatterlist *scat, *sg;
1624	struct srp_cmd *cmd = req->cmd->buf;
1625	int i, len, nents, count, ret;
1626	struct srp_device *dev;
1627	struct ib_device *ibdev;
1628	struct srp_map_state state;
1629	struct srp_indirect_buf *indirect_hdr;
1630	u64 data_len;
1631	u32 idb_len, table_len;
1632	__be32 idb_rkey;
1633	u8 fmt;
1634
1635	req->cmd->num_sge = 1;
1636
1637	if (!scsi_sglist(scmnd) || scmnd->sc_data_direction == DMA_NONE)
1638		return sizeof(struct srp_cmd) + cmd->add_cdb_len;
1639
1640	if (scmnd->sc_data_direction != DMA_FROM_DEVICE &&
1641	    scmnd->sc_data_direction != DMA_TO_DEVICE) {
1642		shost_printk(KERN_WARNING, target->scsi_host,
1643			     PFX "Unhandled data direction %d\n",
1644			     scmnd->sc_data_direction);
1645		return -EINVAL;
1646	}
1647
1648	nents = scsi_sg_count(scmnd);
1649	scat  = scsi_sglist(scmnd);
1650	data_len = scsi_bufflen(scmnd);
1651
1652	dev = target->srp_host->srp_dev;
1653	ibdev = dev->dev;
1654
1655	count = ib_dma_map_sg(ibdev, scat, nents, scmnd->sc_data_direction);
1656	if (unlikely(count == 0))
1657		return -EIO;
1658
1659	if (ch->use_imm_data &&
1660	    count <= ch->max_imm_sge &&
1661	    SRP_IMM_DATA_OFFSET + data_len <= ch->max_it_iu_len &&
1662	    scmnd->sc_data_direction == DMA_TO_DEVICE) {
1663		struct srp_imm_buf *buf;
1664		struct ib_sge *sge = &req->cmd->sge[1];
1665
1666		fmt = SRP_DATA_DESC_IMM;
1667		len = SRP_IMM_DATA_OFFSET;
1668		req->nmdesc = 0;
1669		buf = (void *)cmd->add_data + cmd->add_cdb_len;
1670		buf->len = cpu_to_be32(data_len);
1671		WARN_ON_ONCE((void *)(buf + 1) > (void *)cmd + len);
1672		for_each_sg(scat, sg, count, i) {
1673			sge[i].addr   = sg_dma_address(sg);
1674			sge[i].length = sg_dma_len(sg);
1675			sge[i].lkey   = target->lkey;
1676		}
1677		req->cmd->num_sge += count;
1678		goto map_complete;
1679	}
1680
1681	fmt = SRP_DATA_DESC_DIRECT;
1682	len = sizeof(struct srp_cmd) + cmd->add_cdb_len +
1683		sizeof(struct srp_direct_buf);
1684
1685	if (count == 1 && target->global_rkey) {
1686		/*
1687		 * The midlayer only generated a single gather/scatter
1688		 * entry, or DMA mapping coalesced everything to a
1689		 * single entry.  So a direct descriptor along with
1690		 * the DMA MR suffices.
1691		 */
1692		struct srp_direct_buf *buf;
1693
1694		buf = (void *)cmd->add_data + cmd->add_cdb_len;
1695		buf->va  = cpu_to_be64(sg_dma_address(scat));
1696		buf->key = cpu_to_be32(target->global_rkey);
1697		buf->len = cpu_to_be32(sg_dma_len(scat));
1698
1699		req->nmdesc = 0;
1700		goto map_complete;
1701	}
1702
1703	/*
1704	 * We have more than one scatter/gather entry, so build our indirect
1705	 * descriptor table, trying to merge as many entries as we can.
1706	 */
1707	indirect_hdr = (void *)cmd->add_data + cmd->add_cdb_len;
1708
1709	ib_dma_sync_single_for_cpu(ibdev, req->indirect_dma_addr,
1710				   target->indirect_size, DMA_TO_DEVICE);
1711
1712	memset(&state, 0, sizeof(state));
1713	state.desc = req->indirect_desc;
1714	if (dev->use_fast_reg)
1715		ret = srp_map_sg_fr(&state, ch, req, scat, count);
1716	else
1717		ret = srp_map_sg_dma(&state, ch, req, scat, count);
1718	req->nmdesc = state.nmdesc;
1719	if (ret < 0)
1720		goto unmap;
1721
1722	{
1723		DEFINE_DYNAMIC_DEBUG_METADATA(ddm,
1724			"Memory mapping consistency check");
1725		if (DYNAMIC_DEBUG_BRANCH(ddm))
1726			srp_check_mapping(&state, ch, req, scat, count);
1727	}
1728
1729	/* We've mapped the request, now pull as much of the indirect
1730	 * descriptor table as we can into the command buffer. If this
1731	 * target is not using an external indirect table, we are
1732	 * guaranteed to fit into the command, as the SCSI layer won't
1733	 * give us more S/G entries than we allow.
1734	 */
1735	if (state.ndesc == 1) {
1736		/*
1737		 * Memory registration collapsed the sg-list into one entry,
1738		 * so use a direct descriptor.
1739		 */
1740		struct srp_direct_buf *buf;
1741
1742		buf = (void *)cmd->add_data + cmd->add_cdb_len;
1743		*buf = req->indirect_desc[0];
1744		goto map_complete;
1745	}
1746
1747	if (unlikely(target->cmd_sg_cnt < state.ndesc &&
1748						!target->allow_ext_sg)) {
1749		shost_printk(KERN_ERR, target->scsi_host,
1750			     "Could not fit S/G list into SRP_CMD\n");
1751		ret = -EIO;
1752		goto unmap;
1753	}
1754
1755	count = min(state.ndesc, target->cmd_sg_cnt);
1756	table_len = state.ndesc * sizeof (struct srp_direct_buf);
1757	idb_len = sizeof(struct srp_indirect_buf) + table_len;
1758
1759	fmt = SRP_DATA_DESC_INDIRECT;
1760	len = sizeof(struct srp_cmd) + cmd->add_cdb_len +
1761		sizeof(struct srp_indirect_buf);
1762	len += count * sizeof (struct srp_direct_buf);
1763
1764	memcpy(indirect_hdr->desc_list, req->indirect_desc,
1765	       count * sizeof (struct srp_direct_buf));
1766
1767	if (!target->global_rkey) {
1768		ret = srp_map_idb(ch, req, state.gen.next, state.gen.end,
1769				  idb_len, &idb_rkey);
1770		if (ret < 0)
1771			goto unmap;
1772		req->nmdesc++;
1773	} else {
1774		idb_rkey = cpu_to_be32(target->global_rkey);
1775	}
1776
1777	indirect_hdr->table_desc.va = cpu_to_be64(req->indirect_dma_addr);
1778	indirect_hdr->table_desc.key = idb_rkey;
1779	indirect_hdr->table_desc.len = cpu_to_be32(table_len);
1780	indirect_hdr->len = cpu_to_be32(state.total_len);
1781
1782	if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1783		cmd->data_out_desc_cnt = count;
1784	else
1785		cmd->data_in_desc_cnt = count;
1786
1787	ib_dma_sync_single_for_device(ibdev, req->indirect_dma_addr, table_len,
1788				      DMA_TO_DEVICE);
1789
1790map_complete:
1791	if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1792		cmd->buf_fmt = fmt << 4;
1793	else
1794		cmd->buf_fmt = fmt;
1795
1796	return len;
1797
1798unmap:
1799	srp_unmap_data(scmnd, ch, req);
1800	if (ret == -ENOMEM && req->nmdesc >= target->mr_pool_size)
1801		ret = -E2BIG;
1802	return ret;
1803}
1804
1805/*
 * Return an IU, and possibly a credit, to the free pool.
1807 */
1808static void srp_put_tx_iu(struct srp_rdma_ch *ch, struct srp_iu *iu,
1809			  enum srp_iu_type iu_type)
1810{
1811	unsigned long flags;
1812
1813	spin_lock_irqsave(&ch->lock, flags);
1814	list_add(&iu->list, &ch->free_tx);
1815	if (iu_type != SRP_IU_RSP)
1816		++ch->req_lim;
1817	spin_unlock_irqrestore(&ch->lock, flags);
1818}
1819
1820/*
1821 * Must be called with ch->lock held to protect req_lim and free_tx.
1822 * If IU is not sent, it must be returned using srp_put_tx_iu().
1823 *
1824 * Note:
1825 * An upper limit for the number of allocated information units for each
1826 * request type is:
1827 * - SRP_IU_CMD: SRP_CMD_SQ_SIZE, since the SCSI mid-layer never queues
1828 *   more than Scsi_Host.can_queue requests.
1829 * - SRP_IU_TSK_MGMT: SRP_TSK_MGMT_SQ_SIZE.
1830 * - SRP_IU_RSP: 1, since a conforming SRP target never sends more than
1831 *   one unanswered SRP request to an initiator.
1832 */
1833static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch,
1834				      enum srp_iu_type iu_type)
1835{
1836	struct srp_target_port *target = ch->target;
1837	s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE;
1838	struct srp_iu *iu;
1839
1840	lockdep_assert_held(&ch->lock);
1841
1842	ib_process_cq_direct(ch->send_cq, -1);
1843
1844	if (list_empty(&ch->free_tx))
1845		return NULL;
1846
1847	/* Initiator responses to target requests do not consume credits */
1848	if (iu_type != SRP_IU_RSP) {
1849		if (ch->req_lim <= rsv) {
1850			++target->zero_req_lim;
1851			return NULL;
1852		}
1853
1854		--ch->req_lim;
1855	}
1856
1857	iu = list_first_entry(&ch->free_tx, struct srp_iu, list);
1858	list_del(&iu->list);
1859	return iu;
1860}
1861
1862/*
1863 * Note: if this function is called from inside ib_drain_sq() then it will
1864 * be called without ch->lock being held. If ib_drain_sq() dequeues a WQE
1865 * with status IB_WC_SUCCESS then that's a bug.
1866 */
1867static void srp_send_done(struct ib_cq *cq, struct ib_wc *wc)
1868{
1869	struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
1870	struct srp_rdma_ch *ch = cq->cq_context;
1871
1872	if (unlikely(wc->status != IB_WC_SUCCESS)) {
1873		srp_handle_qp_err(cq, wc, "SEND");
1874		return;
1875	}
1876
1877	lockdep_assert_held(&ch->lock);
1878
1879	list_add(&iu->list, &ch->free_tx);
1880}
1881
1882/**
1883 * srp_post_send() - send an SRP information unit
1884 * @ch: RDMA channel over which to send the information unit.
1885 * @iu: Information unit to send.
1886 * @len: Length of the information unit excluding immediate data.
1887 */
1888static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len)
1889{
1890	struct srp_target_port *target = ch->target;
1891	struct ib_send_wr wr;
1892
1893	if (WARN_ON_ONCE(iu->num_sge > SRP_MAX_SGE))
1894		return -EINVAL;
1895
1896	iu->sge[0].addr   = iu->dma;
1897	iu->sge[0].length = len;
1898	iu->sge[0].lkey   = target->lkey;
1899
1900	iu->cqe.done = srp_send_done;
1901
1902	wr.next       = NULL;
1903	wr.wr_cqe     = &iu->cqe;
1904	wr.sg_list    = &iu->sge[0];
1905	wr.num_sge    = iu->num_sge;
1906	wr.opcode     = IB_WR_SEND;
1907	wr.send_flags = IB_SEND_SIGNALED;
1908
1909	return ib_post_send(ch->qp, &wr, NULL);
1910}
1911
1912static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu)
1913{
1914	struct srp_target_port *target = ch->target;
1915	struct ib_recv_wr wr;
1916	struct ib_sge list;
1917
1918	list.addr   = iu->dma;
1919	list.length = iu->size;
1920	list.lkey   = target->lkey;
1921
1922	iu->cqe.done = srp_recv_done;
1923
1924	wr.next     = NULL;
1925	wr.wr_cqe   = &iu->cqe;
1926	wr.sg_list  = &list;
1927	wr.num_sge  = 1;
1928
1929	return ib_post_recv(ch->qp, &wr, NULL);
1930}
1931
1932static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp)
1933{
1934	struct srp_target_port *target = ch->target;
1935	struct srp_request *req;
1936	struct scsi_cmnd *scmnd;
1937	unsigned long flags;
1938
1939	if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) {
1940		spin_lock_irqsave(&ch->lock, flags);
1941		ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
1942		if (rsp->tag == ch->tsk_mgmt_tag) {
1943			ch->tsk_mgmt_status = -1;
1944			if (be32_to_cpu(rsp->resp_data_len) >= 4)
1945				ch->tsk_mgmt_status = rsp->data[3];
1946			complete(&ch->tsk_mgmt_done);
1947		} else {
1948			shost_printk(KERN_ERR, target->scsi_host,
1949				     "Received tsk mgmt response too late for tag %#llx\n",
1950				     rsp->tag);
1951		}
1952		spin_unlock_irqrestore(&ch->lock, flags);
1953	} else {
1954		scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag);
1955		if (scmnd) {
1956			req = scsi_cmd_priv(scmnd);
1957			scmnd = srp_claim_req(ch, req, NULL, scmnd);
1958		}
1959		if (!scmnd) {
1960			shost_printk(KERN_ERR, target->scsi_host,
1961				     "Null scmnd for RSP w/tag %#016llx received on ch %td / QP %#x\n",
1962				     rsp->tag, ch - target->ch, ch->qp->qp_num);
1963
1964			spin_lock_irqsave(&ch->lock, flags);
1965			ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
1966			spin_unlock_irqrestore(&ch->lock, flags);
1967
1968			return;
1969		}
1970		scmnd->result = rsp->status;
1971
1972		if (rsp->flags & SRP_RSP_FLAG_SNSVALID) {
1973			memcpy(scmnd->sense_buffer, rsp->data +
1974			       be32_to_cpu(rsp->resp_data_len),
1975			       min_t(int, be32_to_cpu(rsp->sense_data_len),
1976				     SCSI_SENSE_BUFFERSIZE));
1977		}
1978
1979		if (unlikely(rsp->flags & SRP_RSP_FLAG_DIUNDER))
1980			scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt));
1981		else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOUNDER))
1982			scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt));
1983
1984		srp_free_req(ch, req, scmnd,
1985			     be32_to_cpu(rsp->req_lim_delta));
1986
1987		scmnd->scsi_done(scmnd);
1988	}
1989}
1990
1991static int srp_response_common(struct srp_rdma_ch *ch, s32 req_delta,
1992			       void *rsp, int len)
1993{
1994	struct srp_target_port *target = ch->target;
1995	struct ib_device *dev = target->srp_host->srp_dev->dev;
1996	unsigned long flags;
1997	struct srp_iu *iu;
1998	int err;
1999
2000	spin_lock_irqsave(&ch->lock, flags);
2001	ch->req_lim += req_delta;
2002	iu = __srp_get_tx_iu(ch, SRP_IU_RSP);
2003	spin_unlock_irqrestore(&ch->lock, flags);
2004
2005	if (!iu) {
2006		shost_printk(KERN_ERR, target->scsi_host, PFX
2007			     "no IU available to send response\n");
2008		return 1;
2009	}
2010
2011	iu->num_sge = 1;
2012	ib_dma_sync_single_for_cpu(dev, iu->dma, len, DMA_TO_DEVICE);
2013	memcpy(iu->buf, rsp, len);
2014	ib_dma_sync_single_for_device(dev, iu->dma, len, DMA_TO_DEVICE);
2015
2016	err = srp_post_send(ch, iu, len);
2017	if (err) {
2018		shost_printk(KERN_ERR, target->scsi_host, PFX
2019			     "unable to post response: %d\n", err);
2020		srp_put_tx_iu(ch, iu, SRP_IU_RSP);
2021	}
2022
2023	return err;
2024}
2025
2026static void srp_process_cred_req(struct srp_rdma_ch *ch,
2027				 struct srp_cred_req *req)
2028{
2029	struct srp_cred_rsp rsp = {
2030		.opcode = SRP_CRED_RSP,
2031		.tag = req->tag,
2032	};
2033	s32 delta = be32_to_cpu(req->req_lim_delta);
2034
2035	if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
2036		shost_printk(KERN_ERR, ch->target->scsi_host, PFX
2037			     "problems processing SRP_CRED_REQ\n");
2038}
2039
2040static void srp_process_aer_req(struct srp_rdma_ch *ch,
2041				struct srp_aer_req *req)
2042{
2043	struct srp_target_port *target = ch->target;
2044	struct srp_aer_rsp rsp = {
2045		.opcode = SRP_AER_RSP,
2046		.tag = req->tag,
2047	};
2048	s32 delta = be32_to_cpu(req->req_lim_delta);
2049
2050	shost_printk(KERN_ERR, target->scsi_host, PFX
2051		     "ignoring AER for LUN %llu\n", scsilun_to_int(&req->lun));
2052
2053	if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
2054		shost_printk(KERN_ERR, target->scsi_host, PFX
2055			     "problems processing SRP_AER_REQ\n");
2056}
2057
2058static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc)
2059{
2060	struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
2061	struct srp_rdma_ch *ch = cq->cq_context;
2062	struct srp_target_port *target = ch->target;
2063	struct ib_device *dev = target->srp_host->srp_dev->dev;
2064	int res;
2065	u8 opcode;
2066
2067	if (unlikely(wc->status != IB_WC_SUCCESS)) {
2068		srp_handle_qp_err(cq, wc, "RECV");
2069		return;
2070	}
2071
2072	ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_ti_iu_len,
2073				   DMA_FROM_DEVICE);
2074
2075	opcode = *(u8 *) iu->buf;
2076
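	/*
	 * Debug aid: flip the condition below to true to log the opcode and
	 * hex-dump every received IU.
	 */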
2077	if (0) {
2078		shost_printk(KERN_ERR, target->scsi_host,
2079			     PFX "recv completion, opcode 0x%02x\n", opcode);
2080		print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 8, 1,
2081			       iu->buf, wc->byte_len, true);
2082	}
2083
2084	switch (opcode) {
2085	case SRP_RSP:
2086		srp_process_rsp(ch, iu->buf);
2087		break;
2088
2089	case SRP_CRED_REQ:
2090		srp_process_cred_req(ch, iu->buf);
2091		break;
2092
2093	case SRP_AER_REQ:
2094		srp_process_aer_req(ch, iu->buf);
2095		break;
2096
2097	case SRP_T_LOGOUT:
2098		/* XXX Handle target logout */
2099		shost_printk(KERN_WARNING, target->scsi_host,
2100			     PFX "Got target logout request\n");
2101		break;
2102
2103	default:
2104		shost_printk(KERN_WARNING, target->scsi_host,
2105			     PFX "Unhandled SRP opcode 0x%02x\n", opcode);
2106		break;
2107	}
2108
2109	ib_dma_sync_single_for_device(dev, iu->dma, ch->max_ti_iu_len,
2110				      DMA_FROM_DEVICE);
2111
2112	res = srp_post_recv(ch, iu);
2113	if (res != 0)
2114		shost_printk(KERN_ERR, target->scsi_host,
2115			     PFX "Recv failed with error code %d\n", res);
2116}
2117
2118/**
2119 * srp_tl_err_work() - handle a transport layer error
2120 * @work: Work structure embedded in an SRP target port.
2121 *
2122 * Note: This function may get invoked before the rport has been created,
2123 * hence the target->rport test.
2124 */
2125static void srp_tl_err_work(struct work_struct *work)
2126{
2127	struct srp_target_port *target;
2128
2129	target = container_of(work, struct srp_target_port, tl_err_work);
2130	if (target->rport)
2131		srp_start_tl_fail_timers(target->rport);
2132}
2133
2134static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
2135		const char *opname)
2136{
2137	struct srp_rdma_ch *ch = cq->cq_context;
2138	struct srp_target_port *target = ch->target;
2139
2140	if (ch->connected && !target->qp_in_error) {
2141		shost_printk(KERN_ERR, target->scsi_host,
2142			     PFX "failed %s status %s (%d) for CQE %p\n",
2143			     opname, ib_wc_status_msg(wc->status), wc->status,
2144			     wc->wr_cqe);
2145		queue_work(system_long_wq, &target->tl_err_work);
2146	}
2147	target->qp_in_error = true;
2148}
2149
2150static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
2151{
2152	struct srp_target_port *target = host_to_target(shost);
2153	struct srp_rdma_ch *ch;
2154	struct srp_request *req = scsi_cmd_priv(scmnd);
2155	struct srp_iu *iu;
2156	struct srp_cmd *cmd;
2157	struct ib_device *dev;
2158	unsigned long flags;
2159	u32 tag;
2160	int len, ret;
2161
2162	scmnd->result = srp_chkready(target->rport);
2163	if (unlikely(scmnd->result))
2164		goto err;
2165
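	/*
	 * blk_mq_unique_tag() encodes the hardware queue index in the upper
	 * 16 bits of the tag and blk_mq_unique_tag_to_hwq() extracts it, so
	 * each blk-mq hardware queue maps onto its own RDMA channel.
	 */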
2166	WARN_ON_ONCE(scmnd->request->tag < 0);
2167	tag = blk_mq_unique_tag(scmnd->request);
2168	ch = &target->ch[blk_mq_unique_tag_to_hwq(tag)];
2169
2170	spin_lock_irqsave(&ch->lock, flags);
2171	iu = __srp_get_tx_iu(ch, SRP_IU_CMD);
2172	spin_unlock_irqrestore(&ch->lock, flags);
2173
2174	if (!iu)
2175		goto err;
2176
2177	dev = target->srp_host->srp_dev->dev;
2178	ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_it_iu_len,
2179				   DMA_TO_DEVICE);
2180
2181	cmd = iu->buf;
2182	memset(cmd, 0, sizeof *cmd);
2183
2184	cmd->opcode = SRP_CMD;
2185	int_to_scsilun(scmnd->device->lun, &cmd->lun);
2186	cmd->tag    = tag;
2187	memcpy(cmd->cdb, scmnd->cmnd, scmnd->cmd_len);
2188	if (unlikely(scmnd->cmd_len > sizeof(cmd->cdb))) {
2189		cmd->add_cdb_len = round_up(scmnd->cmd_len - sizeof(cmd->cdb),
2190					    4);
2191		if (WARN_ON_ONCE(cmd->add_cdb_len > SRP_MAX_ADD_CDB_LEN))
2192			goto err_iu;
2193	}
2194
2195	req->scmnd    = scmnd;
2196	req->cmd      = iu;
2197
2198	len = srp_map_data(scmnd, ch, req);
2199	if (len < 0) {
2200		shost_printk(KERN_ERR, target->scsi_host,
2201			     PFX "Failed to map data (%d)\n", len);
2202		/*
2203		 * If we ran out of memory descriptors (-ENOMEM) because an
2204		 * application is queuing many requests with more than
2205		 * max_pages_per_mr sg-list elements, tell the SCSI mid-layer
2206		 * to reduce queue depth temporarily.
2207		 */
2208		scmnd->result = len == -ENOMEM ?
2209			DID_OK << 16 | QUEUE_FULL << 1 : DID_ERROR << 16;
2210		goto err_iu;
2211	}
2212
2213	ib_dma_sync_single_for_device(dev, iu->dma, ch->max_it_iu_len,
2214				      DMA_TO_DEVICE);
2215
2216	if (srp_post_send(ch, iu, len)) {
2217		shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n");
2218		scmnd->result = DID_ERROR << 16;
2219		goto err_unmap;
2220	}
2221
2222	return 0;
2223
2224err_unmap:
2225	srp_unmap_data(scmnd, ch, req);
2226
2227err_iu:
2228	srp_put_tx_iu(ch, iu, SRP_IU_CMD);
2229
2230	/*
2231	 * Prevent the loops that iterate over the request ring from
2232	 * encountering a dangling SCSI command pointer.
2233	 */
2234	req->scmnd = NULL;
2235
2236err:
2237	if (scmnd->result) {
2238		scmnd->scsi_done(scmnd);
2239		ret = 0;
2240	} else {
2241		ret = SCSI_MLQUEUE_HOST_BUSY;
2242	}
2243
2244	return ret;
2245}
2246
2247/*
2248 * Note: the resources allocated in this function are freed in
2249 * srp_free_ch_ib().
2250 */
2251static int srp_alloc_iu_bufs(struct srp_rdma_ch *ch)
2252{
2253	struct srp_target_port *target = ch->target;
2254	int i;
2255
2256	ch->rx_ring = kcalloc(target->queue_size, sizeof(*ch->rx_ring),
2257			      GFP_KERNEL);
2258	if (!ch->rx_ring)
2259		goto err_no_ring;
2260	ch->tx_ring = kcalloc(target->queue_size, sizeof(*ch->tx_ring),
2261			      GFP_KERNEL);
2262	if (!ch->tx_ring)
2263		goto err_no_ring;
2264
2265	for (i = 0; i < target->queue_size; ++i) {
2266		ch->rx_ring[i] = srp_alloc_iu(target->srp_host,
2267					      ch->max_ti_iu_len,
2268					      GFP_KERNEL, DMA_FROM_DEVICE);
2269		if (!ch->rx_ring[i])
2270			goto err;
2271	}
2272
2273	for (i = 0; i < target->queue_size; ++i) {
2274		ch->tx_ring[i] = srp_alloc_iu(target->srp_host,
2275					      ch->max_it_iu_len,
2276					      GFP_KERNEL, DMA_TO_DEVICE);
2277		if (!ch->tx_ring[i])
2278			goto err;
2279
2280		list_add(&ch->tx_ring[i]->list, &ch->free_tx);
2281	}
2282
2283	return 0;
2284
2285err:
2286	for (i = 0; i < target->queue_size; ++i) {
2287		srp_free_iu(target->srp_host, ch->rx_ring[i]);
2288		srp_free_iu(target->srp_host, ch->tx_ring[i]);
2289	}
2290
2292err_no_ring:
2293	kfree(ch->tx_ring);
2294	ch->tx_ring = NULL;
2295	kfree(ch->rx_ring);
2296	ch->rx_ring = NULL;
2297
2298	return -ENOMEM;
2299}
2300
2301static uint32_t srp_compute_rq_tmo(struct ib_qp_attr *qp_attr, int attr_mask)
2302{
2303	uint64_t T_tr_ns, max_compl_time_ms;
2304	uint32_t rq_tmo_jiffies;
2305
2306	/*
2307	 * According to section 11.2.4.2 in the IBTA spec (Modify Queue Pair,
2308	 * table 91), both the QP timeout and the retry count have to be set
2309	 * for RC QP's during the RTR to RTS transition.
2310	 */
2311	WARN_ON_ONCE((attr_mask & (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)) !=
2312		     (IB_QP_TIMEOUT | IB_QP_RETRY_CNT));
2313
2314	/*
2315	 * Set target->rq_tmo_jiffies to one second more than the largest time
2316	 * it can take before an error completion is generated. See also
2317	 * C9-140..142 in the IBTA spec for more information about how to
2318	 * convert the QP Local ACK Timeout value to nanoseconds.
2319	 */
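	/*
	 * Example (hypothetical values): with timeout = 14 and retry_cnt = 7,
	 * T_tr = 4096 * 2^14 ns ~= 67 ms, so max_compl_time ~= 7 * 4 * 67 ms
	 * ~= 1.9 s and rq_tmo ends up at roughly 2.9 s.
	 */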
2320	T_tr_ns = 4096 * (1ULL << qp_attr->timeout);
2321	max_compl_time_ms = qp_attr->retry_cnt * 4 * T_tr_ns;
2322	do_div(max_compl_time_ms, NSEC_PER_MSEC);
2323	rq_tmo_jiffies = msecs_to_jiffies(max_compl_time_ms + 1000);
2324
2325	return rq_tmo_jiffies;
2326}
2327
2328static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
2329			       const struct srp_login_rsp *lrsp,
2330			       struct srp_rdma_ch *ch)
2331{
2332	struct srp_target_port *target = ch->target;
2333	struct ib_qp_attr *qp_attr = NULL;
2334	int attr_mask = 0;
2335	int ret = 0;
2336	int i;
2337
2338	if (lrsp->opcode == SRP_LOGIN_RSP) {
2339		ch->max_ti_iu_len = be32_to_cpu(lrsp->max_ti_iu_len);
2340		ch->req_lim       = be32_to_cpu(lrsp->req_lim_delta);
2341		ch->use_imm_data  = srp_use_imm_data &&
2342			(lrsp->rsp_flags & SRP_LOGIN_RSP_IMMED_SUPP);
2343		ch->max_it_iu_len = srp_max_it_iu_len(target->cmd_sg_cnt,
2344						      ch->use_imm_data,
2345						      target->max_it_iu_size);
2346		WARN_ON_ONCE(ch->max_it_iu_len >
2347			     be32_to_cpu(lrsp->max_it_iu_len));
2348
2349		if (ch->use_imm_data)
2350			shost_printk(KERN_DEBUG, target->scsi_host,
2351				     PFX "using immediate data\n");
2352
2353		/*
2354		 * Reserve credits for task management so we don't
2355		 * bounce requests back to the SCSI mid-layer.
2356		 */
2357		target->scsi_host->can_queue
2358			= min(ch->req_lim - SRP_TSK_MGMT_SQ_SIZE,
2359			      target->scsi_host->can_queue);
2360		target->scsi_host->cmd_per_lun
2361			= min_t(int, target->scsi_host->can_queue,
2362				target->scsi_host->cmd_per_lun);
2363	} else {
2364		shost_printk(KERN_WARNING, target->scsi_host,
2365			     PFX "Unhandled RSP opcode %#x\n", lrsp->opcode);
2366		ret = -ECONNRESET;
2367		goto error;
2368	}
2369
2370	if (!ch->rx_ring) {
2371		ret = srp_alloc_iu_bufs(ch);
2372		if (ret)
2373			goto error;
2374	}
2375
2376	for (i = 0; i < target->queue_size; i++) {
2377		struct srp_iu *iu = ch->rx_ring[i];
2378
2379		ret = srp_post_recv(ch, iu);
2380		if (ret)
2381			goto error;
2382	}
2383
2384	if (!target->using_rdma_cm) {
2385		ret = -ENOMEM;
2386		qp_attr = kmalloc(sizeof(*qp_attr), GFP_KERNEL);
2387		if (!qp_attr)
2388			goto error;
2389
2390		qp_attr->qp_state = IB_QPS_RTR;
2391		ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2392		if (ret)
2393			goto error_free;
2394
2395		ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2396		if (ret)
2397			goto error_free;
2398
2399		qp_attr->qp_state = IB_QPS_RTS;
2400		ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2401		if (ret)
2402			goto error_free;
2403
2404		target->rq_tmo_jiffies = srp_compute_rq_tmo(qp_attr, attr_mask);
2405
2406		ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2407		if (ret)
2408			goto error_free;
2409
2410		ret = ib_send_cm_rtu(cm_id, NULL, 0);
2411	}
2412
2413error_free:
2414	kfree(qp_attr);
2415
2416error:
2417	ch->status = ret;
2418}
2419
2420static void srp_ib_cm_rej_handler(struct ib_cm_id *cm_id,
2421				  const struct ib_cm_event *event,
2422				  struct srp_rdma_ch *ch)
2423{
2424	struct srp_target_port *target = ch->target;
2425	struct Scsi_Host *shost = target->scsi_host;
2426	struct ib_class_port_info *cpi;
2427	int opcode;
2428	u16 dlid;
2429
2430	switch (event->param.rej_rcvd.reason) {
2431	case IB_CM_REJ_PORT_CM_REDIRECT:
2432		cpi = event->param.rej_rcvd.ari;
2433		dlid = be16_to_cpu(cpi->redirect_lid);
2434		sa_path_set_dlid(&ch->ib_cm.path, dlid);
2435		ch->ib_cm.path.pkey = cpi->redirect_pkey;
2436		cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff;
2437		memcpy(ch->ib_cm.path.dgid.raw, cpi->redirect_gid, 16);
2438
2439		ch->status = dlid ? SRP_DLID_REDIRECT : SRP_PORT_REDIRECT;
2440		break;
2441
2442	case IB_CM_REJ_PORT_REDIRECT:
2443		if (srp_target_is_topspin(target)) {
2444			union ib_gid *dgid = &ch->ib_cm.path.dgid;
2445
2446			/*
2447			 * Topspin/Cisco SRP gateways incorrectly send
2448			 * reject reason code 25 when they mean 24
2449			 * (port redirect).
2450			 */
2451			memcpy(dgid->raw, event->param.rej_rcvd.ari, 16);
2452
2453			shost_printk(KERN_DEBUG, shost,
2454				     PFX "Topspin/Cisco redirect to target port GID %016llx%016llx\n",
2455				     be64_to_cpu(dgid->global.subnet_prefix),
2456				     be64_to_cpu(dgid->global.interface_id));
2457
2458			ch->status = SRP_PORT_REDIRECT;
2459		} else {
2460			shost_printk(KERN_WARNING, shost,
2461				     "  REJ reason: IB_CM_REJ_PORT_REDIRECT\n");
2462			ch->status = -ECONNRESET;
2463		}
2464		break;
2465
2466	case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID:
2467		shost_printk(KERN_WARNING, shost,
2468			    "  REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n");
2469		ch->status = -ECONNRESET;
2470		break;
2471
2472	case IB_CM_REJ_CONSUMER_DEFINED:
2473		opcode = *(u8 *) event->private_data;
2474		if (opcode == SRP_LOGIN_REJ) {
2475			struct srp_login_rej *rej = event->private_data;
2476			u32 reason = be32_to_cpu(rej->reason);
2477
2478			if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE)
2479				shost_printk(KERN_WARNING, shost,
2480					     PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n");
2481			else
2482				shost_printk(KERN_WARNING, shost, PFX
2483					     "SRP LOGIN from %pI6 to %pI6 REJECTED, reason 0x%08x\n",
2484					     target->sgid.raw,
2485					     target->ib_cm.orig_dgid.raw,
2486					     reason);
2487		} else
2488			shost_printk(KERN_WARNING, shost,
2489				     "  REJ reason: IB_CM_REJ_CONSUMER_DEFINED, opcode 0x%02x\n",
2490				     opcode);
2491		ch->status = -ECONNRESET;
2492		break;
2493
2494	case IB_CM_REJ_STALE_CONN:
2495		shost_printk(KERN_WARNING, shost, "  REJ reason: stale connection\n");
2496		ch->status = SRP_STALE_CONN;
2497		break;
2498
2499	default:
2500		shost_printk(KERN_WARNING, shost, "  REJ reason 0x%x\n",
2501			     event->param.rej_rcvd.reason);
2502		ch->status = -ECONNRESET;
2503	}
2504}
2505
2506static int srp_ib_cm_handler(struct ib_cm_id *cm_id,
2507			     const struct ib_cm_event *event)
2508{
2509	struct srp_rdma_ch *ch = cm_id->context;
2510	struct srp_target_port *target = ch->target;
2511	int comp = 0;
2512
2513	switch (event->event) {
2514	case IB_CM_REQ_ERROR:
2515		shost_printk(KERN_DEBUG, target->scsi_host,
2516			     PFX "Sending CM REQ failed\n");
2517		comp = 1;
2518		ch->status = -ECONNRESET;
2519		break;
2520
2521	case IB_CM_REP_RECEIVED:
2522		comp = 1;
2523		srp_cm_rep_handler(cm_id, event->private_data, ch);
2524		break;
2525
2526	case IB_CM_REJ_RECEIVED:
2527		shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n");
2528		comp = 1;
2529
2530		srp_ib_cm_rej_handler(cm_id, event, ch);
2531		break;
2532
2533	case IB_CM_DREQ_RECEIVED:
2534		shost_printk(KERN_WARNING, target->scsi_host,
2535			     PFX "DREQ received - connection closed\n");
2536		ch->connected = false;
2537		if (ib_send_cm_drep(cm_id, NULL, 0))
2538			shost_printk(KERN_ERR, target->scsi_host,
2539				     PFX "Sending CM DREP failed\n");
2540		queue_work(system_long_wq, &target->tl_err_work);
2541		break;
2542
2543	case IB_CM_TIMEWAIT_EXIT:
2544		shost_printk(KERN_ERR, target->scsi_host,
2545			     PFX "connection closed\n");
2546		comp = 1;
2547
2548		ch->status = 0;
2549		break;
2550
2551	case IB_CM_MRA_RECEIVED:
2552	case IB_CM_DREQ_ERROR:
2553	case IB_CM_DREP_RECEIVED:
2554		break;
2555
2556	default:
2557		shost_printk(KERN_WARNING, target->scsi_host,
2558			     PFX "Unhandled CM event %d\n", event->event);
2559		break;
2560	}
2561
2562	if (comp)
2563		complete(&ch->done);
2564
2565	return 0;
2566}
2567
2568static void srp_rdma_cm_rej_handler(struct srp_rdma_ch *ch,
2569				    struct rdma_cm_event *event)
2570{
2571	struct srp_target_port *target = ch->target;
2572	struct Scsi_Host *shost = target->scsi_host;
2573	int opcode;
2574
2575	switch (event->status) {
2576	case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID:
2577		shost_printk(KERN_WARNING, shost,
2578			    "  REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n");
2579		ch->status = -ECONNRESET;
2580		break;
2581
2582	case IB_CM_REJ_CONSUMER_DEFINED:
2583		opcode = *(u8 *) event->param.conn.private_data;
2584		if (opcode == SRP_LOGIN_REJ) {
2585			struct srp_login_rej *rej =
2586				(struct srp_login_rej *)
2587				event->param.conn.private_data;
2588			u32 reason = be32_to_cpu(rej->reason);
2589
2590			if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE)
2591				shost_printk(KERN_WARNING, shost,
2592					     PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n");
2593			else
2594				shost_printk(KERN_WARNING, shost,
2595					    PFX "SRP LOGIN REJECTED, reason 0x%08x\n", reason);
2596		} else {
2597			shost_printk(KERN_WARNING, shost,
2598				     "  REJ reason: IB_CM_REJ_CONSUMER_DEFINED, opcode 0x%02x\n",
2599				     opcode);
2600		}
2601		ch->status = -ECONNRESET;
2602		break;
2603
2604	case IB_CM_REJ_STALE_CONN:
2605		shost_printk(KERN_WARNING, shost,
2606			     "  REJ reason: stale connection\n");
2607		ch->status = SRP_STALE_CONN;
2608		break;
2609
2610	default:
2611		shost_printk(KERN_WARNING, shost, "  REJ reason 0x%x\n",
2612			     event->status);
2613		ch->status = -ECONNRESET;
2614		break;
2615	}
2616}
2617
2618static int srp_rdma_cm_handler(struct rdma_cm_id *cm_id,
2619			       struct rdma_cm_event *event)
2620{
2621	struct srp_rdma_ch *ch = cm_id->context;
2622	struct srp_target_port *target = ch->target;
2623	int comp = 0;
2624
2625	switch (event->event) {
2626	case RDMA_CM_EVENT_ADDR_RESOLVED:
2627		ch->status = 0;
2628		comp = 1;
2629		break;
2630
2631	case RDMA_CM_EVENT_ADDR_ERROR:
2632		ch->status = -ENXIO;
2633		comp = 1;
2634		break;
2635
2636	case RDMA_CM_EVENT_ROUTE_RESOLVED:
2637		ch->status = 0;
2638		comp = 1;
2639		break;
2640
2641	case RDMA_CM_EVENT_ROUTE_ERROR:
2642	case RDMA_CM_EVENT_UNREACHABLE:
2643		ch->status = -EHOSTUNREACH;
2644		comp = 1;
2645		break;
2646
2647	case RDMA_CM_EVENT_CONNECT_ERROR:
2648		shost_printk(KERN_DEBUG, target->scsi_host,
2649			     PFX "Sending CM REQ failed\n");
2650		comp = 1;
2651		ch->status = -ECONNRESET;
2652		break;
2653
2654	case RDMA_CM_EVENT_ESTABLISHED:
2655		comp = 1;
2656		srp_cm_rep_handler(NULL, event->param.conn.private_data, ch);
2657		break;
2658
2659	case RDMA_CM_EVENT_REJECTED:
2660		shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n");
2661		comp = 1;
2662
2663		srp_rdma_cm_rej_handler(ch, event);
2664		break;
2665
2666	case RDMA_CM_EVENT_DISCONNECTED:
2667		if (ch->connected) {
2668			shost_printk(KERN_WARNING, target->scsi_host,
2669				     PFX "received DREQ\n");
2670			rdma_disconnect(ch->rdma_cm.cm_id);
2671			comp = 1;
2672			ch->status = 0;
2673			queue_work(system_long_wq, &target->tl_err_work);
2674		}
2675		break;
2676
2677	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
2678		shost_printk(KERN_ERR, target->scsi_host,
2679			     PFX "connection closed\n");
2680
2681		comp = 1;
2682		ch->status = 0;
2683		break;
2684
2685	default:
2686		shost_printk(KERN_WARNING, target->scsi_host,
2687			     PFX "Unhandled CM event %d\n", event->event);
2688		break;
2689	}
2690
2691	if (comp)
2692		complete(&ch->done);
2693
2694	return 0;
2695}
2696
2697/**
2698 * srp_change_queue_depth() - set the device queue depth
2699 * @sdev: scsi device struct
2700 * @qdepth: requested queue depth
2701 *
2702 * Return: the new queue depth.
2703 */
2704static int
2705srp_change_queue_depth(struct scsi_device *sdev, int qdepth)
2706{
2707	if (!sdev->tagged_supported)
2708		qdepth = 1;
2709	return scsi_change_queue_depth(sdev, qdepth);
2710}
2711
2712static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun,
2713			     u8 func, u8 *status)
2714{
2715	struct srp_target_port *target = ch->target;
2716	struct srp_rport *rport = target->rport;
2717	struct ib_device *dev = target->srp_host->srp_dev->dev;
2718	struct srp_iu *iu;
2719	struct srp_tsk_mgmt *tsk_mgmt;
2720	int res;
2721
2722	if (!ch->connected || target->qp_in_error)
2723		return -1;
2724
2725	/*
2726	 * Lock the rport mutex to prevent srp_create_ch_ib() from being
2727	 * invoked while a task management function is being sent.
2728	 */
2729	mutex_lock(&rport->mutex);
2730	spin_lock_irq(&ch->lock);
2731	iu = __srp_get_tx_iu(ch, SRP_IU_TSK_MGMT);
2732	spin_unlock_irq(&ch->lock);
2733
2734	if (!iu) {
2735		mutex_unlock(&rport->mutex);
2736
2737		return -1;
2738	}
2739
2740	iu->num_sge = 1;
2741
2742	ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt,
2743				   DMA_TO_DEVICE);
2744	tsk_mgmt = iu->buf;
2745	memset(tsk_mgmt, 0, sizeof *tsk_mgmt);
2746
2747	tsk_mgmt->opcode 	= SRP_TSK_MGMT;
2748	int_to_scsilun(lun, &tsk_mgmt->lun);
2749	tsk_mgmt->tsk_mgmt_func = func;
2750	tsk_mgmt->task_tag	= req_tag;
2751
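	/*
	 * Generate a tag with the SRP_TAG_TSK_MGMT bit set so that
	 * srp_process_rsp() can tell task management responses apart from
	 * SCSI command responses.
	 */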
2752	spin_lock_irq(&ch->lock);
2753	ch->tsk_mgmt_tag = (ch->tsk_mgmt_tag + 1) | SRP_TAG_TSK_MGMT;
2754	tsk_mgmt->tag = ch->tsk_mgmt_tag;
2755	spin_unlock_irq(&ch->lock);
2756
2757	init_completion(&ch->tsk_mgmt_done);
2758
2759	ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt,
2760				      DMA_TO_DEVICE);
2761	if (srp_post_send(ch, iu, sizeof(*tsk_mgmt))) {
2762		srp_put_tx_iu(ch, iu, SRP_IU_TSK_MGMT);
2763		mutex_unlock(&rport->mutex);
2764
2765		return -1;
2766	}
2767	res = wait_for_completion_timeout(&ch->tsk_mgmt_done,
2768					msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS));
2769	if (res > 0 && status)
2770		*status = ch->tsk_mgmt_status;
2771	mutex_unlock(&rport->mutex);
2772
2773	WARN_ON_ONCE(res < 0);
2774
2775	return res > 0 ? 0 : -1;
2776}
2777
2778static int srp_abort(struct scsi_cmnd *scmnd)
2779{
2780	struct srp_target_port *target = host_to_target(scmnd->device->host);
2781	struct srp_request *req = scsi_cmd_priv(scmnd);
2782	u32 tag;
2783	u16 ch_idx;
2784	struct srp_rdma_ch *ch;
2785
2786	shost_printk(KERN_ERR, target->scsi_host, "SRP abort called\n");
2787
2788	tag = blk_mq_unique_tag(scmnd->request);
2789	ch_idx = blk_mq_unique_tag_to_hwq(tag);
2790	if (WARN_ON_ONCE(ch_idx >= target->ch_count))
2791		return SUCCESS;
2792	ch = &target->ch[ch_idx];
2793	if (!srp_claim_req(ch, req, NULL, scmnd))
2794		return SUCCESS;
2795	shost_printk(KERN_ERR, target->scsi_host,
2796		     "Sending SRP abort for tag %#x\n", tag);
2797	if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun,
2798			      SRP_TSK_ABORT_TASK, NULL) == 0) {
2799		srp_free_req(ch, req, scmnd, 0);
2800		return SUCCESS;
2801	}
2802	if (target->rport->state == SRP_RPORT_LOST)
2803		return FAST_IO_FAIL;
2804
2805	return FAILED;
2806}
2807
2808static int srp_reset_device(struct scsi_cmnd *scmnd)
2809{
2810	struct srp_target_port *target = host_to_target(scmnd->device->host);
2811	struct srp_rdma_ch *ch;
2812	u8 status;
2813
2814	shost_printk(KERN_ERR, target->scsi_host, "SRP reset_device called\n");
2815
2816	ch = &target->ch[0];
2817	if (srp_send_tsk_mgmt(ch, SRP_TAG_NO_REQ, scmnd->device->lun,
2818			      SRP_TSK_LUN_RESET, &status))
2819		return FAILED;
2820	if (status)
2821		return FAILED;
2822
2823	return SUCCESS;
2824}
2825
2826static int srp_reset_host(struct scsi_cmnd *scmnd)
2827{
2828	struct srp_target_port *target = host_to_target(scmnd->device->host);
2829
2830	shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n");
2831
2832	return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED;
2833}
2834
2835static int srp_target_alloc(struct scsi_target *starget)
2836{
2837	struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
2838	struct srp_target_port *target = host_to_target(shost);
2839
2840	if (target->target_can_queue)
2841		starget->can_queue = target->target_can_queue;
2842	return 0;
2843}
2844
2845static int srp_slave_configure(struct scsi_device *sdev)
2846{
2847	struct Scsi_Host *shost = sdev->host;
2848	struct srp_target_port *target = host_to_target(shost);
2849	struct request_queue *q = sdev->request_queue;
2850	unsigned long timeout;
2851
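	/*
	 * For disk devices, make the block layer request timeout at least as
	 * large as the worst-case completion time computed by
	 * srp_compute_rq_tmo(), so that a command is not timed out before
	 * the HCA has had a chance to generate an error completion.
	 */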
2852	if (sdev->type == TYPE_DISK) {
2853		timeout = max_t(unsigned, 30 * HZ, target->rq_tmo_jiffies);
2854		blk_queue_rq_timeout(q, timeout);
2855	}
2856
2857	return 0;
2858}
2859
2860static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr,
2861			   char *buf)
2862{
2863	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2864
2865	return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->id_ext));
2866}
2867
2868static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr,
2869			     char *buf)
2870{
2871	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2872
2873	return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->ioc_guid));
2874}
2875
2876static ssize_t show_service_id(struct device *dev,
2877			       struct device_attribute *attr, char *buf)
2878{
2879	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2880
2881	if (target->using_rdma_cm)
2882		return -ENOENT;
2883	return sprintf(buf, "0x%016llx\n",
2884		       be64_to_cpu(target->ib_cm.service_id));
2885}
2886
2887static ssize_t show_pkey(struct device *dev, struct device_attribute *attr,
2888			 char *buf)
2889{
2890	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2891
2892	if (target->using_rdma_cm)
2893		return -ENOENT;
2894	return sprintf(buf, "0x%04x\n", be16_to_cpu(target->ib_cm.pkey));
2895}
2896
2897static ssize_t show_sgid(struct device *dev, struct device_attribute *attr,
2898			 char *buf)
2899{
2900	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2901
2902	return sprintf(buf, "%pI6\n", target->sgid.raw);
2903}
2904
2905static ssize_t show_dgid(struct device *dev, struct device_attribute *attr,
2906			 char *buf)
2907{
2908	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2909	struct srp_rdma_ch *ch = &target->ch[0];
2910
2911	if (target->using_rdma_cm)
2912		return -ENOENT;
2913	return sprintf(buf, "%pI6\n", ch->ib_cm.path.dgid.raw);
2914}
2915
2916static ssize_t show_orig_dgid(struct device *dev,
2917			      struct device_attribute *attr, char *buf)
2918{
2919	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2920
2921	if (target->using_rdma_cm)
2922		return -ENOENT;
2923	return sprintf(buf, "%pI6\n", target->ib_cm.orig_dgid.raw);
2924}
2925
2926static ssize_t show_req_lim(struct device *dev,
2927			    struct device_attribute *attr, char *buf)
2928{
2929	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2930	struct srp_rdma_ch *ch;
2931	int i, req_lim = INT_MAX;
2932
2933	for (i = 0; i < target->ch_count; i++) {
2934		ch = &target->ch[i];
2935		req_lim = min(req_lim, ch->req_lim);
2936	}
2937	return sprintf(buf, "%d\n", req_lim);
2938}
2939
2940static ssize_t show_zero_req_lim(struct device *dev,
2941				 struct device_attribute *attr, char *buf)
2942{
2943	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2944
2945	return sprintf(buf, "%d\n", target->zero_req_lim);
2946}
2947
2948static ssize_t show_local_ib_port(struct device *dev,
2949				  struct device_attribute *attr, char *buf)
2950{
2951	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2952
2953	return sprintf(buf, "%d\n", target->srp_host->port);
2954}
2955
2956static ssize_t show_local_ib_device(struct device *dev,
2957				    struct device_attribute *attr, char *buf)
2958{
2959	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2960
2961	return sprintf(buf, "%s\n",
2962		       dev_name(&target->srp_host->srp_dev->dev->dev));
2963}
2964
2965static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr,
2966			     char *buf)
2967{
2968	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2969
2970	return sprintf(buf, "%d\n", target->ch_count);
2971}
2972
2973static ssize_t show_comp_vector(struct device *dev,
2974				struct device_attribute *attr, char *buf)
2975{
2976	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2977
2978	return sprintf(buf, "%d\n", target->comp_vector);
2979}
2980
2981static ssize_t show_tl_retry_count(struct device *dev,
2982				   struct device_attribute *attr, char *buf)
2983{
2984	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2985
2986	return sprintf(buf, "%d\n", target->tl_retry_count);
2987}
2988
2989static ssize_t show_cmd_sg_entries(struct device *dev,
2990				   struct device_attribute *attr, char *buf)
2991{
2992	struct srp_target_port *target = host_to_target(class_to_shost(dev));
2993
2994	return sprintf(buf, "%u\n", target->cmd_sg_cnt);
2995}
2996
2997static ssize_t show_allow_ext_sg(struct device *dev,
2998				 struct device_attribute *attr, char *buf)
2999{
3000	struct srp_target_port *target = host_to_target(class_to_shost(dev));
3001
3002	return sprintf(buf, "%s\n", target->allow_ext_sg ? "true" : "false");
3003}
3004
3005static DEVICE_ATTR(id_ext,	    S_IRUGO, show_id_ext,	   NULL);
3006static DEVICE_ATTR(ioc_guid,	    S_IRUGO, show_ioc_guid,	   NULL);
3007static DEVICE_ATTR(service_id,	    S_IRUGO, show_service_id,	   NULL);
3008static DEVICE_ATTR(pkey,	    S_IRUGO, show_pkey,		   NULL);
3009static DEVICE_ATTR(sgid,	    S_IRUGO, show_sgid,		   NULL);
3010static DEVICE_ATTR(dgid,	    S_IRUGO, show_dgid,		   NULL);
3011static DEVICE_ATTR(orig_dgid,	    S_IRUGO, show_orig_dgid,	   NULL);
3012static DEVICE_ATTR(req_lim,         S_IRUGO, show_req_lim,         NULL);
3013static DEVICE_ATTR(zero_req_lim,    S_IRUGO, show_zero_req_lim,	   NULL);
3014static DEVICE_ATTR(local_ib_port,   S_IRUGO, show_local_ib_port,   NULL);
3015static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL);
3016static DEVICE_ATTR(ch_count,        S_IRUGO, show_ch_count,        NULL);
3017static DEVICE_ATTR(comp_vector,     S_IRUGO, show_comp_vector,     NULL);
3018static DEVICE_ATTR(tl_retry_count,  S_IRUGO, show_tl_retry_count,  NULL);
3019static DEVICE_ATTR(cmd_sg_entries,  S_IRUGO, show_cmd_sg_entries,  NULL);
3020static DEVICE_ATTR(allow_ext_sg,    S_IRUGO, show_allow_ext_sg,    NULL);
3021
3022static struct device_attribute *srp_host_attrs[] = {
3023	&dev_attr_id_ext,
3024	&dev_attr_ioc_guid,
3025	&dev_attr_service_id,
3026	&dev_attr_pkey,
3027	&dev_attr_sgid,
3028	&dev_attr_dgid,
3029	&dev_attr_orig_dgid,
3030	&dev_attr_req_lim,
3031	&dev_attr_zero_req_lim,
3032	&dev_attr_local_ib_port,
3033	&dev_attr_local_ib_device,
3034	&dev_attr_ch_count,
3035	&dev_attr_comp_vector,
3036	&dev_attr_tl_retry_count,
3037	&dev_attr_cmd_sg_entries,
3038	&dev_attr_allow_ext_sg,
3039	NULL
3040};
3041
3042static struct scsi_host_template srp_template = {
3043	.module				= THIS_MODULE,
3044	.name				= "InfiniBand SRP initiator",
3045	.proc_name			= DRV_NAME,
3046	.target_alloc			= srp_target_alloc,
3047	.slave_configure		= srp_slave_configure,
3048	.info				= srp_target_info,
3049	.init_cmd_priv			= srp_init_cmd_priv,
3050	.exit_cmd_priv			= srp_exit_cmd_priv,
3051	.queuecommand			= srp_queuecommand,
3052	.change_queue_depth             = srp_change_queue_depth,
3053	.eh_timed_out			= srp_timed_out,
3054	.eh_abort_handler		= srp_abort,
3055	.eh_device_reset_handler	= srp_reset_device,
3056	.eh_host_reset_handler		= srp_reset_host,
3057	.skip_settle_delay		= true,
3058	.sg_tablesize			= SRP_DEF_SG_TABLESIZE,
3059	.can_queue			= SRP_DEFAULT_CMD_SQ_SIZE,
3060	.this_id			= -1,
3061	.cmd_per_lun			= SRP_DEFAULT_CMD_SQ_SIZE,
3062	.shost_attrs			= srp_host_attrs,
3063	.track_queue_depth		= 1,
3064	.cmd_size			= sizeof(struct srp_request),
3065};
3066
3067static int srp_sdev_count(struct Scsi_Host *host)
3068{
3069	struct scsi_device *sdev;
3070	int c = 0;
3071
3072	shost_for_each_device(sdev, host)
3073		c++;
3074
3075	return c;
3076}
3077
3078/*
3079 * Return values:
3080 * < 0 upon failure. Caller is responsible for SRP target port cleanup.
3081 * 0 and target->state == SRP_TARGET_REMOVED if asynchronous target port
3082 *    removal has been scheduled.
3083 * 0 and target->state != SRP_TARGET_REMOVED upon success.
3084 */
3085static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
3086{
3087	struct srp_rport_identifiers ids;
3088	struct srp_rport *rport;
3089
3090	target->state = SRP_TARGET_SCANNING;
3091	sprintf(target->target_name, "SRP.T10:%016llX",
3092		be64_to_cpu(target->id_ext));
3093
3094	if (scsi_add_host(target->scsi_host, host->srp_dev->dev->dev.parent))
3095		return -ENODEV;
3096
3097	memcpy(ids.port_id, &target->id_ext, 8);
3098	memcpy(ids.port_id + 8, &target->ioc_guid, 8);
3099	ids.roles = SRP_RPORT_ROLE_TARGET;
3100	rport = srp_rport_add(target->scsi_host, &ids);
3101	if (IS_ERR(rport)) {
3102		scsi_remove_host(target->scsi_host);
3103		return PTR_ERR(rport);
3104	}
3105
3106	rport->lld_data = target;
3107	target->rport = rport;
3108
3109	spin_lock(&host->target_lock);
3110	list_add_tail(&target->list, &host->target_list);
3111	spin_unlock(&host->target_lock);
3112
3113	scsi_scan_target(&target->scsi_host->shost_gendev,
3114			 0, target->scsi_id, SCAN_WILD_CARD, SCSI_SCAN_INITIAL);
3115
3116	if (srp_connected_ch(target) < target->ch_count ||
3117	    target->qp_in_error) {
3118		shost_printk(KERN_INFO, target->scsi_host,
3119			     PFX "SCSI scan failed - removing SCSI host\n");
3120		srp_queue_remove_work(target);
3121		goto out;
3122	}
3123
3124	pr_debug("%s: SCSI scan succeeded - detected %d LUNs\n",
3125		 dev_name(&target->scsi_host->shost_gendev),
3126		 srp_sdev_count(target->scsi_host));
3127
3128	spin_lock_irq(&target->lock);
3129	if (target->state == SRP_TARGET_SCANNING)
3130		target->state = SRP_TARGET_LIVE;
3131	spin_unlock_irq(&target->lock);
3132
3133out:
3134	return 0;
3135}
3136
3137static void srp_release_dev(struct device *dev)
3138{
3139	struct srp_host *host =
3140		container_of(dev, struct srp_host, dev);
3141
3142	complete(&host->released);
3143}
3144
3145static struct class srp_class = {
3146	.name    = "infiniband_srp",
3147	.dev_release = srp_release_dev
3148};
3149
3150/**
3151 * srp_conn_unique() - check whether the connection to a target is unique
3152 * @host:   SRP host.
3153 * @target: SRP target port.
3154 */
3155static bool srp_conn_unique(struct srp_host *host,
3156			    struct srp_target_port *target)
3157{
3158	struct srp_target_port *t;
3159	bool ret = false;
3160
3161	if (target->state == SRP_TARGET_REMOVED)
3162		goto out;
3163
3164	ret = true;
3165
3166	spin_lock(&host->target_lock);
3167	list_for_each_entry(t, &host->target_list, list) {
3168		if (t != target &&
3169		    target->id_ext == t->id_ext &&
3170		    target->ioc_guid == t->ioc_guid &&
3171		    target->initiator_ext == t->initiator_ext) {
3172			ret = false;
3173			break;
3174		}
3175	}
3176	spin_unlock(&host->target_lock);
3177
3178out:
3179	return ret;
3180}
3181
3182/*
3183 * Target ports are added by writing
3184 *
3185 *     id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,dgid=<dest GID>,
3186 *     pkey=<P_Key>,service_id=<service ID>
3187 * or
3188 *     id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,
3189 *     [src=<IPv4 address>,]dest=<IPv4 address>:<port number>
3190 *
3191 * to the add_target sysfs attribute.
3192 */
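/*
 * Example (illustrative values; the srp-<hca>-<port> directory name depends
 * on the local HCA and port):
 *
 *   echo id_ext=200500a0b8130603,ioc_guid=0002c90200402bd4,dest=192.168.1.66:5555 \
 *       > /sys/class/infiniband_srp/srp-mlx5_0-1/add_target
 */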
3193enum {
3194	SRP_OPT_ERR		= 0,
3195	SRP_OPT_ID_EXT		= 1 << 0,
3196	SRP_OPT_IOC_GUID	= 1 << 1,
3197	SRP_OPT_DGID		= 1 << 2,
3198	SRP_OPT_PKEY		= 1 << 3,
3199	SRP_OPT_SERVICE_ID	= 1 << 4,
3200	SRP_OPT_MAX_SECT	= 1 << 5,
3201	SRP_OPT_MAX_CMD_PER_LUN	= 1 << 6,
3202	SRP_OPT_IO_CLASS	= 1 << 7,
3203	SRP_OPT_INITIATOR_EXT	= 1 << 8,
3204	SRP_OPT_CMD_SG_ENTRIES	= 1 << 9,
3205	SRP_OPT_ALLOW_EXT_SG	= 1 << 10,
3206	SRP_OPT_SG_TABLESIZE	= 1 << 11,
3207	SRP_OPT_COMP_VECTOR	= 1 << 12,
3208	SRP_OPT_TL_RETRY_COUNT	= 1 << 13,
3209	SRP_OPT_QUEUE_SIZE	= 1 << 14,
3210	SRP_OPT_IP_SRC		= 1 << 15,
3211	SRP_OPT_IP_DEST		= 1 << 16,
3212	SRP_OPT_TARGET_CAN_QUEUE= 1 << 17,
3213	SRP_OPT_MAX_IT_IU_SIZE  = 1 << 18,
3214	SRP_OPT_CH_COUNT	= 1 << 19,
3215};
3216
3217static unsigned int srp_opt_mandatory[] = {
3218	SRP_OPT_ID_EXT		|
3219	SRP_OPT_IOC_GUID	|
3220	SRP_OPT_DGID		|
3221	SRP_OPT_PKEY		|
3222	SRP_OPT_SERVICE_ID,
3223	SRP_OPT_ID_EXT		|
3224	SRP_OPT_IOC_GUID	|
3225	SRP_OPT_IP_DEST,
3226};
3227
3228static const match_table_t srp_opt_tokens = {
3229	{ SRP_OPT_ID_EXT,		"id_ext=%s" 		},
3230	{ SRP_OPT_IOC_GUID,		"ioc_guid=%s" 		},
3231	{ SRP_OPT_DGID,			"dgid=%s" 		},
3232	{ SRP_OPT_PKEY,			"pkey=%x" 		},
3233	{ SRP_OPT_SERVICE_ID,		"service_id=%s"		},
3234	{ SRP_OPT_MAX_SECT,		"max_sect=%d" 		},
3235	{ SRP_OPT_MAX_CMD_PER_LUN,	"max_cmd_per_lun=%d" 	},
3236	{ SRP_OPT_TARGET_CAN_QUEUE,	"target_can_queue=%d"	},
3237	{ SRP_OPT_IO_CLASS,		"io_class=%x"		},
3238	{ SRP_OPT_INITIATOR_EXT,	"initiator_ext=%s"	},
3239	{ SRP_OPT_CMD_SG_ENTRIES,	"cmd_sg_entries=%u"	},
3240	{ SRP_OPT_ALLOW_EXT_SG,		"allow_ext_sg=%u"	},
3241	{ SRP_OPT_SG_TABLESIZE,		"sg_tablesize=%u"	},
3242	{ SRP_OPT_COMP_VECTOR,		"comp_vector=%u"	},
3243	{ SRP_OPT_TL_RETRY_COUNT,	"tl_retry_count=%u"	},
3244	{ SRP_OPT_QUEUE_SIZE,		"queue_size=%d"		},
3245	{ SRP_OPT_IP_SRC,		"src=%s"		},
3246	{ SRP_OPT_IP_DEST,		"dest=%s"		},
3247	{ SRP_OPT_MAX_IT_IU_SIZE,	"max_it_iu_size=%d"	},
3248	{ SRP_OPT_CH_COUNT,		"ch_count=%u"		},
3249	{ SRP_OPT_ERR,			NULL 			}
3250};
3251
3252/**
3253 * srp_parse_in() - parse an IP address and port number combination
3254 * @net:	   [in]  Network namespace.
3255 * @sa:		   [out] Address family, IP address and port number.
3256 * @addr_port_str: [in]  IP address and port number.
3257 * @has_port:	   [out] Whether or not @addr_port_str includes a port number.
3258 *
3259 * Parse the following address formats:
3260 * - IPv4: <ip_address>:<port>, e.g. 1.2.3.4:5.
3261 * - IPv6: \[<ipv6_address>\]:<port>, e.g. [1::2:3%4]:5.
3262 */
3263static int srp_parse_in(struct net *net, struct sockaddr_storage *sa,
3264			const char *addr_port_str, bool *has_port)
3265{
3266	char *addr_end, *addr = kstrdup(addr_port_str, GFP_KERNEL);
3267	char *port_str;
3268	int ret;
3269
3270	if (!addr)
3271		return -ENOMEM;
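	/*
	 * Treat the last ':' as the address/port separator only if no ']'
	 * follows it, i.e. only if it occurs outside the brackets of an
	 * IPv6 address.
	 */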
3272	port_str = strrchr(addr, ':');
3273	if (port_str && strchr(port_str, ']'))
3274		port_str = NULL;
3275	if (port_str)
3276		*port_str++ = '\0';
3277	if (has_port)
3278		*has_port = port_str != NULL;
3279	ret = inet_pton_with_scope(net, AF_INET, addr, port_str, sa);
3280	if (ret && addr[0]) {
3281		addr_end = addr + strlen(addr) - 1;
3282		if (addr[0] == '[' && *addr_end == ']') {
3283			*addr_end = '\0';
3284			ret = inet_pton_with_scope(net, AF_INET6, addr + 1,
3285						   port_str, sa);
3286		}
3287	}
3288	kfree(addr);
3289	pr_debug("%s -> %pISpfsc\n", addr_port_str, sa);
3290	return ret;
3291}
3292
3293static int srp_parse_options(struct net *net, const char *buf,
3294			     struct srp_target_port *target)
3295{
3296	char *options, *sep_opt;
3297	char *p;
3298	substring_t args[MAX_OPT_ARGS];
3299	unsigned long long ull;
3300	bool has_port;
3301	int opt_mask = 0;
3302	int token;
3303	int ret = -EINVAL;
3304	int i;
3305
3306	options = kstrdup(buf, GFP_KERNEL);
3307	if (!options)
3308		return -ENOMEM;
3309
3310	sep_opt = options;
3311	while ((p = strsep(&sep_opt, ",\n")) != NULL) {
3312		if (!*p)
3313			continue;
3314
3315		token = match_token(p, srp_opt_tokens, args);
3316		opt_mask |= token;
3317
3318		switch (token) {
3319		case SRP_OPT_ID_EXT:
3320			p = match_strdup(args);
3321			if (!p) {
3322				ret = -ENOMEM;
3323				goto out;
3324			}
3325			ret = kstrtoull(p, 16, &ull);
3326			if (ret) {
3327				pr_warn("invalid id_ext parameter '%s'\n", p);
3328				kfree(p);
3329				goto out;
3330			}
3331			target->id_ext = cpu_to_be64(ull);
3332			kfree(p);
3333			break;
3334
3335		case SRP_OPT_IOC_GUID:
3336			p = match_strdup(args);
3337			if (!p) {
3338				ret = -ENOMEM;
3339				goto out;
3340			}
3341			ret = kstrtoull(p, 16, &ull);
3342			if (ret) {
3343				pr_warn("invalid ioc_guid parameter '%s'\n", p);
3344				kfree(p);
3345				goto out;
3346			}
3347			target->ioc_guid = cpu_to_be64(ull);
3348			kfree(p);
3349			break;
3350
3351		case SRP_OPT_DGID:
3352			p = match_strdup(args);
3353			if (!p) {
3354				ret = -ENOMEM;
3355				goto out;
3356			}
3357			if (strlen(p) != 32) {
3358				pr_warn("bad dest GID parameter '%s'\n", p);
3359				kfree(p);
3360				goto out;
3361			}
3362
3363			ret = hex2bin(target->ib_cm.orig_dgid.raw, p, 16);
3364			kfree(p);
3365			if (ret < 0)
3366				goto out;
3367			break;
3368
3369		case SRP_OPT_PKEY:
3370			ret = match_hex(args, &token);
3371			if (ret) {
3372				pr_warn("bad P_Key parameter '%s'\n", p);
3373				goto out;
3374			}
3375			target->ib_cm.pkey = cpu_to_be16(token);
3376			break;
3377
3378		case SRP_OPT_SERVICE_ID:
3379			p = match_strdup(args);
3380			if (!p) {
3381				ret = -ENOMEM;
3382				goto out;
3383			}
3384			ret = kstrtoull(p, 16, &ull);
3385			if (ret) {
3386				pr_warn("bad service_id parameter '%s'\n", p);
3387				kfree(p);
3388				goto out;
3389			}
3390			target->ib_cm.service_id = cpu_to_be64(ull);
3391			kfree(p);
3392			break;
3393
3394		case SRP_OPT_IP_SRC:
3395			p = match_strdup(args);
3396			if (!p) {
3397				ret = -ENOMEM;
3398				goto out;
3399			}
3400			ret = srp_parse_in(net, &target->rdma_cm.src.ss, p,
3401					   NULL);
3402			if (ret < 0) {
3403				pr_warn("bad source parameter '%s'\n", p);
3404				kfree(p);
3405				goto out;
3406			}
3407			target->rdma_cm.src_specified = true;
3408			kfree(p);
3409			break;
3410
3411		case SRP_OPT_IP_DEST:
3412			p = match_strdup(args);
3413			if (!p) {
3414				ret = -ENOMEM;
3415				goto out;
3416			}
3417			ret = srp_parse_in(net, &target->rdma_cm.dst.ss, p,
3418					   &has_port);
3419			if (!has_port)
3420				ret = -EINVAL;
3421			if (ret < 0) {
3422				pr_warn("bad dest parameter '%s'\n", p);
3423				kfree(p);
3424				goto out;
3425			}
3426			target->using_rdma_cm = true;
3427			kfree(p);
3428			break;
3429
3430		case SRP_OPT_MAX_SECT:
3431			ret = match_int(args, &token);
3432			if (ret) {
3433				pr_warn("bad max sect parameter '%s'\n", p);
3434				goto out;
3435			}
3436			target->scsi_host->max_sectors = token;
3437			break;
3438
3439		case SRP_OPT_QUEUE_SIZE:
3440			ret = match_int(args, &token);
3441			if (ret) {
3442				pr_warn("match_int() failed for queue_size parameter '%s', Error %d\n",
3443					p, ret);
3444				goto out;
3445			}
3446			if (token < 1) {
3447				pr_warn("bad queue_size parameter '%s'\n", p);
3448				ret = -EINVAL;
3449				goto out;
3450			}
3451			target->scsi_host->can_queue = token;
3452			target->queue_size = token + SRP_RSP_SQ_SIZE +
3453					     SRP_TSK_MGMT_SQ_SIZE;
3454			if (!(opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3455				target->scsi_host->cmd_per_lun = token;
3456			break;
3457
3458		case SRP_OPT_MAX_CMD_PER_LUN:
3459			ret = match_int(args, &token);
3460			if (ret) {
3461				pr_warn("match_int() failed for max cmd_per_lun parameter '%s', Error %d\n",
3462					p, ret);
3463				goto out;
3464			}
3465			if (token < 1) {
3466				pr_warn("bad max cmd_per_lun parameter '%s'\n",
3467					p);
3468				ret = -EINVAL;
3469				goto out;
3470			}
3471			target->scsi_host->cmd_per_lun = token;
3472			break;
3473
3474		case SRP_OPT_TARGET_CAN_QUEUE:
3475			ret = match_int(args, &token);
3476			if (ret) {
3477				pr_warn("match_int() failed for max target_can_queue parameter '%s', Error %d\n",
3478					p, ret);
3479				goto out;
3480			}
3481			if (token < 1) {
3482				pr_warn("bad max target_can_queue parameter '%s'\n",
3483					p);
3484				ret = -EINVAL;
3485				goto out;
3486			}
3487			target->target_can_queue = token;
3488			break;
3489
3490		case SRP_OPT_IO_CLASS:
3491			ret = match_hex(args, &token);
3492			if (ret) {
3493				pr_warn("bad IO class parameter '%s'\n", p);
3494				goto out;
3495			}
3496			if (token != SRP_REV10_IB_IO_CLASS &&
3497			    token != SRP_REV16A_IB_IO_CLASS) {
3498				pr_warn("unknown IO class parameter value %x specified (use %x or %x).\n",
3499					token, SRP_REV10_IB_IO_CLASS,
3500					SRP_REV16A_IB_IO_CLASS);
3501				ret = -EINVAL;
3502				goto out;
3503			}
3504			target->io_class = token;
3505			break;
3506
3507		case SRP_OPT_INITIATOR_EXT:
3508			p = match_strdup(args);
3509			if (!p) {
3510				ret = -ENOMEM;
3511				goto out;
3512			}
3513			ret = kstrtoull(p, 16, &ull);
3514			if (ret) {
3515				pr_warn("bad initiator_ext value '%s'\n", p);
3516				kfree(p);
3517				goto out;
3518			}
3519			target->initiator_ext = cpu_to_be64(ull);
3520			kfree(p);
3521			break;
3522
3523		case SRP_OPT_CMD_SG_ENTRIES:
3524			ret = match_int(args, &token);
3525			if (ret) {
3526				pr_warn("match_int() failed for max cmd_sg_entries parameter '%s', Error %d\n",
3527					p, ret);
3528				goto out;
3529			}
3530			if (token < 1 || token > 255) {
3531				pr_warn("bad max cmd_sg_entries parameter '%s'\n",
3532					p);
3533				ret = -EINVAL;
3534				goto out;
3535			}
3536			target->cmd_sg_cnt = token;
3537			break;
3538
3539		case SRP_OPT_ALLOW_EXT_SG:
3540			ret = match_int(args, &token);
3541			if (ret) {
3542				pr_warn("bad allow_ext_sg parameter '%s'\n", p);
3543				goto out;
3544			}
3545			target->allow_ext_sg = !!token;
3546			break;
3547
3548		case SRP_OPT_SG_TABLESIZE:
3549			ret = match_int(args, &token);
3550			if (ret) {
3551				pr_warn("match_int() failed for max sg_tablesize parameter '%s', Error %d\n",
3552					p, ret);
3553				goto out;
3554			}
3555			if (token < 1 || token > SG_MAX_SEGMENTS) {
3556				pr_warn("bad max sg_tablesize parameter '%s'\n",
3557					p);
3558				ret = -EINVAL;
3559				goto out;
3560			}
3561			target->sg_tablesize = token;
3562			break;
3563
3564		case SRP_OPT_COMP_VECTOR:
3565			ret = match_int(args, &token);
3566			if (ret) {
3567				pr_warn("match_int() failed for comp_vector parameter '%s', Error %d\n",
3568					p, ret);
3569				goto out;
3570			}
3571			if (token < 0) {
3572				pr_warn("bad comp_vector parameter '%s'\n", p);
3573				ret = -EINVAL;
3574				goto out;
3575			}
3576			target->comp_vector = token;
3577			break;
3578
3579		case SRP_OPT_TL_RETRY_COUNT:
3580			ret = match_int(args, &token);
3581			if (ret) {
3582				pr_warn("match_int() failed for tl_retry_count parameter '%s', Error %d\n",
3583					p, ret);
3584				goto out;
3585			}
3586			if (token < 2 || token > 7) {
3587				pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n",
3588					p);
3589				ret = -EINVAL;
3590				goto out;
3591			}
3592			target->tl_retry_count = token;
3593			break;
3594
3595		case SRP_OPT_MAX_IT_IU_SIZE:
3596			ret = match_int(args, &token);
3597			if (ret) {
3598				pr_warn("match_int() failed for max it_iu_size parameter '%s', Error %d\n",
3599					p, ret);
3600				goto out;
3601			}
3602			if (token < 0) {
3603				pr_warn("bad maximum initiator to target IU size '%s'\n", p);
3604				ret = -EINVAL;
3605				goto out;
3606			}
3607			target->max_it_iu_size = token;
3608			break;
3609
3610		case SRP_OPT_CH_COUNT:
3611			ret = match_int(args, &token);
3612			if (ret) {
3613				pr_warn("match_int() failed for channel count parameter '%s', Error %d\n",
3614					p, ret);
3615				goto out;
3616			}
3617			if (token < 1) {
3618				pr_warn("bad channel count %s\n", p);
3619				ret = -EINVAL;
3620				goto out;
3621			}
3622			target->ch_count = token;
3623			break;
3624
3625		default:
3626			pr_warn("unknown parameter or missing value '%s' in target creation request\n",
3627				p);
3628			ret = -EINVAL;
3629			goto out;
3630		}
3631	}
3632
3633	for (i = 0; i < ARRAY_SIZE(srp_opt_mandatory); i++) {
3634		if ((opt_mask & srp_opt_mandatory[i]) == srp_opt_mandatory[i]) {
3635			ret = 0;
3636			break;
3637		}
3638	}
3639	if (ret)
3640		pr_warn("target creation request is missing one or more parameters\n");
3641
3642	if (target->scsi_host->cmd_per_lun > target->scsi_host->can_queue &&
3643	    (opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
3644		pr_warn("cmd_per_lun = %d > queue_size = %d\n",
3645			target->scsi_host->cmd_per_lun,
3646			target->scsi_host->can_queue);
3647
3648out:
3649	kfree(options);
3650	return ret;
3651}
3652
3653static ssize_t srp_create_target(struct device *dev,
3654				 struct device_attribute *attr,
3655				 const char *buf, size_t count)
3656{
3657	struct srp_host *host =
3658		container_of(dev, struct srp_host, dev);
3659	struct Scsi_Host *target_host;
3660	struct srp_target_port *target;
3661	struct srp_rdma_ch *ch;
3662	struct srp_device *srp_dev = host->srp_dev;
3663	struct ib_device *ibdev = srp_dev->dev;
3664	int ret, i, ch_idx;
3665	unsigned int max_sectors_per_mr, mr_per_cmd = 0;
3666	bool multich = false;
3667	uint32_t max_iu_len;
3668
3669	target_host = scsi_host_alloc(&srp_template,
3670				      sizeof (struct srp_target_port));
3671	if (!target_host)
3672		return -ENOMEM;
3673
3674	target_host->transportt  = ib_srp_transport_template;
3675	target_host->max_channel = 0;
3676	target_host->max_id      = 1;
3677	target_host->max_lun     = -1LL;
3678	target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb;
3679	target_host->max_segment_size = ib_dma_max_seg_size(ibdev);
3680
3681	if (!(ibdev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG))
3682		target_host->virt_boundary_mask = ~srp_dev->mr_page_mask;
3683
3684	target = host_to_target(target_host);
3685
3686	target->net		= kobj_ns_grab_current(KOBJ_NS_TYPE_NET);
3687	target->io_class	= SRP_REV16A_IB_IO_CLASS;
3688	target->scsi_host	= target_host;
3689	target->srp_host	= host;
3690	target->lkey		= host->srp_dev->pd->local_dma_lkey;
3691	target->global_rkey	= host->srp_dev->global_rkey;
3692	target->cmd_sg_cnt	= cmd_sg_entries;
3693	target->sg_tablesize	= indirect_sg_entries ? : cmd_sg_entries;
3694	target->allow_ext_sg	= allow_ext_sg;
3695	target->tl_retry_count	= 7;
3696	target->queue_size	= SRP_DEFAULT_QUEUE_SIZE;
3697
3698	/*
3699	 * Prevent the SCSI host from being removed by srp_remove_target()
3700	 * before this function returns.
3701	 */
3702	scsi_host_get(target->scsi_host);
3703
3704	ret = mutex_lock_interruptible(&host->add_target_mutex);
3705	if (ret < 0)
3706		goto put;
3707
3708	ret = srp_parse_options(target->net, buf, target);
3709	if (ret)
3710		goto out;
3711
3712	if (!srp_conn_unique(target->srp_host, target)) {
3713		if (target->using_rdma_cm) {
3714			shost_printk(KERN_INFO, target->scsi_host,
3715				     PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;dest=%pIS\n",
3716				     be64_to_cpu(target->id_ext),
3717				     be64_to_cpu(target->ioc_guid),
3718				     &target->rdma_cm.dst);
3719		} else {
3720			shost_printk(KERN_INFO, target->scsi_host,
3721				     PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n",
3722				     be64_to_cpu(target->id_ext),
3723				     be64_to_cpu(target->ioc_guid),
3724				     be64_to_cpu(target->initiator_ext));
3725		}
3726		ret = -EEXIST;
3727		goto out;
3728	}
3729
3730	if (!srp_dev->has_fr && !target->allow_ext_sg &&
3731	    target->cmd_sg_cnt < target->sg_tablesize) {
3732		pr_warn("No MR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n");
3733		target->sg_tablesize = target->cmd_sg_cnt;
3734	}

	if (srp_dev->use_fast_reg) {
		bool gaps_reg = (ibdev->attrs.device_cap_flags &
				 IB_DEVICE_SG_GAPS_REG);

		max_sectors_per_mr = srp_dev->max_pages_per_mr <<
				  (ilog2(srp_dev->mr_page_size) - 9);
		if (!gaps_reg) {
			/*
			 * FR can only map one HCA page per entry. If the start
			 * address is not aligned on a HCA page boundary two
			 * entries will be used for the head and the tail
			 * although these two entries combined contain at most
			 * one HCA page of data. Hence the "+ 1" in the
			 * calculation below.
			 *
			 * The indirect data buffer descriptor is contiguous
			 * so the memory for that buffer will only be
			 * registered if register_always is true. Hence add
			 * one to mr_per_cmd if register_always has been set.
			 */
			mr_per_cmd = register_always +
				(target->scsi_host->max_sectors + 1 +
				 max_sectors_per_mr - 1) / max_sectors_per_mr;
		} else {
			mr_per_cmd = register_always +
				(target->sg_tablesize +
				 srp_dev->max_pages_per_mr - 1) /
				srp_dev->max_pages_per_mr;
		}
		pr_debug("max_sectors = %u; max_pages_per_mr = %u; mr_page_size = %u; max_sectors_per_mr = %u; mr_per_cmd = %u\n",
			 target->scsi_host->max_sectors, srp_dev->max_pages_per_mr, srp_dev->mr_page_size,
			 max_sectors_per_mr, mr_per_cmd);
	}
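	/*
	 * Worked example with illustrative numbers (not tied to any
	 * specific HCA): mr_page_size = 4096 and max_pages_per_mr = 256
	 * give max_sectors_per_mr = 256 << (12 - 9) = 2048 sectors. With
	 * max_sectors = 1024, register_always = true and no SG_GAPS_REG
	 * support, mr_per_cmd = 1 + (1024 + 1 + 2047) / 2048 = 1 + 1 = 2.
	 */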

	target_host->sg_tablesize = target->sg_tablesize;
	target->mr_pool_size = target->scsi_host->can_queue * mr_per_cmd;
	target->mr_per_cmd = mr_per_cmd;
	target->indirect_size = target->sg_tablesize *
				sizeof (struct srp_direct_buf);
	max_iu_len = srp_max_it_iu_len(target->cmd_sg_cnt,
				       srp_use_imm_data,
				       target->max_it_iu_size);

	INIT_WORK(&target->tl_err_work, srp_tl_err_work);
	INIT_WORK(&target->remove_work, srp_remove_work);
	spin_lock_init(&target->lock);
	ret = rdma_query_gid(ibdev, host->port, 0, &target->sgid);
	if (ret)
		goto out;

	ret = -ENOMEM;
	if (target->ch_count == 0) {
		target->ch_count =
			min(ch_count ?:
				max(4 * num_online_nodes(),
				    ibdev->num_comp_vectors),
				num_online_cpus());
	}
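	/*
	 * Example (illustrative): on a system with 2 online NUMA nodes,
	 * 32 online CPUs and an HCA exposing 16 completion vectors, and no
	 * explicit ch_count login parameter, this selects
	 * min(max(8, 16), 32) = 16 RDMA channels.
	 */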

	target->ch = kcalloc(target->ch_count, sizeof(*target->ch),
			     GFP_KERNEL);
	if (!target->ch)
		goto out;

	for (ch_idx = 0; ch_idx < target->ch_count; ++ch_idx) {
		ch = &target->ch[ch_idx];
		ch->target = target;
		ch->comp_vector = ch_idx % ibdev->num_comp_vectors;
		spin_lock_init(&ch->lock);
		INIT_LIST_HEAD(&ch->free_tx);
		ret = srp_new_cm_id(ch);
		if (ret)
			goto err_disconnect;

		ret = srp_create_ch_ib(ch);
		if (ret)
			goto err_disconnect;

		ret = srp_connect_ch(ch, max_iu_len, multich);
		if (ret) {
			char dst[64];

			if (target->using_rdma_cm)
				snprintf(dst, sizeof(dst), "%pIS",
					&target->rdma_cm.dst);
			else
				snprintf(dst, sizeof(dst), "%pI6",
					target->ib_cm.orig_dgid.raw);
			shost_printk(KERN_ERR, target->scsi_host,
				PFX "Connection %d/%d to %s failed\n",
				ch_idx,
				target->ch_count, dst);
			if (ch_idx == 0) {
				goto free_ch;
			} else {
				srp_free_ch_ib(target, ch);
				target->ch_count = ch - target->ch;
				goto connected;
			}
		}
		multich = true;
	}

connected:
	target->scsi_host->nr_hw_queues = target->ch_count;

	ret = srp_add_target(host, target);
	if (ret)
		goto err_disconnect;

	if (target->state != SRP_TARGET_REMOVED) {
		if (target->using_rdma_cm) {
			shost_printk(KERN_DEBUG, target->scsi_host, PFX
				     "new target: id_ext %016llx ioc_guid %016llx sgid %pI6 dest %pIS\n",
				     be64_to_cpu(target->id_ext),
				     be64_to_cpu(target->ioc_guid),
				     target->sgid.raw, &target->rdma_cm.dst);
		} else {
			shost_printk(KERN_DEBUG, target->scsi_host, PFX
				     "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n",
				     be64_to_cpu(target->id_ext),
				     be64_to_cpu(target->ioc_guid),
				     be16_to_cpu(target->ib_cm.pkey),
				     be64_to_cpu(target->ib_cm.service_id),
				     target->sgid.raw,
				     target->ib_cm.orig_dgid.raw);
		}
	}

	ret = count;

out:
	mutex_unlock(&host->add_target_mutex);

put:
	scsi_host_put(target->scsi_host);
	if (ret < 0) {
		/*
		 * If a call to srp_remove_target() has not been scheduled,
		 * drop the network namespace reference that was obtained
		 * earlier in this function.
		 */
		if (target->state != SRP_TARGET_REMOVED)
			kobj_ns_drop(KOBJ_NS_TYPE_NET, target->net);
		scsi_host_put(target->scsi_host);
	}

	return ret;

err_disconnect:
	srp_disconnect_target(target);

free_ch:
	for (i = 0; i < target->ch_count; i++) {
		ch = &target->ch[i];
		srp_free_ch_ib(target, ch);
	}

	kfree(target->ch);
	goto out;
}

static DEVICE_ATTR(add_target, S_IWUSR, NULL, srp_create_target);

static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
			  char *buf)
{
	struct srp_host *host = container_of(dev, struct srp_host, dev);

	return sprintf(buf, "%s\n", dev_name(&host->srp_dev->dev->dev));
}

static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);

static ssize_t show_port(struct device *dev, struct device_attribute *attr,
			 char *buf)
{
	struct srp_host *host = container_of(dev, struct srp_host, dev);

	return sprintf(buf, "%d\n", host->port);
}

static DEVICE_ATTR(port, S_IRUGO, show_port, NULL);

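/*
 * Each (HCA, port) pair is represented by a struct srp_host whose device is
 * registered under the infiniband_srp class. New SCSI hosts are created by
 * writing a parameter string, e.g. (illustrative values)
 * "id_ext=200100e08b000000,ioc_guid=0002c90200402bd4,dgid=fe80::2:c902:40:2bd5,pkey=ffff,service_id=0002c90200402bd4",
 * to the add_target attribute; srp_parse_options() above defines the full
 * set of accepted parameters.
 */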
static struct srp_host *srp_add_port(struct srp_device *device, u8 port)
{
	struct srp_host *host;

	host = kzalloc(sizeof *host, GFP_KERNEL);
	if (!host)
		return NULL;

	INIT_LIST_HEAD(&host->target_list);
	spin_lock_init(&host->target_lock);
	init_completion(&host->released);
	mutex_init(&host->add_target_mutex);
	host->srp_dev = device;
	host->port = port;

	host->dev.class = &srp_class;
	host->dev.parent = device->dev->dev.parent;
	dev_set_name(&host->dev, "srp-%s-%d", dev_name(&device->dev->dev),
		     port);

	if (device_register(&host->dev))
		goto free_host;
	if (device_create_file(&host->dev, &dev_attr_add_target))
		goto err_class;
	if (device_create_file(&host->dev, &dev_attr_ibdev))
		goto err_class;
	if (device_create_file(&host->dev, &dev_attr_port))
		goto err_class;

	return host;

err_class:
	device_unregister(&host->dev);

free_host:
	kfree(host);

	return NULL;
}

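/*
 * Called when the IB core renames the underlying ib_device; keeps the
 * "srp-<ibdev>-<port>" names of the per-port class devices in sync with the
 * new device name.
 */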
static void srp_rename_dev(struct ib_device *device, void *client_data)
{
	struct srp_device *srp_dev = client_data;
	struct srp_host *host, *tmp_host;

	list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) {
		char name[IB_DEVICE_NAME_MAX + 8];

		snprintf(name, sizeof(name), "srp-%s-%d",
			 dev_name(&device->dev), host->port);
		device_rename(&host->dev, name);
	}
}

static int srp_add_one(struct ib_device *device)
{
	struct srp_device *srp_dev;
	struct ib_device_attr *attr = &device->attrs;
	struct srp_host *host;
	int mr_page_shift;
	unsigned int p;
	u64 max_pages_per_mr;
	unsigned int flags = 0;

	srp_dev = kzalloc(sizeof(*srp_dev), GFP_KERNEL);
	if (!srp_dev)
		return -ENOMEM;

	/*
	 * Use the smallest page size supported by the HCA, down to a
	 * minimum of 4096 bytes. We're unlikely to build large sglists
	 * out of smaller entries.
	 */
	mr_page_shift		= max(12, ffs(attr->page_size_cap) - 1);
	srp_dev->mr_page_size	= 1 << mr_page_shift;
	srp_dev->mr_page_mask	= ~((u64) srp_dev->mr_page_size - 1);
	max_pages_per_mr	= attr->max_mr_size;
	do_div(max_pages_per_mr, srp_dev->mr_page_size);
	pr_debug("%s: %llu / %u = %llu <> %u\n", __func__,
		 attr->max_mr_size, srp_dev->mr_page_size,
		 max_pages_per_mr, SRP_MAX_PAGES_PER_MR);
	srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
					  max_pages_per_mr);
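	/*
	 * Example (illustrative): a page_size_cap whose smallest supported
	 * page size is 4 KiB gives mr_page_shift = 12 and
	 * mr_page_size = 4096; with max_mr_size = 4 GiB that allows
	 * 1048576 pages per MR before the SRP_MAX_PAGES_PER_MR cap is
	 * applied.
	 */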

	srp_dev->has_fr = (attr->device_cap_flags &
			   IB_DEVICE_MEM_MGT_EXTENSIONS);
	if (!never_register && !srp_dev->has_fr)
		dev_warn(&device->dev, "FR is not supported\n");
	else if (!never_register &&
		 attr->max_mr_size >= 2 * srp_dev->mr_page_size)
		srp_dev->use_fast_reg = srp_dev->has_fr;

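	/*
	 * If any data path may skip memory registration (never_register is
	 * set, register_always is clear, or fast registration is not
	 * available), allocate the PD with an unsafe global rkey so that
	 * unregistered buffers can still be described to the target.
	 */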
	if (never_register || !register_always || !srp_dev->has_fr)
		flags |= IB_PD_UNSAFE_GLOBAL_RKEY;

	if (srp_dev->use_fast_reg) {
		srp_dev->max_pages_per_mr =
			min_t(u32, srp_dev->max_pages_per_mr,
			      attr->max_fast_reg_page_list_len);
	}
	srp_dev->mr_max_size	= srp_dev->mr_page_size *
				   srp_dev->max_pages_per_mr;
	pr_debug("%s: mr_page_shift = %d, device->max_mr_size = %#llx, device->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
		 dev_name(&device->dev), mr_page_shift, attr->max_mr_size,
		 attr->max_fast_reg_page_list_len,
		 srp_dev->max_pages_per_mr, srp_dev->mr_max_size);

	INIT_LIST_HEAD(&srp_dev->dev_list);

	srp_dev->dev = device;
	srp_dev->pd  = ib_alloc_pd(device, flags);
	if (IS_ERR(srp_dev->pd)) {
		int ret = PTR_ERR(srp_dev->pd);

		kfree(srp_dev);
		return ret;
	}

	if (flags & IB_PD_UNSAFE_GLOBAL_RKEY) {
		srp_dev->global_rkey = srp_dev->pd->unsafe_global_rkey;
		WARN_ON_ONCE(srp_dev->global_rkey == 0);
	}

	rdma_for_each_port (device, p) {
		host = srp_add_port(srp_dev, p);
		if (host)
			list_add_tail(&host->list, &srp_dev->dev_list);
	}

	ib_set_client_data(device, &srp_client, srp_dev);
	return 0;
}

static void srp_remove_one(struct ib_device *device, void *client_data)
{
	struct srp_device *srp_dev;
	struct srp_host *host, *tmp_host;
	struct srp_target_port *target;

	srp_dev = client_data;

	list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) {
		device_unregister(&host->dev);
		/*
		 * Wait for the sysfs entry to go away, so that no new
		 * target ports can be created.
		 */
		wait_for_completion(&host->released);

		/*
		 * Remove all target ports.
		 */
		spin_lock(&host->target_lock);
		list_for_each_entry(target, &host->target_list, list)
			srp_queue_remove_work(target);
		spin_unlock(&host->target_lock);

		/*
		 * srp_queue_remove_work() queues a call to
		 * srp_remove_target(). The latter function cancels
		 * target->tl_err_work, so waiting for the queued remove
		 * work items to finish is sufficient.
		 */
		flush_workqueue(srp_remove_wq);

		kfree(host);
	}

	ib_dealloc_pd(srp_dev->pd);

	kfree(srp_dev);
}

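/*
 * Hooks into the SCSI SRP transport class: reconnect handling and the
 * fast_io_fail and dev_loss timeouts are driven through these callbacks by
 * the transport layer rather than directly by this driver.
 */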
static struct srp_function_template ib_srp_transport_functions = {
	.has_rport_state	 = true,
	.reset_timer_if_blocked	 = true,
	.reconnect_delay	 = &srp_reconnect_delay,
	.fast_io_fail_tmo	 = &srp_fast_io_fail_tmo,
	.dev_loss_tmo		 = &srp_dev_loss_tmo,
	.reconnect		 = srp_rport_reconnect,
	.rport_delete		 = srp_rport_delete,
	.terminate_rport_io	 = srp_terminate_io,
};

static int __init srp_init_module(void)
{
	int ret;

	BUILD_BUG_ON(sizeof(struct srp_imm_buf) != 4);
	BUILD_BUG_ON(sizeof(struct srp_login_req) != 64);
	BUILD_BUG_ON(sizeof(struct srp_login_req_rdma) != 56);
	BUILD_BUG_ON(sizeof(struct srp_cmd) != 48);

	if (srp_sg_tablesize) {
		pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n");
		if (!cmd_sg_entries)
			cmd_sg_entries = srp_sg_tablesize;
	}

	if (!cmd_sg_entries)
		cmd_sg_entries = SRP_DEF_SG_TABLESIZE;

	if (cmd_sg_entries > 255) {
		pr_warn("Clamping cmd_sg_entries to 255\n");
		cmd_sg_entries = 255;
	}

	if (!indirect_sg_entries)
		indirect_sg_entries = cmd_sg_entries;
	else if (indirect_sg_entries < cmd_sg_entries) {
		pr_warn("Bumping up indirect_sg_entries to match cmd_sg_entries (%u)\n",
			cmd_sg_entries);
		indirect_sg_entries = cmd_sg_entries;
	}

	if (indirect_sg_entries > SG_MAX_SEGMENTS) {
		pr_warn("Clamping indirect_sg_entries to %u\n",
			SG_MAX_SEGMENTS);
		indirect_sg_entries = SG_MAX_SEGMENTS;
	}
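	/*
	 * Example (illustrative): loading the module with
	 * cmd_sg_entries=300 and indirect_sg_entries left unset results in
	 * cmd_sg_entries being clamped to 255 and indirect_sg_entries
	 * defaulting to 255 as well.
	 */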

	srp_remove_wq = create_workqueue("srp_remove");
	if (!srp_remove_wq) {
		ret = -ENOMEM;
		goto out;
	}

	ret = -ENOMEM;
	ib_srp_transport_template =
		srp_attach_transport(&ib_srp_transport_functions);
	if (!ib_srp_transport_template)
		goto destroy_wq;

	ret = class_register(&srp_class);
	if (ret) {
		pr_err("couldn't register class infiniband_srp\n");
		goto release_tr;
	}

	ib_sa_register_client(&srp_sa_client);

	ret = ib_register_client(&srp_client);
	if (ret) {
		pr_err("couldn't register IB client\n");
		goto unreg_sa;
	}

out:
	return ret;

unreg_sa:
	ib_sa_unregister_client(&srp_sa_client);
	class_unregister(&srp_class);

release_tr:
	srp_release_transport(ib_srp_transport_template);

destroy_wq:
	destroy_workqueue(srp_remove_wq);
	goto out;
}

static void __exit srp_cleanup_module(void)
{
	ib_unregister_client(&srp_client);
	ib_sa_unregister_client(&srp_sa_client);
	class_unregister(&srp_class);
	srp_release_transport(ib_srp_transport_template);
	destroy_workqueue(srp_remove_wq);
}

module_init(srp_init_module);
module_exit(srp_cleanup_module);