1/*
2 * Copyright (c) 2009-2014 Chelsio, Inc. All rights reserved.
3 *
4 * This software is available to you under a choice of one of two
5 * licenses.  You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
9 *
10 *     Redistribution and use in source and binary forms, with or
11 *     without modification, are permitted provided that the following
12 *     conditions are met:
13 *
14 *      - Redistributions of source code must retain the above
15 *	  copyright notice, this list of conditions and the following
16 *	  disclaimer.
17 *
18 *      - Redistributions in binary form must reproduce the above
19 *	  copyright notice, this list of conditions and the following
20 *	  disclaimer in the documentation and/or other materials
21 *	  provided with the distribution.
22 *
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30 * SOFTWARE.
31 */
32#include <linux/module.h>
33#include <linux/list.h>
34#include <linux/workqueue.h>
35#include <linux/skbuff.h>
36#include <linux/timer.h>
37#include <linux/notifier.h>
38#include <linux/inetdevice.h>
39#include <linux/ip.h>
40#include <linux/tcp.h>
41#include <linux/if_vlan.h>
42
43#include <net/neighbour.h>
44#include <net/netevent.h>
45#include <net/route.h>
46#include <net/tcp.h>
47#include <net/ip6_route.h>
48#include <net/addrconf.h>
49
50#include <rdma/ib_addr.h>
51
52#include <libcxgb_cm.h>
53#include "iw_cxgb4.h"
54#include "clip_tbl.h"
55
56static char *states[] = {
57	"idle",
58	"listen",
59	"connecting",
60	"mpa_wait_req",
61	"mpa_req_sent",
62	"mpa_req_rcvd",
63	"mpa_rep_sent",
64	"fpdu_mode",
65	"aborting",
66	"closing",
67	"moribund",
68	"dead",
69	NULL,
70};
71
72static int nocong;
73module_param(nocong, int, 0644);
MODULE_PARM_DESC(nocong, "Turn off congestion control (default=0)");
75
76static int enable_ecn;
77module_param(enable_ecn, int, 0644);
78MODULE_PARM_DESC(enable_ecn, "Enable ECN (default=0/disabled)");
79
80static int dack_mode;
81module_param(dack_mode, int, 0644);
82MODULE_PARM_DESC(dack_mode, "Delayed ack mode (default=0)");
83
84uint c4iw_max_read_depth = 32;
85module_param(c4iw_max_read_depth, int, 0644);
86MODULE_PARM_DESC(c4iw_max_read_depth,
87		 "Per-connection max ORD/IRD (default=32)");
88
89static int enable_tcp_timestamps;
90module_param(enable_tcp_timestamps, int, 0644);
91MODULE_PARM_DESC(enable_tcp_timestamps, "Enable tcp timestamps (default=0)");
92
93static int enable_tcp_sack;
94module_param(enable_tcp_sack, int, 0644);
95MODULE_PARM_DESC(enable_tcp_sack, "Enable tcp SACK (default=0)");
96
97static int enable_tcp_window_scaling = 1;
98module_param(enable_tcp_window_scaling, int, 0644);
99MODULE_PARM_DESC(enable_tcp_window_scaling,
100		 "Enable tcp window scaling (default=1)");
101
102static int peer2peer = 1;
103module_param(peer2peer, int, 0644);
104MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=1)");
105
106static int p2p_type = FW_RI_INIT_P2PTYPE_READ_REQ;
107module_param(p2p_type, int, 0644);
108MODULE_PARM_DESC(p2p_type, "RDMAP opcode to use for the RTR message: "
109			   "1=RDMA_READ 0=RDMA_WRITE (default 1)");
110
111static int ep_timeout_secs = 60;
112module_param(ep_timeout_secs, int, 0644);
113MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout "
114				   "in seconds (default=60)");
115
116static int mpa_rev = 2;
117module_param(mpa_rev, int, 0644);
118MODULE_PARM_DESC(mpa_rev, "MPA Revision, 0 supports amso1100, "
119		"1 is RFC5044 spec compliant, 2 is IETF MPA Peer Connect Draft"
120		" compliant (default=2)");
121
122static int markers_enabled;
123module_param(markers_enabled, int, 0644);
124MODULE_PARM_DESC(markers_enabled, "Enable MPA MARKERS (default(0)=disabled)");
125
126static int crc_enabled = 1;
127module_param(crc_enabled, int, 0644);
128MODULE_PARM_DESC(crc_enabled, "Enable MPA CRC (default(1)=enabled)");
129
130static int rcv_win = 256 * 1024;
131module_param(rcv_win, int, 0644);
132MODULE_PARM_DESC(rcv_win, "TCP receive window in bytes (default=256KB)");
133
134static int snd_win = 128 * 1024;
135module_param(snd_win, int, 0644);
136MODULE_PARM_DESC(snd_win, "TCP send window in bytes (default=128KB)");
137
138static struct workqueue_struct *workq;
139
140static struct sk_buff_head rxq;
141
142static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp);
143static void ep_timeout(struct timer_list *t);
144static void connect_reply_upcall(struct c4iw_ep *ep, int status);
145static int sched(struct c4iw_dev *dev, struct sk_buff *skb);
146
147static LIST_HEAD(timeout_list);
148static spinlock_t timeout_lock;
149
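/*
 * cm_id reference helpers: take or drop a reference on the iw_cm_id
 * and record the event in the ep history bitmap for debugging.
 */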
150static void deref_cm_id(struct c4iw_ep_common *epc)
151{
152	epc->cm_id->rem_ref(epc->cm_id);
153	epc->cm_id = NULL;
154	set_bit(CM_ID_DEREFED, &epc->history);
155}
156
157static void ref_cm_id(struct c4iw_ep_common *epc)
158{
159	set_bit(CM_ID_REFED, &epc->history);
160	epc->cm_id->add_ref(epc->cm_id);
161}
162
163static void deref_qp(struct c4iw_ep *ep)
164{
165	c4iw_qp_rem_ref(&ep->com.qp->ibqp);
166	clear_bit(QP_REFERENCED, &ep->com.flags);
167	set_bit(QP_DEREFED, &ep->com.history);
168}
169
170static void ref_qp(struct c4iw_ep *ep)
171{
172	set_bit(QP_REFERENCED, &ep->com.flags);
173	set_bit(QP_REFED, &ep->com.history);
174	c4iw_qp_add_ref(&ep->com.qp->ibqp);
175}
176
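/*
 * An armed ep timer holds a reference on the ep.  start_ep_timer()
 * takes the reference; stop_ep_timer() drops it only if the TIMEOUT
 * flag was not already set, so the reference is released exactly once
 * whether the timer is stopped or fires.
 */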
177static void start_ep_timer(struct c4iw_ep *ep)
178{
179	pr_debug("ep %p\n", ep);
180	if (timer_pending(&ep->timer)) {
181		pr_err("%s timer already started! ep %p\n",
182		       __func__, ep);
183		return;
184	}
185	clear_bit(TIMEOUT, &ep->com.flags);
186	c4iw_get_ep(&ep->com);
187	ep->timer.expires = jiffies + ep_timeout_secs * HZ;
188	add_timer(&ep->timer);
189}
190
191static int stop_ep_timer(struct c4iw_ep *ep)
192{
193	pr_debug("ep %p stopping\n", ep);
194	del_timer_sync(&ep->timer);
195	if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) {
196		c4iw_put_ep(&ep->com);
197		return 0;
198	}
199	return 1;
200}
201
202static int c4iw_l2t_send(struct c4iw_rdev *rdev, struct sk_buff *skb,
203		  struct l2t_entry *l2e)
204{
205	int	error = 0;
206
207	if (c4iw_fatal_error(rdev)) {
208		kfree_skb(skb);
209		pr_err("%s - device in error state - dropping\n", __func__);
210		return -EIO;
211	}
212	error = cxgb4_l2t_send(rdev->lldi.ports[0], skb, l2e);
213	if (error < 0)
214		kfree_skb(skb);
215	else if (error == NET_XMIT_DROP)
216		return -ENOMEM;
217	return error < 0 ? error : 0;
218}
219
220int c4iw_ofld_send(struct c4iw_rdev *rdev, struct sk_buff *skb)
221{
222	int	error = 0;
223
224	if (c4iw_fatal_error(rdev)) {
225		kfree_skb(skb);
226		pr_err("%s - device in error state - dropping\n", __func__);
227		return -EIO;
228	}
229	error = cxgb4_ofld_send(rdev->lldi.ports[0], skb);
230	if (error < 0)
231		kfree_skb(skb);
232	return error < 0 ? error : 0;
233}
234
235static void release_tid(struct c4iw_rdev *rdev, u32 hwtid, struct sk_buff *skb)
236{
237	u32 len = roundup(sizeof(struct cpl_tid_release), 16);
238
239	skb = get_skb(skb, len, GFP_KERNEL);
240	if (!skb)
241		return;
242
243	cxgb_mk_tid_release(skb, len, hwtid, 0);
244	c4iw_ofld_send(rdev, skb);
246}
247
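/*
 * Derive the effective MSS: take the hardware MTU table entry selected
 * for this connection, subtract the IP/IPv6 and TCP header sizes,
 * account for the TCP timestamp option if negotiated, and clamp the
 * result to a minimum of 128 bytes.
 */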
248static void set_emss(struct c4iw_ep *ep, u16 opt)
249{
250	ep->emss = ep->com.dev->rdev.lldi.mtus[TCPOPT_MSS_G(opt)] -
251		   ((AF_INET == ep->com.remote_addr.ss_family) ?
252		    sizeof(struct iphdr) : sizeof(struct ipv6hdr)) -
253		   sizeof(struct tcphdr);
254	ep->mss = ep->emss;
255	if (TCPOPT_TSTAMP_G(opt))
256		ep->emss -= round_up(TCPOLEN_TIMESTAMP, 4);
257	if (ep->emss < 128)
258		ep->emss = 128;
259	if (ep->emss & 7)
260		pr_debug("Warning: misaligned mtu idx %u mss %u emss=%u\n",
261			 TCPOPT_MSS_G(opt), ep->mss, ep->emss);
262	pr_debug("mss_idx %u mss %u emss=%u\n", TCPOPT_MSS_G(opt), ep->mss,
263		 ep->emss);
264}
265
266static enum c4iw_ep_state state_read(struct c4iw_ep_common *epc)
267{
268	enum c4iw_ep_state state;
269
270	mutex_lock(&epc->mutex);
271	state = epc->state;
272	mutex_unlock(&epc->mutex);
273	return state;
274}
275
276static void __state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new)
277{
278	epc->state = new;
279}
280
281static void state_set(struct c4iw_ep_common *epc, enum c4iw_ep_state new)
282{
283	mutex_lock(&epc->mutex);
284	pr_debug("%s -> %s\n", states[epc->state], states[new]);
285	__state_set(epc, new);
286	mutex_unlock(&epc->mutex);
288}
289
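/*
 * Pre-allocate a pool of skbs on the ep so control messages sent later
 * (flowc, half-close, abort) can dequeue a buffer instead of having to
 * allocate one in a context where failure is hard to handle.
 */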
290static int alloc_ep_skb_list(struct sk_buff_head *ep_skb_list, int size)
291{
292	struct sk_buff *skb;
293	unsigned int i;
294	size_t len;
295
296	len = roundup(sizeof(union cpl_wr_size), 16);
297	for (i = 0; i < size; i++) {
298		skb = alloc_skb(len, GFP_KERNEL);
299		if (!skb)
300			goto fail;
301		skb_queue_tail(ep_skb_list, skb);
302	}
303	return 0;
304fail:
305	skb_queue_purge(ep_skb_list);
306	return -ENOMEM;
307}
308
309static void *alloc_ep(int size, gfp_t gfp)
310{
311	struct c4iw_ep_common *epc;
312
313	epc = kzalloc(size, gfp);
314	if (epc) {
315		epc->wr_waitp = c4iw_alloc_wr_wait(gfp);
316		if (!epc->wr_waitp) {
317			kfree(epc);
318			epc = NULL;
319			goto out;
320		}
321		kref_init(&epc->kref);
322		mutex_init(&epc->mutex);
323		c4iw_init_wr_wait(epc->wr_waitp);
324	}
325	pr_debug("alloc ep %p\n", epc);
326out:
327	return epc;
328}
329
330static void remove_ep_tid(struct c4iw_ep *ep)
331{
332	unsigned long flags;
333
334	xa_lock_irqsave(&ep->com.dev->hwtids, flags);
335	__xa_erase(&ep->com.dev->hwtids, ep->hwtid);
336	if (xa_empty(&ep->com.dev->hwtids))
337		wake_up(&ep->com.dev->wait);
338	xa_unlock_irqrestore(&ep->com.dev->hwtids, flags);
339}
340
341static int insert_ep_tid(struct c4iw_ep *ep)
342{
343	unsigned long flags;
344	int err;
345
346	xa_lock_irqsave(&ep->com.dev->hwtids, flags);
347	err = __xa_insert(&ep->com.dev->hwtids, ep->hwtid, ep, GFP_KERNEL);
348	xa_unlock_irqrestore(&ep->com.dev->hwtids, flags);
349
350	return err;
351}
352
353/*
354 * Atomically lookup the ep ptr given the tid and grab a reference on the ep.
355 */
356static struct c4iw_ep *get_ep_from_tid(struct c4iw_dev *dev, unsigned int tid)
357{
358	struct c4iw_ep *ep;
359	unsigned long flags;
360
361	xa_lock_irqsave(&dev->hwtids, flags);
362	ep = xa_load(&dev->hwtids, tid);
363	if (ep)
364		c4iw_get_ep(&ep->com);
365	xa_unlock_irqrestore(&dev->hwtids, flags);
366	return ep;
367}
368
369/*
370 * Atomically lookup the ep ptr given the stid and grab a reference on the ep.
371 */
372static struct c4iw_listen_ep *get_ep_from_stid(struct c4iw_dev *dev,
373					       unsigned int stid)
374{
375	struct c4iw_listen_ep *ep;
376	unsigned long flags;
377
378	xa_lock_irqsave(&dev->stids, flags);
379	ep = xa_load(&dev->stids, stid);
380	if (ep)
381		c4iw_get_ep(&ep->com);
382	xa_unlock_irqrestore(&dev->stids, flags);
383	return ep;
384}
385
386void _c4iw_free_ep(struct kref *kref)
387{
388	struct c4iw_ep *ep;
389
390	ep = container_of(kref, struct c4iw_ep, com.kref);
391	pr_debug("ep %p state %s\n", ep, states[ep->com.state]);
392	if (test_bit(QP_REFERENCED, &ep->com.flags))
393		deref_qp(ep);
394	if (test_bit(RELEASE_RESOURCES, &ep->com.flags)) {
395		if (ep->com.remote_addr.ss_family == AF_INET6) {
396			struct sockaddr_in6 *sin6 =
397					(struct sockaddr_in6 *)
398					&ep->com.local_addr;
399
400			cxgb4_clip_release(
401					ep->com.dev->rdev.lldi.ports[0],
402					(const u32 *)&sin6->sin6_addr.s6_addr,
403					1);
404		}
405		cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid,
406				 ep->com.local_addr.ss_family);
407		dst_release(ep->dst);
408		cxgb4_l2t_release(ep->l2t);
409		kfree_skb(ep->mpa_skb);
410	}
411	if (!skb_queue_empty(&ep->com.ep_skb_list))
412		skb_queue_purge(&ep->com.ep_skb_list);
413	c4iw_put_wr_wait(ep->com.wr_waitp);
414	kfree(ep);
415}
416
417static void release_ep_resources(struct c4iw_ep *ep)
418{
419	set_bit(RELEASE_RESOURCES, &ep->com.flags);
420
421	/*
	 * If we have a hwtid, then remove it from the hwtid xarray
423	 * so lookups will no longer find this endpoint.  Otherwise
424	 * we have a race where one thread finds the ep ptr just
425	 * before the other thread is freeing the ep memory.
426	 */
427	if (ep->hwtid != -1)
428		remove_ep_tid(ep);
429	c4iw_put_ep(&ep->com);
430}
431
432static int status2errno(int status)
433{
434	switch (status) {
435	case CPL_ERR_NONE:
436		return 0;
437	case CPL_ERR_CONN_RESET:
438		return -ECONNRESET;
439	case CPL_ERR_ARP_MISS:
440		return -EHOSTUNREACH;
441	case CPL_ERR_CONN_TIMEDOUT:
442		return -ETIMEDOUT;
443	case CPL_ERR_TCAM_FULL:
444		return -ENOMEM;
445	case CPL_ERR_CONN_EXIST:
446		return -EADDRINUSE;
447	default:
448		return -EIO;
449	}
450}
451
/*
 * Try to reuse the skb passed in; fall back to a fresh allocation if
 * the skb is missing, nonlinear, or cloned.
 */
455static struct sk_buff *get_skb(struct sk_buff *skb, int len, gfp_t gfp)
456{
457	if (skb && !skb_is_nonlinear(skb) && !skb_cloned(skb)) {
458		skb_trim(skb, 0);
459		skb_get(skb);
460		skb_reset_transport_header(skb);
461	} else {
462		skb = alloc_skb(len, gfp);
463		if (!skb)
464			return NULL;
465	}
466	t4_set_arp_err_handler(skb, NULL, NULL);
467	return skb;
468}
469
470static struct net_device *get_real_dev(struct net_device *egress_dev)
471{
472	return rdma_vlan_dev_real_dev(egress_dev) ? : egress_dev;
473}
474
475static void arp_failure_discard(void *handle, struct sk_buff *skb)
476{
477	pr_err("ARP failure\n");
478	kfree_skb(skb);
479}
480
481static void mpa_start_arp_failure(void *handle, struct sk_buff *skb)
482{
483	pr_err("ARP failure during MPA Negotiation - Closing Connection\n");
484}
485
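/*
 * Fake CPL opcodes (outside the real CPL opcode range) used to defer
 * endpoint release from atomic ARP-failure context to process_work().
 */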
486enum {
487	NUM_FAKE_CPLS = 2,
488	FAKE_CPL_PUT_EP_SAFE = NUM_CPL_CMDS + 0,
489	FAKE_CPL_PASS_PUT_EP_SAFE = NUM_CPL_CMDS + 1,
490};
491
492static int _put_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb)
493{
494	struct c4iw_ep *ep;
495
496	ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *)));
497	release_ep_resources(ep);
498	return 0;
499}
500
501static int _put_pass_ep_safe(struct c4iw_dev *dev, struct sk_buff *skb)
502{
503	struct c4iw_ep *ep;
504
505	ep = *((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *)));
506	c4iw_put_ep(&ep->parent_ep->com);
507	release_ep_resources(ep);
508	return 0;
509}
510
511/*
512 * Fake up a special CPL opcode and call sched() so process_work() will call
513 * _put_ep_safe() in a safe context to free the ep resources.  This is needed
514 * because ARP error handlers are called in an ATOMIC context, and
515 * _c4iw_free_ep() needs to block.
516 */
517static void queue_arp_failure_cpl(struct c4iw_ep *ep, struct sk_buff *skb,
518				  int cpl)
519{
520	struct cpl_act_establish *rpl = cplhdr(skb);
521
522	/* Set our special ARP_FAILURE opcode */
523	rpl->ot.opcode = cpl;
524
525	/*
526	 * Save ep in the skb->cb area, after where sched() will save the dev
527	 * ptr.
528	 */
529	*((struct c4iw_ep **)(skb->cb + 2 * sizeof(void *))) = ep;
530	sched(ep->com.dev, skb);
531}
532
533/* Handle an ARP failure for an accept */
534static void pass_accept_rpl_arp_failure(void *handle, struct sk_buff *skb)
535{
536	struct c4iw_ep *ep = handle;
537
538	pr_err("ARP failure during accept - tid %u - dropping connection\n",
539	       ep->hwtid);
540
541	__state_set(&ep->com, DEAD);
542	queue_arp_failure_cpl(ep, skb, FAKE_CPL_PASS_PUT_EP_SAFE);
543}
544
545/*
546 * Handle an ARP failure for an active open.
547 */
548static void act_open_req_arp_failure(void *handle, struct sk_buff *skb)
549{
550	struct c4iw_ep *ep = handle;
551
552	pr_err("ARP failure during connect\n");
553	connect_reply_upcall(ep, -EHOSTUNREACH);
554	__state_set(&ep->com, DEAD);
555	if (ep->com.remote_addr.ss_family == AF_INET6) {
556		struct sockaddr_in6 *sin6 =
557			(struct sockaddr_in6 *)&ep->com.local_addr;
558		cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
559				   (const u32 *)&sin6->sin6_addr.s6_addr, 1);
560	}
561	xa_erase_irq(&ep->com.dev->atids, ep->atid);
562	cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
563	queue_arp_failure_cpl(ep, skb, FAKE_CPL_PUT_EP_SAFE);
564}
565
566/*
567 * Handle an ARP failure for a CPL_ABORT_REQ.  Change it into a no RST variant
568 * and send it along.
569 */
570static void abort_arp_failure(void *handle, struct sk_buff *skb)
571{
572	int ret;
573	struct c4iw_ep *ep = handle;
574	struct c4iw_rdev *rdev = &ep->com.dev->rdev;
575	struct cpl_abort_req *req = cplhdr(skb);
576
577	pr_debug("rdev %p\n", rdev);
578	req->cmd = CPL_ABORT_NO_RST;
579	skb_get(skb);
580	ret = c4iw_ofld_send(rdev, skb);
581	if (ret) {
582		__state_set(&ep->com, DEAD);
583		queue_arp_failure_cpl(ep, skb, FAKE_CPL_PUT_EP_SAFE);
584	} else
585		kfree_skb(skb);
586}
587
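/*
 * Send a FW_FLOWC_WR telling the firmware the per-flow attributes
 * (PF/VF, tx channel, ingress queue, initial sequence numbers, send
 * buffer, MSS, receive window scale and, for VLAN-tagged traffic, the
 * scheduling class) before any further offloaded TX on this tid.
 */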
588static int send_flowc(struct c4iw_ep *ep)
589{
590	struct fw_flowc_wr *flowc;
591	struct sk_buff *skb = skb_dequeue(&ep->com.ep_skb_list);
592	u16 vlan = ep->l2t->vlan;
593	int nparams;
594	int flowclen, flowclen16;
595
596	if (WARN_ON(!skb))
597		return -ENOMEM;
598
599	if (vlan == CPL_L2T_VLAN_NONE)
600		nparams = 9;
601	else
602		nparams = 10;
603
604	flowclen = offsetof(struct fw_flowc_wr, mnemval[nparams]);
605	flowclen16 = DIV_ROUND_UP(flowclen, 16);
606	flowclen = flowclen16 * 16;
607
608	flowc = __skb_put(skb, flowclen);
609	memset(flowc, 0, flowclen);
610
611	flowc->op_to_nparams = cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) |
612					   FW_FLOWC_WR_NPARAMS_V(nparams));
613	flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(flowclen16) |
614					  FW_WR_FLOWID_V(ep->hwtid));
615
616	flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_PFNVFN;
617	flowc->mnemval[0].val = cpu_to_be32(FW_PFVF_CMD_PFN_V
618					    (ep->com.dev->rdev.lldi.pf));
619	flowc->mnemval[1].mnemonic = FW_FLOWC_MNEM_CH;
620	flowc->mnemval[1].val = cpu_to_be32(ep->tx_chan);
621	flowc->mnemval[2].mnemonic = FW_FLOWC_MNEM_PORT;
622	flowc->mnemval[2].val = cpu_to_be32(ep->tx_chan);
623	flowc->mnemval[3].mnemonic = FW_FLOWC_MNEM_IQID;
624	flowc->mnemval[3].val = cpu_to_be32(ep->rss_qid);
625	flowc->mnemval[4].mnemonic = FW_FLOWC_MNEM_SNDNXT;
626	flowc->mnemval[4].val = cpu_to_be32(ep->snd_seq);
627	flowc->mnemval[5].mnemonic = FW_FLOWC_MNEM_RCVNXT;
628	flowc->mnemval[5].val = cpu_to_be32(ep->rcv_seq);
629	flowc->mnemval[6].mnemonic = FW_FLOWC_MNEM_SNDBUF;
630	flowc->mnemval[6].val = cpu_to_be32(ep->snd_win);
631	flowc->mnemval[7].mnemonic = FW_FLOWC_MNEM_MSS;
632	flowc->mnemval[7].val = cpu_to_be32(ep->emss);
633	flowc->mnemval[8].mnemonic = FW_FLOWC_MNEM_RCV_SCALE;
634	flowc->mnemval[8].val = cpu_to_be32(ep->snd_wscale);
635	if (nparams == 10) {
636		u16 pri;
637		pri = (vlan & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
638		flowc->mnemval[9].mnemonic = FW_FLOWC_MNEM_SCHEDCLASS;
639		flowc->mnemval[9].val = cpu_to_be32(pri);
640	}
641
642	set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
643	return c4iw_ofld_send(&ep->com.dev->rdev, skb);
644}
645
646static int send_halfclose(struct c4iw_ep *ep)
647{
648	struct sk_buff *skb = skb_dequeue(&ep->com.ep_skb_list);
649	u32 wrlen = roundup(sizeof(struct cpl_close_con_req), 16);
650
651	pr_debug("ep %p tid %u\n", ep, ep->hwtid);
652	if (WARN_ON(!skb))
653		return -ENOMEM;
654
655	cxgb_mk_close_con_req(skb, wrlen, ep->hwtid, ep->txq_idx,
656			      NULL, arp_failure_discard);
657
658	return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
659}
660
661static void read_tcb(struct c4iw_ep *ep)
662{
663	struct sk_buff *skb;
664	struct cpl_get_tcb *req;
665	int wrlen = roundup(sizeof(*req), 16);
666
	/* Allocate the full rounded-up length that is skb_put() below. */
	skb = get_skb(NULL, wrlen, GFP_KERNEL);
668	if (WARN_ON(!skb))
669		return;
670
671	set_wr_txq(skb, CPL_PRIORITY_CONTROL, ep->ctrlq_idx);
672	req = (struct cpl_get_tcb *) skb_put(skb, wrlen);
673	memset(req, 0, wrlen);
674	INIT_TP_WR(req, ep->hwtid);
675	OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_GET_TCB, ep->hwtid));
676	req->reply_ctrl = htons(REPLY_CHAN_V(0) | QUEUENO_V(ep->rss_qid));
677
678	/*
679	 * keep a ref on the ep so the tcb is not unlocked before this
680	 * cpl completes. The ref is released in read_tcb_rpl().
681	 */
682	c4iw_get_ep(&ep->com);
683	if (WARN_ON(c4iw_ofld_send(&ep->com.dev->rdev, skb)))
684		c4iw_put_ep(&ep->com);
685}
686
687static int send_abort_req(struct c4iw_ep *ep)
688{
689	u32 wrlen = roundup(sizeof(struct cpl_abort_req), 16);
690	struct sk_buff *req_skb = skb_dequeue(&ep->com.ep_skb_list);
691
692	pr_debug("ep %p tid %u\n", ep, ep->hwtid);
693	if (WARN_ON(!req_skb))
694		return -ENOMEM;
695
696	cxgb_mk_abort_req(req_skb, wrlen, ep->hwtid, ep->txq_idx,
697			  ep, abort_arp_failure);
698
699	return c4iw_l2t_send(&ep->com.dev->rdev, req_skb, ep->l2t);
700}
701
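/*
 * For connections backed by an SRQ, read the TCB first so any SRQ
 * buffer still cached in hardware can be accounted for before tearing
 * the connection down; otherwise send the abort request right away.
 */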
702static int send_abort(struct c4iw_ep *ep)
703{
704	if (!ep->com.qp || !ep->com.qp->srq) {
705		send_abort_req(ep);
706		return 0;
707	}
708	set_bit(ABORT_REQ_IN_PROGRESS, &ep->com.flags);
709	read_tcb(ep);
710	return 0;
711}
712
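/*
 * Build and send the active-open request.  The CPL layout differs by
 * adapter generation (T4/T5/T6) and by address family, hence the six
 * request pointers prepared below.
 */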
713static int send_connect(struct c4iw_ep *ep)
714{
715	struct cpl_act_open_req *req = NULL;
716	struct cpl_t5_act_open_req *t5req = NULL;
717	struct cpl_t6_act_open_req *t6req = NULL;
718	struct cpl_act_open_req6 *req6 = NULL;
719	struct cpl_t5_act_open_req6 *t5req6 = NULL;
720	struct cpl_t6_act_open_req6 *t6req6 = NULL;
721	struct sk_buff *skb;
722	u64 opt0;
723	u32 opt2;
724	unsigned int mtu_idx;
725	u32 wscale;
726	int win, sizev4, sizev6, wrlen;
727	struct sockaddr_in *la = (struct sockaddr_in *)
728				 &ep->com.local_addr;
729	struct sockaddr_in *ra = (struct sockaddr_in *)
730				 &ep->com.remote_addr;
731	struct sockaddr_in6 *la6 = (struct sockaddr_in6 *)
732				   &ep->com.local_addr;
733	struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *)
734				   &ep->com.remote_addr;
735	int ret;
736	enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type;
737	u32 isn = (prandom_u32() & ~7UL) - 1;
738	struct net_device *netdev;
739	u64 params;
740
741	netdev = ep->com.dev->rdev.lldi.ports[0];
742
743	switch (CHELSIO_CHIP_VERSION(adapter_type)) {
744	case CHELSIO_T4:
745		sizev4 = sizeof(struct cpl_act_open_req);
746		sizev6 = sizeof(struct cpl_act_open_req6);
747		break;
748	case CHELSIO_T5:
749		sizev4 = sizeof(struct cpl_t5_act_open_req);
750		sizev6 = sizeof(struct cpl_t5_act_open_req6);
751		break;
752	case CHELSIO_T6:
753		sizev4 = sizeof(struct cpl_t6_act_open_req);
754		sizev6 = sizeof(struct cpl_t6_act_open_req6);
755		break;
756	default:
757		pr_err("T%d Chip is not supported\n",
758		       CHELSIO_CHIP_VERSION(adapter_type));
759		return -EINVAL;
760	}
761
762	wrlen = (ep->com.remote_addr.ss_family == AF_INET) ?
763			roundup(sizev4, 16) :
764			roundup(sizev6, 16);
765
766	pr_debug("ep %p atid %u\n", ep, ep->atid);
767
768	skb = get_skb(NULL, wrlen, GFP_KERNEL);
769	if (!skb) {
770		pr_err("%s - failed to alloc skb\n", __func__);
771		return -ENOMEM;
772	}
773	set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx);
774
775	cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
776		      enable_tcp_timestamps,
777		      (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1);
778	wscale = cxgb_compute_wscale(rcv_win);
779
780	/*
781	 * Specify the largest window that will fit in opt0. The
782	 * remainder will be specified in the rx_data_ack.
783	 */
784	win = ep->rcv_win >> 10;
785	if (win > RCV_BUFSIZ_M)
786		win = RCV_BUFSIZ_M;
787
788	opt0 = (nocong ? NO_CONG_F : 0) |
789	       KEEP_ALIVE_F |
790	       DELACK_F |
791	       WND_SCALE_V(wscale) |
792	       MSS_IDX_V(mtu_idx) |
793	       L2T_IDX_V(ep->l2t->idx) |
794	       TX_CHAN_V(ep->tx_chan) |
795	       SMAC_SEL_V(ep->smac_idx) |
796	       DSCP_V(ep->tos >> 2) |
797	       ULP_MODE_V(ULP_MODE_TCPDDP) |
798	       RCV_BUFSIZ_V(win);
799	opt2 = RX_CHANNEL_V(0) |
800	       CCTRL_ECN_V(enable_ecn) |
801	       RSS_QUEUE_VALID_F | RSS_QUEUE_V(ep->rss_qid);
802	if (enable_tcp_timestamps)
803		opt2 |= TSTAMPS_EN_F;
804	if (enable_tcp_sack)
805		opt2 |= SACK_EN_F;
806	if (wscale && enable_tcp_window_scaling)
807		opt2 |= WND_SCALE_EN_F;
808	if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T4) {
809		if (peer2peer)
810			isn += 4;
811
812		opt2 |= T5_OPT_2_VALID_F;
813		opt2 |= CONG_CNTRL_V(CONG_ALG_TAHOE);
814		opt2 |= T5_ISS_F;
815	}
816
817	params = cxgb4_select_ntuple(netdev, ep->l2t);
818
819	if (ep->com.remote_addr.ss_family == AF_INET6)
820		cxgb4_clip_get(ep->com.dev->rdev.lldi.ports[0],
821			       (const u32 *)&la6->sin6_addr.s6_addr, 1);
822
823	t4_set_arp_err_handler(skb, ep, act_open_req_arp_failure);
824
825	if (ep->com.remote_addr.ss_family == AF_INET) {
826		switch (CHELSIO_CHIP_VERSION(adapter_type)) {
827		case CHELSIO_T4:
828			req = skb_put(skb, wrlen);
829			INIT_TP_WR(req, 0);
830			break;
831		case CHELSIO_T5:
832			t5req = skb_put(skb, wrlen);
833			INIT_TP_WR(t5req, 0);
834			req = (struct cpl_act_open_req *)t5req;
835			break;
836		case CHELSIO_T6:
837			t6req = skb_put(skb, wrlen);
838			INIT_TP_WR(t6req, 0);
839			req = (struct cpl_act_open_req *)t6req;
840			t5req = (struct cpl_t5_act_open_req *)t6req;
841			break;
842		default:
843			pr_err("T%d Chip is not supported\n",
844			       CHELSIO_CHIP_VERSION(adapter_type));
845			ret = -EINVAL;
846			goto clip_release;
847		}
848
849		OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ,
850					((ep->rss_qid<<14) | ep->atid)));
851		req->local_port = la->sin_port;
852		req->peer_port = ra->sin_port;
853		req->local_ip = la->sin_addr.s_addr;
854		req->peer_ip = ra->sin_addr.s_addr;
855		req->opt0 = cpu_to_be64(opt0);
856
857		if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) {
858			req->params = cpu_to_be32(params);
859			req->opt2 = cpu_to_be32(opt2);
860		} else {
861			if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) {
862				t5req->params =
863					  cpu_to_be64(FILTER_TUPLE_V(params));
864				t5req->rsvd = cpu_to_be32(isn);
865				pr_debug("snd_isn %u\n", t5req->rsvd);
866				t5req->opt2 = cpu_to_be32(opt2);
867			} else {
868				t6req->params =
869					  cpu_to_be64(FILTER_TUPLE_V(params));
870				t6req->rsvd = cpu_to_be32(isn);
871				pr_debug("snd_isn %u\n", t6req->rsvd);
872				t6req->opt2 = cpu_to_be32(opt2);
873			}
874		}
875	} else {
876		switch (CHELSIO_CHIP_VERSION(adapter_type)) {
877		case CHELSIO_T4:
878			req6 = skb_put(skb, wrlen);
879			INIT_TP_WR(req6, 0);
880			break;
881		case CHELSIO_T5:
882			t5req6 = skb_put(skb, wrlen);
883			INIT_TP_WR(t5req6, 0);
884			req6 = (struct cpl_act_open_req6 *)t5req6;
885			break;
886		case CHELSIO_T6:
887			t6req6 = skb_put(skb, wrlen);
888			INIT_TP_WR(t6req6, 0);
889			req6 = (struct cpl_act_open_req6 *)t6req6;
890			t5req6 = (struct cpl_t5_act_open_req6 *)t6req6;
891			break;
892		default:
893			pr_err("T%d Chip is not supported\n",
894			       CHELSIO_CHIP_VERSION(adapter_type));
895			ret = -EINVAL;
896			goto clip_release;
897		}
898
899		OPCODE_TID(req6) = cpu_to_be32(MK_OPCODE_TID(CPL_ACT_OPEN_REQ6,
900					((ep->rss_qid<<14)|ep->atid)));
901		req6->local_port = la6->sin6_port;
902		req6->peer_port = ra6->sin6_port;
903		req6->local_ip_hi = *((__be64 *)(la6->sin6_addr.s6_addr));
904		req6->local_ip_lo = *((__be64 *)(la6->sin6_addr.s6_addr + 8));
905		req6->peer_ip_hi = *((__be64 *)(ra6->sin6_addr.s6_addr));
906		req6->peer_ip_lo = *((__be64 *)(ra6->sin6_addr.s6_addr + 8));
907		req6->opt0 = cpu_to_be64(opt0);
908
909		if (is_t4(ep->com.dev->rdev.lldi.adapter_type)) {
910			req6->params = cpu_to_be32(cxgb4_select_ntuple(netdev,
911								      ep->l2t));
912			req6->opt2 = cpu_to_be32(opt2);
913		} else {
914			if (is_t5(ep->com.dev->rdev.lldi.adapter_type)) {
915				t5req6->params =
916					    cpu_to_be64(FILTER_TUPLE_V(params));
917				t5req6->rsvd = cpu_to_be32(isn);
918				pr_debug("snd_isn %u\n", t5req6->rsvd);
919				t5req6->opt2 = cpu_to_be32(opt2);
920			} else {
921				t6req6->params =
922					    cpu_to_be64(FILTER_TUPLE_V(params));
923				t6req6->rsvd = cpu_to_be32(isn);
924				pr_debug("snd_isn %u\n", t6req6->rsvd);
925				t6req6->opt2 = cpu_to_be32(opt2);
926			}
927
928		}
929	}
930
931	set_bit(ACT_OPEN_REQ, &ep->com.history);
932	ret = c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
933clip_release:
934	if (ret && ep->com.remote_addr.ss_family == AF_INET6)
935		cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
936				   (const u32 *)&la6->sin6_addr.s6_addr, 1);
937	return ret;
938}
939
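/*
 * Send the MPA start request in streaming mode.  For MPA v2, the
 * IRD/ORD and RTR negotiation parameters are prepended to any private
 * data from the ULP.  The skb is referenced so the payload stays
 * around until the hardware acks the transmit (fw4_ack() drops it).
 */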
940static int send_mpa_req(struct c4iw_ep *ep, struct sk_buff *skb,
941			u8 mpa_rev_to_use)
942{
943	int mpalen, wrlen, ret;
944	struct fw_ofld_tx_data_wr *req;
945	struct mpa_message *mpa;
946	struct mpa_v2_conn_params mpa_v2_params;
947
948	pr_debug("ep %p tid %u pd_len %d\n",
949		 ep, ep->hwtid, ep->plen);
950
951	mpalen = sizeof(*mpa) + ep->plen;
952	if (mpa_rev_to_use == 2)
953		mpalen += sizeof(struct mpa_v2_conn_params);
954	wrlen = roundup(mpalen + sizeof(*req), 16);
955	skb = get_skb(skb, wrlen, GFP_KERNEL);
956	if (!skb) {
957		connect_reply_upcall(ep, -ENOMEM);
958		return -ENOMEM;
959	}
960	set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
961
962	req = skb_put_zero(skb, wrlen);
963	req->op_to_immdlen = cpu_to_be32(
964		FW_WR_OP_V(FW_OFLD_TX_DATA_WR) |
965		FW_WR_COMPL_F |
966		FW_WR_IMMDLEN_V(mpalen));
967	req->flowid_len16 = cpu_to_be32(
968		FW_WR_FLOWID_V(ep->hwtid) |
969		FW_WR_LEN16_V(wrlen >> 4));
970	req->plen = cpu_to_be32(mpalen);
971	req->tunnel_to_proxy = cpu_to_be32(
972		FW_OFLD_TX_DATA_WR_FLUSH_F |
973		FW_OFLD_TX_DATA_WR_SHOVE_F);
974
975	mpa = (struct mpa_message *)(req + 1);
976	memcpy(mpa->key, MPA_KEY_REQ, sizeof(mpa->key));
977
978	mpa->flags = 0;
979	if (crc_enabled)
980		mpa->flags |= MPA_CRC;
981	if (markers_enabled) {
982		mpa->flags |= MPA_MARKERS;
983		ep->mpa_attr.recv_marker_enabled = 1;
984	} else {
985		ep->mpa_attr.recv_marker_enabled = 0;
986	}
987	if (mpa_rev_to_use == 2)
988		mpa->flags |= MPA_ENHANCED_RDMA_CONN;
989
990	mpa->private_data_size = htons(ep->plen);
991	mpa->revision = mpa_rev_to_use;
992	if (mpa_rev_to_use == 1) {
993		ep->tried_with_mpa_v1 = 1;
994		ep->retry_with_mpa_v1 = 0;
995	}
996
997	if (mpa_rev_to_use == 2) {
998		mpa->private_data_size =
999			htons(ntohs(mpa->private_data_size) +
1000			      sizeof(struct mpa_v2_conn_params));
1001		pr_debug("initiator ird %u ord %u\n", ep->ird,
1002			 ep->ord);
1003		mpa_v2_params.ird = htons((u16)ep->ird);
1004		mpa_v2_params.ord = htons((u16)ep->ord);
1005
1006		if (peer2peer) {
1007			mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL);
1008			if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE)
1009				mpa_v2_params.ord |=
1010					htons(MPA_V2_RDMA_WRITE_RTR);
1011			else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ)
1012				mpa_v2_params.ord |=
1013					htons(MPA_V2_RDMA_READ_RTR);
1014		}
1015		memcpy(mpa->private_data, &mpa_v2_params,
1016		       sizeof(struct mpa_v2_conn_params));
1017
1018		if (ep->plen)
1019			memcpy(mpa->private_data +
1020			       sizeof(struct mpa_v2_conn_params),
1021			       ep->mpa_pkt + sizeof(*mpa), ep->plen);
1022	} else
1023		if (ep->plen)
1024			memcpy(mpa->private_data,
1025					ep->mpa_pkt + sizeof(*mpa), ep->plen);
1026
1027	/*
1028	 * Reference the mpa skb.  This ensures the data area
1029	 * will remain in memory until the hw acks the tx.
1030	 * Function fw4_ack() will deref it.
1031	 */
1032	skb_get(skb);
1033	t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
1034	ep->mpa_skb = skb;
1035	ret = c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
1036	if (ret)
1037		return ret;
1038	start_ep_timer(ep);
1039	__state_set(&ep->com, MPA_REQ_SENT);
1040	ep->mpa_attr.initiator = 1;
1041	ep->snd_seq += mpalen;
1042	return ret;
1043}
1044
1045static int send_mpa_reject(struct c4iw_ep *ep, const void *pdata, u8 plen)
1046{
1047	int mpalen, wrlen;
1048	struct fw_ofld_tx_data_wr *req;
1049	struct mpa_message *mpa;
1050	struct sk_buff *skb;
1051	struct mpa_v2_conn_params mpa_v2_params;
1052
1053	pr_debug("ep %p tid %u pd_len %d\n",
1054		 ep, ep->hwtid, ep->plen);
1055
1056	mpalen = sizeof(*mpa) + plen;
1057	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn)
1058		mpalen += sizeof(struct mpa_v2_conn_params);
1059	wrlen = roundup(mpalen + sizeof(*req), 16);
1060
1061	skb = get_skb(NULL, wrlen, GFP_KERNEL);
1062	if (!skb) {
1063		pr_err("%s - cannot alloc skb!\n", __func__);
1064		return -ENOMEM;
1065	}
1066	set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
1067
1068	req = skb_put_zero(skb, wrlen);
1069	req->op_to_immdlen = cpu_to_be32(
1070		FW_WR_OP_V(FW_OFLD_TX_DATA_WR) |
1071		FW_WR_COMPL_F |
1072		FW_WR_IMMDLEN_V(mpalen));
1073	req->flowid_len16 = cpu_to_be32(
1074		FW_WR_FLOWID_V(ep->hwtid) |
1075		FW_WR_LEN16_V(wrlen >> 4));
1076	req->plen = cpu_to_be32(mpalen);
1077	req->tunnel_to_proxy = cpu_to_be32(
1078		FW_OFLD_TX_DATA_WR_FLUSH_F |
1079		FW_OFLD_TX_DATA_WR_SHOVE_F);
1080
1081	mpa = (struct mpa_message *)(req + 1);
1082	memset(mpa, 0, sizeof(*mpa));
1083	memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
1084	mpa->flags = MPA_REJECT;
1085	mpa->revision = ep->mpa_attr.version;
1086	mpa->private_data_size = htons(plen);
1087
1088	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
1089		mpa->flags |= MPA_ENHANCED_RDMA_CONN;
1090		mpa->private_data_size =
1091			htons(ntohs(mpa->private_data_size) +
1092			      sizeof(struct mpa_v2_conn_params));
1093		mpa_v2_params.ird = htons(((u16)ep->ird) |
1094					  (peer2peer ? MPA_V2_PEER2PEER_MODEL :
1095					   0));
1096		mpa_v2_params.ord = htons(((u16)ep->ord) | (peer2peer ?
1097					  (p2p_type ==
1098					   FW_RI_INIT_P2PTYPE_RDMA_WRITE ?
1099					   MPA_V2_RDMA_WRITE_RTR : p2p_type ==
1100					   FW_RI_INIT_P2PTYPE_READ_REQ ?
1101					   MPA_V2_RDMA_READ_RTR : 0) : 0));
1102		memcpy(mpa->private_data, &mpa_v2_params,
1103		       sizeof(struct mpa_v2_conn_params));
1104
1105		if (ep->plen)
1106			memcpy(mpa->private_data +
1107			       sizeof(struct mpa_v2_conn_params), pdata, plen);
1108	} else
1109		if (plen)
1110			memcpy(mpa->private_data, pdata, plen);
1111
1112	/*
1113	 * Reference the mpa skb again.  This ensures the data area
1114	 * will remain in memory until the hw acks the tx.
1115	 * Function fw4_ack() will deref it.
1116	 */
1117	skb_get(skb);
1118	set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
1119	t4_set_arp_err_handler(skb, NULL, mpa_start_arp_failure);
1120	ep->mpa_skb = skb;
1121	ep->snd_seq += mpalen;
1122	return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
1123}
1124
1125static int send_mpa_reply(struct c4iw_ep *ep, const void *pdata, u8 plen)
1126{
1127	int mpalen, wrlen;
1128	struct fw_ofld_tx_data_wr *req;
1129	struct mpa_message *mpa;
1130	struct sk_buff *skb;
1131	struct mpa_v2_conn_params mpa_v2_params;
1132
1133	pr_debug("ep %p tid %u pd_len %d\n",
1134		 ep, ep->hwtid, ep->plen);
1135
1136	mpalen = sizeof(*mpa) + plen;
1137	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn)
1138		mpalen += sizeof(struct mpa_v2_conn_params);
1139	wrlen = roundup(mpalen + sizeof(*req), 16);
1140
1141	skb = get_skb(NULL, wrlen, GFP_KERNEL);
1142	if (!skb) {
1143		pr_err("%s - cannot alloc skb!\n", __func__);
1144		return -ENOMEM;
1145	}
1146	set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx);
1147
1148	req = skb_put_zero(skb, wrlen);
1149	req->op_to_immdlen = cpu_to_be32(
1150		FW_WR_OP_V(FW_OFLD_TX_DATA_WR) |
1151		FW_WR_COMPL_F |
1152		FW_WR_IMMDLEN_V(mpalen));
1153	req->flowid_len16 = cpu_to_be32(
1154		FW_WR_FLOWID_V(ep->hwtid) |
1155		FW_WR_LEN16_V(wrlen >> 4));
1156	req->plen = cpu_to_be32(mpalen);
1157	req->tunnel_to_proxy = cpu_to_be32(
1158		FW_OFLD_TX_DATA_WR_FLUSH_F |
1159		FW_OFLD_TX_DATA_WR_SHOVE_F);
1160
1161	mpa = (struct mpa_message *)(req + 1);
1162	memset(mpa, 0, sizeof(*mpa));
1163	memcpy(mpa->key, MPA_KEY_REP, sizeof(mpa->key));
1164	mpa->flags = 0;
1165	if (ep->mpa_attr.crc_enabled)
1166		mpa->flags |= MPA_CRC;
1167	if (ep->mpa_attr.recv_marker_enabled)
1168		mpa->flags |= MPA_MARKERS;
1169	mpa->revision = ep->mpa_attr.version;
1170	mpa->private_data_size = htons(plen);
1171
1172	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
1173		mpa->flags |= MPA_ENHANCED_RDMA_CONN;
1174		mpa->private_data_size =
1175			htons(ntohs(mpa->private_data_size) +
1176			      sizeof(struct mpa_v2_conn_params));
1177		mpa_v2_params.ird = htons((u16)ep->ird);
1178		mpa_v2_params.ord = htons((u16)ep->ord);
1179		if (peer2peer && (ep->mpa_attr.p2p_type !=
1180					FW_RI_INIT_P2PTYPE_DISABLED)) {
1181			mpa_v2_params.ird |= htons(MPA_V2_PEER2PEER_MODEL);
1182
1183			if (p2p_type == FW_RI_INIT_P2PTYPE_RDMA_WRITE)
1184				mpa_v2_params.ord |=
1185					htons(MPA_V2_RDMA_WRITE_RTR);
1186			else if (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ)
1187				mpa_v2_params.ord |=
1188					htons(MPA_V2_RDMA_READ_RTR);
1189		}
1190
1191		memcpy(mpa->private_data, &mpa_v2_params,
1192		       sizeof(struct mpa_v2_conn_params));
1193
1194		if (ep->plen)
1195			memcpy(mpa->private_data +
1196			       sizeof(struct mpa_v2_conn_params), pdata, plen);
1197	} else
1198		if (plen)
1199			memcpy(mpa->private_data, pdata, plen);
1200
1201	/*
1202	 * Reference the mpa skb.  This ensures the data area
1203	 * will remain in memory until the hw acks the tx.
1204	 * Function fw4_ack() will deref it.
1205	 */
1206	skb_get(skb);
1207	t4_set_arp_err_handler(skb, NULL, mpa_start_arp_failure);
1208	ep->mpa_skb = skb;
1209	__state_set(&ep->com, MPA_REP_SENT);
1210	ep->snd_seq += mpalen;
1211	return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
1212}
1213
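/*
 * Active open established: move the connection from its atid to the
 * hardware tid, record the initial sequence numbers and window scale,
 * then start MPA negotiation by sending the flowc and the MPA request.
 */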
1214static int act_establish(struct c4iw_dev *dev, struct sk_buff *skb)
1215{
1216	struct c4iw_ep *ep;
1217	struct cpl_act_establish *req = cplhdr(skb);
1218	unsigned short tcp_opt = ntohs(req->tcp_opt);
1219	unsigned int tid = GET_TID(req);
1220	unsigned int atid = TID_TID_G(ntohl(req->tos_atid));
1221	struct tid_info *t = dev->rdev.lldi.tids;
1222	int ret;
1223
1224	ep = lookup_atid(t, atid);
1225
1226	pr_debug("ep %p tid %u snd_isn %u rcv_isn %u\n", ep, tid,
1227		 be32_to_cpu(req->snd_isn), be32_to_cpu(req->rcv_isn));
1228
1229	mutex_lock(&ep->com.mutex);
1230	dst_confirm(ep->dst);
1231
1232	/* setup the hwtid for this connection */
1233	ep->hwtid = tid;
1234	cxgb4_insert_tid(t, ep, tid, ep->com.local_addr.ss_family);
1235	insert_ep_tid(ep);
1236
1237	ep->snd_seq = be32_to_cpu(req->snd_isn);
1238	ep->rcv_seq = be32_to_cpu(req->rcv_isn);
1239	ep->snd_wscale = TCPOPT_SND_WSCALE_G(tcp_opt);
1240
1241	set_emss(ep, tcp_opt);
1242
1243	/* dealloc the atid */
1244	xa_erase_irq(&ep->com.dev->atids, atid);
1245	cxgb4_free_atid(t, atid);
1246	set_bit(ACT_ESTAB, &ep->com.history);
1247
1248	/* start MPA negotiation */
1249	ret = send_flowc(ep);
1250	if (ret)
1251		goto err;
1252	if (ep->retry_with_mpa_v1)
1253		ret = send_mpa_req(ep, skb, 1);
1254	else
1255		ret = send_mpa_req(ep, skb, mpa_rev);
1256	if (ret)
1257		goto err;
1258	mutex_unlock(&ep->com.mutex);
1259	return 0;
1260err:
1261	mutex_unlock(&ep->com.mutex);
1262	connect_reply_upcall(ep, -ENOMEM);
1263	c4iw_ep_disconnect(ep, 0, GFP_KERNEL);
1264	return 0;
1265}
1266
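/*
 * iw_cm upcall helpers: translate connection events into IW_CM events
 * delivered to the ULP through the cm_id event handler.
 */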
1267static void close_complete_upcall(struct c4iw_ep *ep, int status)
1268{
1269	struct iw_cm_event event;
1270
1271	pr_debug("ep %p tid %u\n", ep, ep->hwtid);
1272	memset(&event, 0, sizeof(event));
1273	event.event = IW_CM_EVENT_CLOSE;
1274	event.status = status;
1275	if (ep->com.cm_id) {
1276		pr_debug("close complete delivered ep %p cm_id %p tid %u\n",
1277			 ep, ep->com.cm_id, ep->hwtid);
1278		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1279		deref_cm_id(&ep->com);
1280		set_bit(CLOSE_UPCALL, &ep->com.history);
1281	}
1282}
1283
1284static void peer_close_upcall(struct c4iw_ep *ep)
1285{
1286	struct iw_cm_event event;
1287
1288	pr_debug("ep %p tid %u\n", ep, ep->hwtid);
1289	memset(&event, 0, sizeof(event));
1290	event.event = IW_CM_EVENT_DISCONNECT;
1291	if (ep->com.cm_id) {
1292		pr_debug("peer close delivered ep %p cm_id %p tid %u\n",
1293			 ep, ep->com.cm_id, ep->hwtid);
1294		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1295		set_bit(DISCONN_UPCALL, &ep->com.history);
1296	}
1297}
1298
1299static void peer_abort_upcall(struct c4iw_ep *ep)
1300{
1301	struct iw_cm_event event;
1302
1303	pr_debug("ep %p tid %u\n", ep, ep->hwtid);
1304	memset(&event, 0, sizeof(event));
1305	event.event = IW_CM_EVENT_CLOSE;
1306	event.status = -ECONNRESET;
1307	if (ep->com.cm_id) {
1308		pr_debug("abort delivered ep %p cm_id %p tid %u\n", ep,
1309			 ep->com.cm_id, ep->hwtid);
1310		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1311		deref_cm_id(&ep->com);
1312		set_bit(ABORT_UPCALL, &ep->com.history);
1313	}
1314}
1315
1316static void connect_reply_upcall(struct c4iw_ep *ep, int status)
1317{
1318	struct iw_cm_event event;
1319
1320	pr_debug("ep %p tid %u status %d\n",
1321		 ep, ep->hwtid, status);
1322	memset(&event, 0, sizeof(event));
1323	event.event = IW_CM_EVENT_CONNECT_REPLY;
1324	event.status = status;
1325	memcpy(&event.local_addr, &ep->com.local_addr,
1326	       sizeof(ep->com.local_addr));
1327	memcpy(&event.remote_addr, &ep->com.remote_addr,
1328	       sizeof(ep->com.remote_addr));
1329
1330	if ((status == 0) || (status == -ECONNREFUSED)) {
1331		if (!ep->tried_with_mpa_v1) {
1332			/* this means MPA_v2 is used */
1333			event.ord = ep->ird;
1334			event.ird = ep->ord;
1335			event.private_data_len = ep->plen -
1336				sizeof(struct mpa_v2_conn_params);
1337			event.private_data = ep->mpa_pkt +
1338				sizeof(struct mpa_message) +
1339				sizeof(struct mpa_v2_conn_params);
1340		} else {
1341			/* this means MPA_v1 is used */
1342			event.ord = cur_max_read_depth(ep->com.dev);
1343			event.ird = cur_max_read_depth(ep->com.dev);
1344			event.private_data_len = ep->plen;
1345			event.private_data = ep->mpa_pkt +
1346				sizeof(struct mpa_message);
1347		}
1348	}
1349
1350	pr_debug("ep %p tid %u status %d\n", ep,
1351		 ep->hwtid, status);
1352	set_bit(CONN_RPL_UPCALL, &ep->com.history);
1353	ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1354
1355	if (status < 0)
1356		deref_cm_id(&ep->com);
1357}
1358
1359static int connect_request_upcall(struct c4iw_ep *ep)
1360{
1361	struct iw_cm_event event;
1362	int ret;
1363
1364	pr_debug("ep %p tid %u\n", ep, ep->hwtid);
1365	memset(&event, 0, sizeof(event));
1366	event.event = IW_CM_EVENT_CONNECT_REQUEST;
1367	memcpy(&event.local_addr, &ep->com.local_addr,
1368	       sizeof(ep->com.local_addr));
1369	memcpy(&event.remote_addr, &ep->com.remote_addr,
1370	       sizeof(ep->com.remote_addr));
1371	event.provider_data = ep;
1372	if (!ep->tried_with_mpa_v1) {
1373		/* this means MPA_v2 is used */
1374		event.ord = ep->ord;
1375		event.ird = ep->ird;
1376		event.private_data_len = ep->plen -
1377			sizeof(struct mpa_v2_conn_params);
1378		event.private_data = ep->mpa_pkt + sizeof(struct mpa_message) +
1379			sizeof(struct mpa_v2_conn_params);
1380	} else {
1381		/* this means MPA_v1 is used. Send max supported */
1382		event.ord = cur_max_read_depth(ep->com.dev);
1383		event.ird = cur_max_read_depth(ep->com.dev);
1384		event.private_data_len = ep->plen;
1385		event.private_data = ep->mpa_pkt + sizeof(struct mpa_message);
1386	}
1387	c4iw_get_ep(&ep->com);
1388	ret = ep->parent_ep->com.cm_id->event_handler(ep->parent_ep->com.cm_id,
1389						      &event);
1390	if (ret)
1391		c4iw_put_ep(&ep->com);
1392	set_bit(CONNREQ_UPCALL, &ep->com.history);
1393	c4iw_put_ep(&ep->parent_ep->com);
1394	return ret;
1395}
1396
1397static void established_upcall(struct c4iw_ep *ep)
1398{
1399	struct iw_cm_event event;
1400
1401	pr_debug("ep %p tid %u\n", ep, ep->hwtid);
1402	memset(&event, 0, sizeof(event));
1403	event.event = IW_CM_EVENT_ESTABLISHED;
1404	event.ird = ep->ord;
1405	event.ord = ep->ird;
1406	if (ep->com.cm_id) {
1407		pr_debug("ep %p tid %u\n", ep, ep->hwtid);
1408		ep->com.cm_id->event_handler(ep->com.cm_id, &event);
1409		set_bit(ESTAB_UPCALL, &ep->com.history);
1410	}
1411}
1412
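/*
 * Return RX credits for data consumed in streaming (MPA) mode.  Any
 * part of the receive window that could not be advertised in the
 * RCV_BUFSIZ field at connection setup is returned here as well.
 */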
1413static int update_rx_credits(struct c4iw_ep *ep, u32 credits)
1414{
1415	struct sk_buff *skb;
1416	u32 wrlen = roundup(sizeof(struct cpl_rx_data_ack), 16);
1417	u32 credit_dack;
1418
1419	pr_debug("ep %p tid %u credits %u\n",
1420		 ep, ep->hwtid, credits);
1421	skb = get_skb(NULL, wrlen, GFP_KERNEL);
1422	if (!skb) {
1423		pr_err("update_rx_credits - cannot alloc skb!\n");
1424		return 0;
1425	}
1426
1427	/*
1428	 * If we couldn't specify the entire rcv window at connection setup
1429	 * due to the limit in the number of bits in the RCV_BUFSIZ field,
1430	 * then add the overage in to the credits returned.
1431	 */
1432	if (ep->rcv_win > RCV_BUFSIZ_M * 1024)
1433		credits += ep->rcv_win - RCV_BUFSIZ_M * 1024;
1434
1435	credit_dack = credits | RX_FORCE_ACK_F | RX_DACK_CHANGE_F |
1436		      RX_DACK_MODE_V(dack_mode);
1437
1438	cxgb_mk_rx_data_ack(skb, wrlen, ep->hwtid, ep->ctrlq_idx,
1439			    credit_dack);
1440
1441	c4iw_ofld_send(&ep->com.dev->rdev, skb);
1442	return credits;
1443}
1444
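/*
 * When set, adjust our IRD/ORD to the values the peer advertised
 * instead of failing the MPA negotiation outright (see the ird/ord
 * checks in process_mpa_reply()).
 */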
1445#define RELAXED_IRD_NEGOTIATION 1
1446
1447/*
1448 * process_mpa_reply - process streaming mode MPA reply
1449 *
1450 * Returns:
1451 *
1452 * 0 upon success indicating a connect request was delivered to the ULP
1453 * or the mpa request is incomplete but valid so far.
1454 *
1455 * 1 if a failure requires the caller to close the connection.
1456 *
1457 * 2 if a failure requires the caller to abort the connection.
1458 */
1459static int process_mpa_reply(struct c4iw_ep *ep, struct sk_buff *skb)
1460{
1461	struct mpa_message *mpa;
1462	struct mpa_v2_conn_params *mpa_v2_params;
1463	u16 plen;
1464	u16 resp_ird, resp_ord;
1465	u8 rtr_mismatch = 0, insuff_ird = 0;
1466	struct c4iw_qp_attributes attrs;
1467	enum c4iw_qp_attr_mask mask;
1468	int err;
1469	int disconnect = 0;
1470
1471	pr_debug("ep %p tid %u\n", ep, ep->hwtid);
1472
1473	/*
1474	 * If we get more than the supported amount of private data
1475	 * then we must fail this connection.
1476	 */
1477	if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt)) {
1478		err = -EINVAL;
1479		goto err_stop_timer;
1480	}
1481
1482	/*
1483	 * copy the new data into our accumulation buffer.
1484	 */
1485	skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]),
1486				  skb->len);
1487	ep->mpa_pkt_len += skb->len;
1488
1489	/*
1490	 * if we don't even have the mpa message, then bail.
1491	 */
1492	if (ep->mpa_pkt_len < sizeof(*mpa))
1493		return 0;
1494	mpa = (struct mpa_message *) ep->mpa_pkt;
1495
1496	/* Validate MPA header. */
1497	if (mpa->revision > mpa_rev) {
1498		pr_err("%s MPA version mismatch. Local = %d, Received = %d\n",
1499		       __func__, mpa_rev, mpa->revision);
1500		err = -EPROTO;
1501		goto err_stop_timer;
1502	}
1503	if (memcmp(mpa->key, MPA_KEY_REP, sizeof(mpa->key))) {
1504		err = -EPROTO;
1505		goto err_stop_timer;
1506	}
1507
1508	plen = ntohs(mpa->private_data_size);
1509
1510	/*
1511	 * Fail if there's too much private data.
1512	 */
1513	if (plen > MPA_MAX_PRIVATE_DATA) {
1514		err = -EPROTO;
1515		goto err_stop_timer;
1516	}
1517
1518	/*
	 * Fail if we have received more data than the MPA header plus
	 * the declared private data can account for.
1520	 */
1521	if (ep->mpa_pkt_len > (sizeof(*mpa) + plen)) {
1522		err = -EPROTO;
1523		goto err_stop_timer;
1524	}
1525
1526	ep->plen = (u8) plen;
1527
1528	/*
1529	 * If we don't have all the pdata yet, then bail.
	 * We'll continue processing when more data arrives.
1531	 */
1532	if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
1533		return 0;
1534
1535	if (mpa->flags & MPA_REJECT) {
1536		err = -ECONNREFUSED;
1537		goto err_stop_timer;
1538	}
1539
1540	/*
1541	 * Stop mpa timer.  If it expired, then
1542	 * we ignore the MPA reply.  process_timeout()
1543	 * will abort the connection.
1544	 */
1545	if (stop_ep_timer(ep))
1546		return 0;
1547
1548	/*
1549	 * If we get here we have accumulated the entire mpa
1550	 * start reply message including private data. And
1551	 * the MPA header is valid.
1552	 */
1553	__state_set(&ep->com, FPDU_MODE);
	ep->mpa_attr.crc_enabled =
		((mpa->flags & MPA_CRC) || crc_enabled) ? 1 : 0;
1555	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
1556	ep->mpa_attr.version = mpa->revision;
1557	ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
1558
1559	if (mpa->revision == 2) {
1560		ep->mpa_attr.enhanced_rdma_conn =
1561			mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0;
1562		if (ep->mpa_attr.enhanced_rdma_conn) {
1563			mpa_v2_params = (struct mpa_v2_conn_params *)
1564				(ep->mpa_pkt + sizeof(*mpa));
1565			resp_ird = ntohs(mpa_v2_params->ird) &
1566				MPA_V2_IRD_ORD_MASK;
1567			resp_ord = ntohs(mpa_v2_params->ord) &
1568				MPA_V2_IRD_ORD_MASK;
1569			pr_debug("responder ird %u ord %u ep ird %u ord %u\n",
1570				 resp_ird, resp_ord, ep->ird, ep->ord);
1571
1572			/*
			 * This is a double-check.  Ideally the checks below
			 * are not required, since the ird/ord values have
			 * already been handled in c4iw_accept_cr().
1576			 */
1577			if (ep->ird < resp_ord) {
1578				if (RELAXED_IRD_NEGOTIATION && resp_ord <=
1579				    ep->com.dev->rdev.lldi.max_ordird_qp)
1580					ep->ird = resp_ord;
1581				else
1582					insuff_ird = 1;
1583			} else if (ep->ird > resp_ord) {
1584				ep->ird = resp_ord;
1585			}
1586			if (ep->ord > resp_ird) {
1587				if (RELAXED_IRD_NEGOTIATION)
1588					ep->ord = resp_ird;
1589				else
1590					insuff_ird = 1;
1591			}
1592			if (insuff_ird) {
1593				err = -ENOMEM;
1594				ep->ird = resp_ord;
1595				ep->ord = resp_ird;
1596			}
1597
1598			if (ntohs(mpa_v2_params->ird) &
1599					MPA_V2_PEER2PEER_MODEL) {
1600				if (ntohs(mpa_v2_params->ord) &
1601						MPA_V2_RDMA_WRITE_RTR)
1602					ep->mpa_attr.p2p_type =
1603						FW_RI_INIT_P2PTYPE_RDMA_WRITE;
1604				else if (ntohs(mpa_v2_params->ord) &
1605						MPA_V2_RDMA_READ_RTR)
1606					ep->mpa_attr.p2p_type =
1607						FW_RI_INIT_P2PTYPE_READ_REQ;
1608			}
1609		}
1610	} else if (mpa->revision == 1)
1611		if (peer2peer)
1612			ep->mpa_attr.p2p_type = p2p_type;
1613
1614	pr_debug("crc_enabled=%d, recv_marker_enabled=%d, xmit_marker_enabled=%d, version=%d p2p_type=%d local-p2p_type = %d\n",
1615		 ep->mpa_attr.crc_enabled,
1616		 ep->mpa_attr.recv_marker_enabled,
1617		 ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
1618		 ep->mpa_attr.p2p_type, p2p_type);
1619
1620	/*
1621	 * If responder's RTR does not match with that of initiator, assign
1622	 * FW_RI_INIT_P2PTYPE_DISABLED in mpa attributes so that RTR is not
1623	 * generated when moving QP to RTS state.
1624	 * A TERM message will be sent after QP has moved to RTS state
1625	 */
1626	if ((ep->mpa_attr.version == 2) && peer2peer &&
1627			(ep->mpa_attr.p2p_type != p2p_type)) {
1628		ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
1629		rtr_mismatch = 1;
1630	}
1631
1632	attrs.mpa_attr = ep->mpa_attr;
1633	attrs.max_ird = ep->ird;
1634	attrs.max_ord = ep->ord;
1635	attrs.llp_stream_handle = ep;
1636	attrs.next_state = C4IW_QP_STATE_RTS;
1637
1638	mask = C4IW_QP_ATTR_NEXT_STATE |
1639	    C4IW_QP_ATTR_LLP_STREAM_HANDLE | C4IW_QP_ATTR_MPA_ATTR |
1640	    C4IW_QP_ATTR_MAX_IRD | C4IW_QP_ATTR_MAX_ORD;
1641
1642	/* bind QP and TID with INIT_WR */
1643	err = c4iw_modify_qp(ep->com.qp->rhp,
1644			     ep->com.qp, mask, &attrs, 1);
1645	if (err)
1646		goto err;
1647
1648	/*
1649	 * If responder's RTR requirement did not match with what initiator
1650	 * supports, generate TERM message
1651	 */
1652	if (rtr_mismatch) {
1653		pr_err("%s: RTR mismatch, sending TERM\n", __func__);
1654		attrs.layer_etype = LAYER_MPA | DDP_LLP;
1655		attrs.ecode = MPA_NOMATCH_RTR;
1656		attrs.next_state = C4IW_QP_STATE_TERMINATE;
1657		attrs.send_term = 1;
1658		err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
1659				C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
1660		err = -ENOMEM;
1661		disconnect = 1;
1662		goto out;
1663	}
1664
1665	/*
1666	 * Generate TERM if initiator IRD is not sufficient for responder
	 * provided ORD.  We currently behave the same way even when the
	 * responder-provided IRD is insufficient for the initiator's ORD.
1670	 */
1671	if (insuff_ird) {
1672		pr_err("%s: Insufficient IRD, sending TERM\n", __func__);
1673		attrs.layer_etype = LAYER_MPA | DDP_LLP;
1674		attrs.ecode = MPA_INSUFF_IRD;
1675		attrs.next_state = C4IW_QP_STATE_TERMINATE;
1676		attrs.send_term = 1;
1677		err = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
1678				C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
1679		err = -ENOMEM;
1680		disconnect = 1;
1681		goto out;
1682	}
1683	goto out;
1684err_stop_timer:
1685	stop_ep_timer(ep);
1686err:
1687	disconnect = 2;
1688out:
1689	connect_reply_upcall(ep, err);
1690	return disconnect;
1691}
1692
1693/*
1694 * process_mpa_request - process streaming mode MPA request
1695 *
1696 * Returns:
1697 *
1698 * 0 upon success indicating a connect request was delivered to the ULP
1699 * or the mpa request is incomplete but valid so far.
1700 *
1701 * 1 if a failure requires the caller to close the connection.
1702 *
1703 * 2 if a failure requires the caller to abort the connection.
1704 */
1705static int process_mpa_request(struct c4iw_ep *ep, struct sk_buff *skb)
1706{
1707	struct mpa_message *mpa;
1708	struct mpa_v2_conn_params *mpa_v2_params;
1709	u16 plen;
1710
1711	pr_debug("ep %p tid %u\n", ep, ep->hwtid);
1712
1713	/*
1714	 * If we get more than the supported amount of private data
1715	 * then we must fail this connection.
1716	 */
1717	if (ep->mpa_pkt_len + skb->len > sizeof(ep->mpa_pkt))
1718		goto err_stop_timer;
1719
1720	pr_debug("enter (%s line %u)\n", __FILE__, __LINE__);
1721
1722	/*
1723	 * Copy the new data into our accumulation buffer.
1724	 */
1725	skb_copy_from_linear_data(skb, &(ep->mpa_pkt[ep->mpa_pkt_len]),
1726				  skb->len);
1727	ep->mpa_pkt_len += skb->len;
1728
1729	/*
1730	 * If we don't even have the mpa message, then bail.
	 * We'll continue processing when more data arrives.
1732	 */
1733	if (ep->mpa_pkt_len < sizeof(*mpa))
1734		return 0;
1735
1736	pr_debug("enter (%s line %u)\n", __FILE__, __LINE__);
1737	mpa = (struct mpa_message *) ep->mpa_pkt;
1738
1739	/*
1740	 * Validate MPA Header.
1741	 */
1742	if (mpa->revision > mpa_rev) {
1743		pr_err("%s MPA version mismatch. Local = %d, Received = %d\n",
1744		       __func__, mpa_rev, mpa->revision);
1745		goto err_stop_timer;
1746	}
1747
1748	if (memcmp(mpa->key, MPA_KEY_REQ, sizeof(mpa->key)))
1749		goto err_stop_timer;
1750
1751	plen = ntohs(mpa->private_data_size);
1752
1753	/*
1754	 * Fail if there's too much private data.
1755	 */
1756	if (plen > MPA_MAX_PRIVATE_DATA)
1757		goto err_stop_timer;
1758
1759	/*
	 * Fail if we have received more data than the MPA header plus
	 * the declared private data can account for.
1761	 */
1762	if (ep->mpa_pkt_len > (sizeof(*mpa) + plen))
1763		goto err_stop_timer;
1764	ep->plen = (u8) plen;
1765
1766	/*
1767	 * If we don't have all the pdata yet, then bail.
1768	 */
1769	if (ep->mpa_pkt_len < (sizeof(*mpa) + plen))
1770		return 0;
1771
1772	/*
1773	 * If we get here we have accumulated the entire mpa
	 * start request message including private data.
1775	 */
1776	ep->mpa_attr.initiator = 0;
	ep->mpa_attr.crc_enabled =
		((mpa->flags & MPA_CRC) || crc_enabled) ? 1 : 0;
1778	ep->mpa_attr.recv_marker_enabled = markers_enabled;
1779	ep->mpa_attr.xmit_marker_enabled = mpa->flags & MPA_MARKERS ? 1 : 0;
1780	ep->mpa_attr.version = mpa->revision;
1781	if (mpa->revision == 1)
1782		ep->tried_with_mpa_v1 = 1;
1783	ep->mpa_attr.p2p_type = FW_RI_INIT_P2PTYPE_DISABLED;
1784
1785	if (mpa->revision == 2) {
1786		ep->mpa_attr.enhanced_rdma_conn =
1787			mpa->flags & MPA_ENHANCED_RDMA_CONN ? 1 : 0;
1788		if (ep->mpa_attr.enhanced_rdma_conn) {
1789			mpa_v2_params = (struct mpa_v2_conn_params *)
1790				(ep->mpa_pkt + sizeof(*mpa));
1791			ep->ird = ntohs(mpa_v2_params->ird) &
1792				MPA_V2_IRD_ORD_MASK;
1793			ep->ird = min_t(u32, ep->ird,
1794					cur_max_read_depth(ep->com.dev));
1795			ep->ord = ntohs(mpa_v2_params->ord) &
1796				MPA_V2_IRD_ORD_MASK;
1797			ep->ord = min_t(u32, ep->ord,
1798					cur_max_read_depth(ep->com.dev));
1799			pr_debug("initiator ird %u ord %u\n",
1800				 ep->ird, ep->ord);
1801			if (ntohs(mpa_v2_params->ird) & MPA_V2_PEER2PEER_MODEL)
1802				if (peer2peer) {
1803					if (ntohs(mpa_v2_params->ord) &
1804							MPA_V2_RDMA_WRITE_RTR)
1805						ep->mpa_attr.p2p_type =
1806						FW_RI_INIT_P2PTYPE_RDMA_WRITE;
1807					else if (ntohs(mpa_v2_params->ord) &
1808							MPA_V2_RDMA_READ_RTR)
1809						ep->mpa_attr.p2p_type =
1810						FW_RI_INIT_P2PTYPE_READ_REQ;
1811				}
1812		}
1813	} else if (mpa->revision == 1)
1814		if (peer2peer)
1815			ep->mpa_attr.p2p_type = p2p_type;
1816
1817	pr_debug("crc_enabled=%d, recv_marker_enabled=%d, xmit_marker_enabled=%d, version=%d p2p_type=%d\n",
1818		 ep->mpa_attr.crc_enabled, ep->mpa_attr.recv_marker_enabled,
1819		 ep->mpa_attr.xmit_marker_enabled, ep->mpa_attr.version,
1820		 ep->mpa_attr.p2p_type);
1821
1822	__state_set(&ep->com, MPA_REQ_RCVD);
1823
1824	/* drive upcall */
1825	mutex_lock_nested(&ep->parent_ep->com.mutex, SINGLE_DEPTH_NESTING);
1826	if (ep->parent_ep->com.state != DEAD) {
1827		if (connect_request_upcall(ep))
1828			goto err_unlock_parent;
1829	} else {
1830		goto err_unlock_parent;
1831	}
1832	mutex_unlock(&ep->parent_ep->com.mutex);
1833	return 0;
1834
1835err_unlock_parent:
1836	mutex_unlock(&ep->parent_ep->com.mutex);
1837	goto err_out;
1838err_stop_timer:
1839	(void)stop_ep_timer(ep);
1840err_out:
1841	return 2;
1842}
1843
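/*
 * Handle CPL_RX_DATA: streaming-mode data from the peer.  Depending on the
 * connection state this is either part of an MPA request/reply that gets
 * accumulated and processed, or unexpected data in FPDU_MODE, which moves
 * the QP to TERMINATE and disconnects the endpoint.
 */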
1844static int rx_data(struct c4iw_dev *dev, struct sk_buff *skb)
1845{
1846	struct c4iw_ep *ep;
1847	struct cpl_rx_data *hdr = cplhdr(skb);
1848	unsigned int dlen = ntohs(hdr->len);
1849	unsigned int tid = GET_TID(hdr);
1850	__u8 status = hdr->status;
1851	int disconnect = 0;
1852
1853	ep = get_ep_from_tid(dev, tid);
1854	if (!ep)
1855		return 0;
1856	pr_debug("ep %p tid %u dlen %u\n", ep, ep->hwtid, dlen);
1857	skb_pull(skb, sizeof(*hdr));
1858	skb_trim(skb, dlen);
1859	mutex_lock(&ep->com.mutex);
1860
1861	switch (ep->com.state) {
1862	case MPA_REQ_SENT:
1863		update_rx_credits(ep, dlen);
1864		ep->rcv_seq += dlen;
1865		disconnect = process_mpa_reply(ep, skb);
1866		break;
1867	case MPA_REQ_WAIT:
1868		update_rx_credits(ep, dlen);
1869		ep->rcv_seq += dlen;
1870		disconnect = process_mpa_request(ep, skb);
1871		break;
1872	case FPDU_MODE: {
1873		struct c4iw_qp_attributes attrs;
1874
1875		update_rx_credits(ep, dlen);
1876		if (status)
			pr_err("%s Unexpected streaming data. qpid %u ep %p state %d tid %u status %d\n",
			       __func__, ep->com.qp->wq.sq.qid, ep,
			       ep->com.state, ep->hwtid, status);
1881		attrs.next_state = C4IW_QP_STATE_TERMINATE;
1882		c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
1883			       C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
1884		disconnect = 1;
1885		break;
1886	}
1887	default:
1888		break;
1889	}
1890	mutex_unlock(&ep->com.mutex);
1891	if (disconnect)
1892		c4iw_ep_disconnect(ep, disconnect == 2, GFP_KERNEL);
1893	c4iw_put_ep(&ep->com);
1894	return 0;
1895}
1896
1897static void complete_cached_srq_buffers(struct c4iw_ep *ep, u32 srqidx)
1898{
1899	enum chip_type adapter_type;
1900
1901	adapter_type = ep->com.dev->rdev.lldi.adapter_type;
1902
1903	/*
1904	 * If this TCB had a srq buffer cached, then we must complete
1905	 * it. For user mode, that means saving the srqidx in the
1906	 * user/kernel status page for this qp.  For kernel mode, just
1907	 * synthesize the CQE now.
1908	 */
1909	if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T5 && srqidx) {
1910		if (ep->com.qp->ibqp.uobject)
1911			t4_set_wq_in_error(&ep->com.qp->wq, srqidx);
1912		else
1913			c4iw_flush_srqidx(ep->com.qp, srqidx);
1914	}
1915}
1916
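/*
 * Handle CPL_ABORT_RPL_RSS: the hardware has acknowledged our abort request.
 * Complete any cached SRQ buffer, and if the endpoint was ABORTING, wake up
 * the waiter, mark the endpoint DEAD and release its resources.
 */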
1917static int abort_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
1918{
1919	u32 srqidx;
1920	struct c4iw_ep *ep;
1921	struct cpl_abort_rpl_rss6 *rpl = cplhdr(skb);
1922	int release = 0;
1923	unsigned int tid = GET_TID(rpl);
1924
1925	ep = get_ep_from_tid(dev, tid);
1926	if (!ep) {
1927		pr_warn("Abort rpl to freed endpoint\n");
1928		return 0;
1929	}
1930
1931	if (ep->com.qp && ep->com.qp->srq) {
1932		srqidx = ABORT_RSS_SRQIDX_G(be32_to_cpu(rpl->srqidx_status));
1933		complete_cached_srq_buffers(ep, srqidx ? srqidx : ep->srqe_idx);
1934	}
1935
1936	pr_debug("ep %p tid %u\n", ep, ep->hwtid);
1937	mutex_lock(&ep->com.mutex);
1938	switch (ep->com.state) {
1939	case ABORTING:
1940		c4iw_wake_up_noref(ep->com.wr_waitp, -ECONNRESET);
1941		__state_set(&ep->com, DEAD);
1942		release = 1;
1943		break;
1944	default:
1945		pr_err("%s ep %p state %d\n", __func__, ep, ep->com.state);
1946		break;
1947	}
1948	mutex_unlock(&ep->com.mutex);
1949
1950	if (release) {
1951		close_complete_upcall(ep, -ECONNRESET);
1952		release_ep_resources(ep);
1953	}
1954	c4iw_put_ep(&ep->com);
1955	return 0;
1956}
1957
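/*
 * Build and send a FW_OFLD_CONNECTION_WR so that firmware retries the
 * active open on our behalf.  Used from act_open_rpl() when the TCAM is
 * full and firmware offload connections are enabled (IPv4 only).
 */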
1958static int send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid)
1959{
1960	struct sk_buff *skb;
1961	struct fw_ofld_connection_wr *req;
1962	unsigned int mtu_idx;
1963	u32 wscale;
1964	struct sockaddr_in *sin;
1965	int win;
1966
1967	skb = get_skb(NULL, sizeof(*req), GFP_KERNEL);
1968	if (!skb)
1969		return -ENOMEM;
1970
1971	req = __skb_put_zero(skb, sizeof(*req));
1972	req->op_compl = htonl(WR_OP_V(FW_OFLD_CONNECTION_WR));
1973	req->len16_pkd = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*req), 16)));
1974	req->le.filter = cpu_to_be32(cxgb4_select_ntuple(
1975				     ep->com.dev->rdev.lldi.ports[0],
1976				     ep->l2t));
1977	sin = (struct sockaddr_in *)&ep->com.local_addr;
1978	req->le.lport = sin->sin_port;
1979	req->le.u.ipv4.lip = sin->sin_addr.s_addr;
1980	sin = (struct sockaddr_in *)&ep->com.remote_addr;
1981	req->le.pport = sin->sin_port;
1982	req->le.u.ipv4.pip = sin->sin_addr.s_addr;
1983	req->tcb.t_state_to_astid =
1984			htonl(FW_OFLD_CONNECTION_WR_T_STATE_V(TCP_SYN_SENT) |
1985			FW_OFLD_CONNECTION_WR_ASTID_V(atid));
1986	req->tcb.cplrxdataack_cplpassacceptrpl =
1987			htons(FW_OFLD_CONNECTION_WR_CPLRXDATAACK_F);
1988	req->tcb.tx_max = (__force __be32) jiffies;
1989	req->tcb.rcv_adv = htons(1);
1990	cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
1991		      enable_tcp_timestamps,
1992		      (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1);
1993	wscale = cxgb_compute_wscale(rcv_win);
1994
1995	/*
1996	 * Specify the largest window that will fit in opt0. The
1997	 * remainder will be specified in the rx_data_ack.
1998	 */
1999	win = ep->rcv_win >> 10;
2000	if (win > RCV_BUFSIZ_M)
2001		win = RCV_BUFSIZ_M;
2002
2003	req->tcb.opt0 = (__force __be64) (TCAM_BYPASS_F |
2004		(nocong ? NO_CONG_F : 0) |
2005		KEEP_ALIVE_F |
2006		DELACK_F |
2007		WND_SCALE_V(wscale) |
2008		MSS_IDX_V(mtu_idx) |
2009		L2T_IDX_V(ep->l2t->idx) |
2010		TX_CHAN_V(ep->tx_chan) |
2011		SMAC_SEL_V(ep->smac_idx) |
2012		DSCP_V(ep->tos >> 2) |
2013		ULP_MODE_V(ULP_MODE_TCPDDP) |
2014		RCV_BUFSIZ_V(win));
2015	req->tcb.opt2 = (__force __be32) (PACE_V(1) |
2016		TX_QUEUE_V(ep->com.dev->rdev.lldi.tx_modq[ep->tx_chan]) |
2017		RX_CHANNEL_V(0) |
2018		CCTRL_ECN_V(enable_ecn) |
2019		RSS_QUEUE_VALID_F | RSS_QUEUE_V(ep->rss_qid));
2020	if (enable_tcp_timestamps)
2021		req->tcb.opt2 |= (__force __be32)TSTAMPS_EN_F;
2022	if (enable_tcp_sack)
2023		req->tcb.opt2 |= (__force __be32)SACK_EN_F;
2024	if (wscale && enable_tcp_window_scaling)
2025		req->tcb.opt2 |= (__force __be32)WND_SCALE_EN_F;
2026	req->tcb.opt0 = cpu_to_be64((__force u64)req->tcb.opt0);
2027	req->tcb.opt2 = cpu_to_be32((__force u32)req->tcb.opt2);
2028	set_wr_txq(skb, CPL_PRIORITY_CONTROL, ep->ctrlq_idx);
2029	set_bit(ACT_OFLD_CONN, &ep->com.history);
2030	return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
2031}
2032
2033/*
2034 * Some of the error codes above implicitly indicate that there is no TID
2035 * allocated with the result of an ACT_OPEN.  We use this predicate to make
2036 * that explicit.
2037 */
2038static inline int act_open_has_tid(int status)
2039{
2040	return (status != CPL_ERR_TCAM_PARITY &&
2041		status != CPL_ERR_TCAM_MISS &&
2042		status != CPL_ERR_TCAM_FULL &&
2043		status != CPL_ERR_CONN_EXIST_SYNRECV &&
2044		status != CPL_ERR_CONN_EXIST);
2045}
2046
2047static char *neg_adv_str(unsigned int status)
2048{
2049	switch (status) {
2050	case CPL_ERR_RTX_NEG_ADVICE:
2051		return "Retransmit timeout";
2052	case CPL_ERR_PERSIST_NEG_ADVICE:
2053		return "Persist timeout";
2054	case CPL_ERR_KEEPALV_NEG_ADVICE:
2055		return "Keepalive timeout";
2056	default:
2057		return "Unknown";
2058	}
2059}
2060
2061static void set_tcp_window(struct c4iw_ep *ep, struct port_info *pi)
2062{
2063	ep->snd_win = snd_win;
2064	ep->rcv_win = rcv_win;
2065	pr_debug("snd_win %d rcv_win %d\n",
2066		 ep->snd_win, ep->rcv_win);
2067}
2068
2069#define ACT_OPEN_RETRY_COUNT 2
2070
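/*
 * Resolve the neighbour and L2T entry for the peer and fill in the
 * per-port endpoint parameters (MTU, tx channel, smac index, queue
 * indices, TCP windows), handling both loopback and regular devices.
 */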
2071static int import_ep(struct c4iw_ep *ep, int iptype, __u8 *peer_ip,
2072		     struct dst_entry *dst, struct c4iw_dev *cdev,
2073		     bool clear_mpa_v1, enum chip_type adapter_type, u8 tos)
2074{
2075	struct neighbour *n;
2076	int err, step;
2077	struct net_device *pdev;
2078
2079	n = dst_neigh_lookup(dst, peer_ip);
2080	if (!n)
2081		return -ENODEV;
2082
2083	rcu_read_lock();
2084	err = -ENOMEM;
2085	if (n->dev->flags & IFF_LOOPBACK) {
2086		if (iptype == 4)
2087			pdev = ip_dev_find(&init_net, *(__be32 *)peer_ip);
2088		else if (IS_ENABLED(CONFIG_IPV6))
2089			for_each_netdev(&init_net, pdev) {
2090				if (ipv6_chk_addr(&init_net,
2091						  (struct in6_addr *)peer_ip,
2092						  pdev, 1))
2093					break;
2094			}
2095		else
2096			pdev = NULL;
2097
2098		if (!pdev) {
2099			err = -ENODEV;
2100			goto out;
2101		}
2102		ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t,
2103					n, pdev, rt_tos2priority(tos));
2104		if (!ep->l2t) {
2105			dev_put(pdev);
2106			goto out;
2107		}
2108		ep->mtu = pdev->mtu;
2109		ep->tx_chan = cxgb4_port_chan(pdev);
2110		ep->smac_idx = ((struct port_info *)netdev_priv(pdev))->smt_idx;
2111		step = cdev->rdev.lldi.ntxq /
2112			cdev->rdev.lldi.nchan;
2113		ep->txq_idx = cxgb4_port_idx(pdev) * step;
2114		step = cdev->rdev.lldi.nrxq /
2115			cdev->rdev.lldi.nchan;
2116		ep->ctrlq_idx = cxgb4_port_idx(pdev);
2117		ep->rss_qid = cdev->rdev.lldi.rxq_ids[
2118			cxgb4_port_idx(pdev) * step];
2119		set_tcp_window(ep, (struct port_info *)netdev_priv(pdev));
2120		dev_put(pdev);
2121	} else {
2122		pdev = get_real_dev(n->dev);
2123		ep->l2t = cxgb4_l2t_get(cdev->rdev.lldi.l2t,
2124					n, pdev, rt_tos2priority(tos));
2125		if (!ep->l2t)
2126			goto out;
2127		ep->mtu = dst_mtu(dst);
2128		ep->tx_chan = cxgb4_port_chan(pdev);
2129		ep->smac_idx = ((struct port_info *)netdev_priv(pdev))->smt_idx;
2130		step = cdev->rdev.lldi.ntxq /
2131			cdev->rdev.lldi.nchan;
2132		ep->txq_idx = cxgb4_port_idx(pdev) * step;
2133		ep->ctrlq_idx = cxgb4_port_idx(pdev);
2134		step = cdev->rdev.lldi.nrxq /
2135			cdev->rdev.lldi.nchan;
2136		ep->rss_qid = cdev->rdev.lldi.rxq_ids[
2137			cxgb4_port_idx(pdev) * step];
2138		set_tcp_window(ep, (struct port_info *)netdev_priv(pdev));
2139
2140		if (clear_mpa_v1) {
2141			ep->retry_with_mpa_v1 = 0;
2142			ep->tried_with_mpa_v1 = 0;
2143		}
2144	}
2145	err = 0;
2146out:
2147	rcu_read_unlock();
2148
2149	neigh_release(n);
2150
2151	return err;
2152}
2153
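/*
 * Re-drive an active open on an existing endpoint (e.g. to retry with
 * MPA v1): replenish the skb list, allocate a fresh atid, redo the route
 * and L2T lookups, and resend the connect request.
 */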
2154static int c4iw_reconnect(struct c4iw_ep *ep)
2155{
2156	int err = 0;
2157	int size = 0;
2158	struct sockaddr_in *laddr = (struct sockaddr_in *)
2159				    &ep->com.cm_id->m_local_addr;
2160	struct sockaddr_in *raddr = (struct sockaddr_in *)
2161				    &ep->com.cm_id->m_remote_addr;
2162	struct sockaddr_in6 *laddr6 = (struct sockaddr_in6 *)
2163				      &ep->com.cm_id->m_local_addr;
2164	struct sockaddr_in6 *raddr6 = (struct sockaddr_in6 *)
2165				      &ep->com.cm_id->m_remote_addr;
2166	int iptype;
2167	__u8 *ra;
2168
2169	pr_debug("qp %p cm_id %p\n", ep->com.qp, ep->com.cm_id);
2170	c4iw_init_wr_wait(ep->com.wr_waitp);
2171
	/* When the MPA revision differs between nodes, the node with MPA_rev=2
	 * tries to reconnect with MPA_rev 1 for the same EP through
	 * c4iw_reconnect(), where the same EP is assigned a new tid for
	 * further connection establishment. Because we reuse the same EP
	 * pointer for the reconnect, some skbs were consumed during the
	 * previous c4iw_connect(), leaving the EP with too few skbs for the
	 * subsequent c4iw_reconnect() and eventually causing a crash due to an
	 * empty skb list during peer_abort(). Re-allocate the skbs that were
	 * already consumed.
	 */
2181	size = (CN_MAX_CON_BUF - skb_queue_len(&ep->com.ep_skb_list));
2182	if (alloc_ep_skb_list(&ep->com.ep_skb_list, size)) {
2183		err = -ENOMEM;
2184		goto fail1;
2185	}
2186
2187	/*
2188	 * Allocate an active TID to initiate a TCP connection.
2189	 */
2190	ep->atid = cxgb4_alloc_atid(ep->com.dev->rdev.lldi.tids, ep);
2191	if (ep->atid == -1) {
2192		pr_err("%s - cannot alloc atid\n", __func__);
2193		err = -ENOMEM;
2194		goto fail2;
2195	}
2196	err = xa_insert_irq(&ep->com.dev->atids, ep->atid, ep, GFP_KERNEL);
2197	if (err)
2198		goto fail2a;
2199
2200	/* find a route */
2201	if (ep->com.cm_id->m_local_addr.ss_family == AF_INET) {
2202		ep->dst = cxgb_find_route(&ep->com.dev->rdev.lldi, get_real_dev,
2203					  laddr->sin_addr.s_addr,
2204					  raddr->sin_addr.s_addr,
2205					  laddr->sin_port,
2206					  raddr->sin_port, ep->com.cm_id->tos);
2207		iptype = 4;
2208		ra = (__u8 *)&raddr->sin_addr;
2209	} else {
2210		ep->dst = cxgb_find_route6(&ep->com.dev->rdev.lldi,
2211					   get_real_dev,
2212					   laddr6->sin6_addr.s6_addr,
2213					   raddr6->sin6_addr.s6_addr,
2214					   laddr6->sin6_port,
2215					   raddr6->sin6_port,
2216					   ep->com.cm_id->tos,
2217					   raddr6->sin6_scope_id);
2218		iptype = 6;
2219		ra = (__u8 *)&raddr6->sin6_addr;
2220	}
2221	if (!ep->dst) {
2222		pr_err("%s - cannot find route\n", __func__);
2223		err = -EHOSTUNREACH;
2224		goto fail3;
2225	}
2226	err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, false,
2227			ep->com.dev->rdev.lldi.adapter_type,
2228			ep->com.cm_id->tos);
2229	if (err) {
2230		pr_err("%s - cannot alloc l2e\n", __func__);
2231		goto fail4;
2232	}
2233
2234	pr_debug("txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n",
2235		 ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid,
2236		 ep->l2t->idx);
2237
2238	state_set(&ep->com, CONNECTING);
2239	ep->tos = ep->com.cm_id->tos;
2240
2241	/* send connect request to rnic */
2242	err = send_connect(ep);
2243	if (!err)
2244		goto out;
2245
2246	cxgb4_l2t_release(ep->l2t);
2247fail4:
2248	dst_release(ep->dst);
2249fail3:
2250	xa_erase_irq(&ep->com.dev->atids, ep->atid);
2251fail2a:
2252	cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
2253fail2:
	/*
	 * Remember to send a notification to the upper layer.
	 * We are in here, so the upper layer is not aware that this is a
	 * re-connect attempt and is still waiting for the response to the
	 * first connect request.
	 */
2260	connect_reply_upcall(ep, -ECONNRESET);
2261fail1:
2262	c4iw_put_ep(&ep->com);
2263out:
2264	return err;
2265}
2266
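/*
 * Handle CPL_ACT_OPEN_RPL, the result of our active open.  Negative advice
 * is ignored, a full TCAM may be retried via a firmware offload connection
 * WR, CPL_ERR_CONN_EXIST is retried up to ACT_OPEN_RETRY_COUNT times, and
 * any other failure completes the connect upcall and tears the ep down.
 */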
2267static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
2268{
2269	struct c4iw_ep *ep;
2270	struct cpl_act_open_rpl *rpl = cplhdr(skb);
2271	unsigned int atid = TID_TID_G(AOPEN_ATID_G(
2272				      ntohl(rpl->atid_status)));
2273	struct tid_info *t = dev->rdev.lldi.tids;
2274	int status = AOPEN_STATUS_G(ntohl(rpl->atid_status));
2275	struct sockaddr_in *la;
2276	struct sockaddr_in *ra;
2277	struct sockaddr_in6 *la6;
2278	struct sockaddr_in6 *ra6;
2279	int ret = 0;
2280
2281	ep = lookup_atid(t, atid);
2282	la = (struct sockaddr_in *)&ep->com.local_addr;
2283	ra = (struct sockaddr_in *)&ep->com.remote_addr;
2284	la6 = (struct sockaddr_in6 *)&ep->com.local_addr;
2285	ra6 = (struct sockaddr_in6 *)&ep->com.remote_addr;
2286
2287	pr_debug("ep %p atid %u status %u errno %d\n", ep, atid,
2288		 status, status2errno(status));
2289
2290	if (cxgb_is_neg_adv(status)) {
2291		pr_debug("Connection problems for atid %u status %u (%s)\n",
2292			 atid, status, neg_adv_str(status));
2293		ep->stats.connect_neg_adv++;
2294		mutex_lock(&dev->rdev.stats.lock);
2295		dev->rdev.stats.neg_adv++;
2296		mutex_unlock(&dev->rdev.stats.lock);
2297		return 0;
2298	}
2299
2300	set_bit(ACT_OPEN_RPL, &ep->com.history);
2301
2302	/*
2303	 * Log interesting failures.
2304	 */
2305	switch (status) {
2306	case CPL_ERR_CONN_RESET:
2307	case CPL_ERR_CONN_TIMEDOUT:
2308		break;
2309	case CPL_ERR_TCAM_FULL:
2310		mutex_lock(&dev->rdev.stats.lock);
2311		dev->rdev.stats.tcam_full++;
2312		mutex_unlock(&dev->rdev.stats.lock);
2313		if (ep->com.local_addr.ss_family == AF_INET &&
2314		    dev->rdev.lldi.enable_fw_ofld_conn) {
2315			ret = send_fw_act_open_req(ep, TID_TID_G(AOPEN_ATID_G(
2316						   ntohl(rpl->atid_status))));
2317			if (ret)
2318				goto fail;
2319			return 0;
2320		}
2321		break;
2322	case CPL_ERR_CONN_EXIST:
2323		if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) {
2324			set_bit(ACT_RETRY_INUSE, &ep->com.history);
2325			if (ep->com.remote_addr.ss_family == AF_INET6) {
2326				struct sockaddr_in6 *sin6 =
2327						(struct sockaddr_in6 *)
2328						&ep->com.local_addr;
2329				cxgb4_clip_release(
2330						ep->com.dev->rdev.lldi.ports[0],
2331						(const u32 *)
2332						&sin6->sin6_addr.s6_addr, 1);
2333			}
2334			xa_erase_irq(&ep->com.dev->atids, atid);
2335			cxgb4_free_atid(t, atid);
2336			dst_release(ep->dst);
2337			cxgb4_l2t_release(ep->l2t);
2338			c4iw_reconnect(ep);
2339			return 0;
2340		}
2341		break;
2342	default:
2343		if (ep->com.local_addr.ss_family == AF_INET) {
2344			pr_info("Active open failure - atid %u status %u errno %d %pI4:%u->%pI4:%u\n",
2345				atid, status, status2errno(status),
2346				&la->sin_addr.s_addr, ntohs(la->sin_port),
2347				&ra->sin_addr.s_addr, ntohs(ra->sin_port));
2348		} else {
2349			pr_info("Active open failure - atid %u status %u errno %d %pI6:%u->%pI6:%u\n",
2350				atid, status, status2errno(status),
2351				la6->sin6_addr.s6_addr, ntohs(la6->sin6_port),
2352				ra6->sin6_addr.s6_addr, ntohs(ra6->sin6_port));
2353		}
2354		break;
2355	}
2356
2357fail:
2358	connect_reply_upcall(ep, status2errno(status));
2359	state_set(&ep->com, DEAD);
2360
2361	if (ep->com.remote_addr.ss_family == AF_INET6) {
2362		struct sockaddr_in6 *sin6 =
2363			(struct sockaddr_in6 *)&ep->com.local_addr;
2364		cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
2365				   (const u32 *)&sin6->sin6_addr.s6_addr, 1);
2366	}
2367	if (status && act_open_has_tid(status))
2368		cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, GET_TID(rpl),
2369				 ep->com.local_addr.ss_family);
2370
2371	xa_erase_irq(&ep->com.dev->atids, atid);
2372	cxgb4_free_atid(t, atid);
2373	dst_release(ep->dst);
2374	cxgb4_l2t_release(ep->l2t);
2375	c4iw_put_ep(&ep->com);
2376
2377	return 0;
2378}
2379
2380static int pass_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
2381{
2382	struct cpl_pass_open_rpl *rpl = cplhdr(skb);
2383	unsigned int stid = GET_TID(rpl);
2384	struct c4iw_listen_ep *ep = get_ep_from_stid(dev, stid);
2385
2386	if (!ep) {
2387		pr_warn("%s stid %d lookup failure!\n", __func__, stid);
2388		goto out;
2389	}
2390	pr_debug("ep %p status %d error %d\n", ep,
2391		 rpl->status, status2errno(rpl->status));
2392	c4iw_wake_up_noref(ep->com.wr_waitp, status2errno(rpl->status));
2393	c4iw_put_ep(&ep->com);
2394out:
2395	return 0;
2396}
2397
2398static int close_listsrv_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
2399{
2400	struct cpl_close_listsvr_rpl *rpl = cplhdr(skb);
2401	unsigned int stid = GET_TID(rpl);
2402	struct c4iw_listen_ep *ep = get_ep_from_stid(dev, stid);
2403
2404	if (!ep) {
2405		pr_warn("%s stid %d lookup failure!\n", __func__, stid);
2406		goto out;
2407	}
2408	pr_debug("ep %p\n", ep);
2409	c4iw_wake_up_noref(ep->com.wr_waitp, status2errno(rpl->status));
2410	c4iw_put_ep(&ep->com);
2411out:
2412	return 0;
2413}
2414
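/*
 * Build and send the CPL_PASS_ACCEPT_RPL that accepts an incoming
 * connection request, encoding the negotiated MTU index, window scale,
 * receive window and congestion/ECN settings in opt0/opt2.
 */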
2415static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb,
2416		     struct cpl_pass_accept_req *req)
2417{
2418	struct cpl_pass_accept_rpl *rpl;
2419	unsigned int mtu_idx;
2420	u64 opt0;
2421	u32 opt2;
2422	u32 wscale;
2423	struct cpl_t5_pass_accept_rpl *rpl5 = NULL;
2424	int win;
2425	enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type;
2426
2427	pr_debug("ep %p tid %u\n", ep, ep->hwtid);
2428	cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx,
2429		      enable_tcp_timestamps && req->tcpopt.tstamp,
2430		      (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1);
2431	wscale = cxgb_compute_wscale(rcv_win);
2432
2433	/*
2434	 * Specify the largest window that will fit in opt0. The
2435	 * remainder will be specified in the rx_data_ack.
2436	 */
2437	win = ep->rcv_win >> 10;
2438	if (win > RCV_BUFSIZ_M)
2439		win = RCV_BUFSIZ_M;
2440	opt0 = (nocong ? NO_CONG_F : 0) |
2441	       KEEP_ALIVE_F |
2442	       DELACK_F |
2443	       WND_SCALE_V(wscale) |
2444	       MSS_IDX_V(mtu_idx) |
2445	       L2T_IDX_V(ep->l2t->idx) |
2446	       TX_CHAN_V(ep->tx_chan) |
2447	       SMAC_SEL_V(ep->smac_idx) |
2448	       DSCP_V(ep->tos >> 2) |
2449	       ULP_MODE_V(ULP_MODE_TCPDDP) |
2450	       RCV_BUFSIZ_V(win);
2451	opt2 = RX_CHANNEL_V(0) |
2452	       RSS_QUEUE_VALID_F | RSS_QUEUE_V(ep->rss_qid);
2453
2454	if (enable_tcp_timestamps && req->tcpopt.tstamp)
2455		opt2 |= TSTAMPS_EN_F;
2456	if (enable_tcp_sack && req->tcpopt.sack)
2457		opt2 |= SACK_EN_F;
2458	if (wscale && enable_tcp_window_scaling)
2459		opt2 |= WND_SCALE_EN_F;
2460	if (enable_ecn) {
2461		const struct tcphdr *tcph;
2462		u32 hlen = ntohl(req->hdr_len);
2463
2464		if (CHELSIO_CHIP_VERSION(adapter_type) <= CHELSIO_T5)
2465			tcph = (const void *)(req + 1) + ETH_HDR_LEN_G(hlen) +
2466				IP_HDR_LEN_G(hlen);
2467		else
2468			tcph = (const void *)(req + 1) +
2469				T6_ETH_HDR_LEN_G(hlen) + T6_IP_HDR_LEN_G(hlen);
2470		if (tcph->ece && tcph->cwr)
2471			opt2 |= CCTRL_ECN_V(1);
2472	}
2473
2474	skb_get(skb);
2475	rpl = cplhdr(skb);
2476	if (!is_t4(adapter_type)) {
2477		skb_trim(skb, roundup(sizeof(*rpl5), 16));
2478		rpl5 = (void *)rpl;
2479		INIT_TP_WR(rpl5, ep->hwtid);
2480	} else {
2481		skb_trim(skb, sizeof(*rpl));
2482		INIT_TP_WR(rpl, ep->hwtid);
2483	}
2484	OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL,
2485						    ep->hwtid));
2486
2487	if (CHELSIO_CHIP_VERSION(adapter_type) > CHELSIO_T4) {
2488		u32 isn = (prandom_u32() & ~7UL) - 1;
2489		opt2 |= T5_OPT_2_VALID_F;
2490		opt2 |= CONG_CNTRL_V(CONG_ALG_TAHOE);
2491		opt2 |= T5_ISS_F;
2492		rpl5 = (void *)rpl;
2493		memset(&rpl5->iss, 0, roundup(sizeof(*rpl5)-sizeof(*rpl), 16));
2494		if (peer2peer)
2495			isn += 4;
2496		rpl5->iss = cpu_to_be32(isn);
2497		pr_debug("iss %u\n", be32_to_cpu(rpl5->iss));
2498	}
2499
2500	rpl->opt0 = cpu_to_be64(opt0);
2501	rpl->opt2 = cpu_to_be32(opt2);
2502	set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx);
2503	t4_set_arp_err_handler(skb, ep, pass_accept_rpl_arp_failure);
2504
2505	return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t);
2506}
2507
2508static void reject_cr(struct c4iw_dev *dev, u32 hwtid, struct sk_buff *skb)
2509{
2510	pr_debug("c4iw_dev %p tid %u\n", dev, hwtid);
2511	skb_trim(skb, sizeof(struct cpl_tid_release));
2512	release_tid(&dev->rdev, hwtid, skb);
2514}
2515
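/*
 * Handle CPL_PASS_ACCEPT_REQ: an incoming connection request on one of our
 * listening endpoints.  Find a route back to the peer, allocate and set up
 * a child endpoint, insert its tid and reply with accept_cr().  On failure
 * the tid is released via reject_cr().
 */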
2516static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb)
2517{
2518	struct c4iw_ep *child_ep = NULL, *parent_ep;
2519	struct cpl_pass_accept_req *req = cplhdr(skb);
2520	unsigned int stid = PASS_OPEN_TID_G(ntohl(req->tos_stid));
2521	struct tid_info *t = dev->rdev.lldi.tids;
2522	unsigned int hwtid = GET_TID(req);
2523	struct dst_entry *dst;
2524	__u8 local_ip[16], peer_ip[16];
2525	__be16 local_port, peer_port;
2526	struct sockaddr_in6 *sin6;
2527	int err;
2528	u16 peer_mss = ntohs(req->tcpopt.mss);
2529	int iptype;
2530	unsigned short hdrs;
2531	u8 tos;
2532
2533	parent_ep = (struct c4iw_ep *)get_ep_from_stid(dev, stid);
2534	if (!parent_ep) {
2535		pr_err("%s connect request on invalid stid %d\n",
2536		       __func__, stid);
2537		goto reject;
2538	}
2539
2540	if (state_read(&parent_ep->com) != LISTEN) {
2541		pr_err("%s - listening ep not in LISTEN\n", __func__);
2542		goto reject;
2543	}
2544
2545	if (parent_ep->com.cm_id->tos_set)
2546		tos = parent_ep->com.cm_id->tos;
2547	else
2548		tos = PASS_OPEN_TOS_G(ntohl(req->tos_stid));
2549
2550	cxgb_get_4tuple(req, parent_ep->com.dev->rdev.lldi.adapter_type,
2551			&iptype, local_ip, peer_ip, &local_port, &peer_port);
2552
2553	/* Find output route */
2554	if (iptype == 4)  {
		pr_debug("parent ep %p hwtid %u laddr %pI4 raddr %pI4 lport %d rport %d peer_mss %d\n",
			 parent_ep, hwtid,
			 local_ip, peer_ip, ntohs(local_port),
			 ntohs(peer_port), peer_mss);
2559		dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev,
2560				      *(__be32 *)local_ip, *(__be32 *)peer_ip,
2561				      local_port, peer_port, tos);
2562	} else {
		pr_debug("parent ep %p hwtid %u laddr %pI6 raddr %pI6 lport %d rport %d peer_mss %d\n",
			 parent_ep, hwtid,
			 local_ip, peer_ip, ntohs(local_port),
			 ntohs(peer_port), peer_mss);
2567		dst = cxgb_find_route6(&dev->rdev.lldi, get_real_dev,
2568				local_ip, peer_ip, local_port, peer_port,
2569				tos,
2570				((struct sockaddr_in6 *)
2571				 &parent_ep->com.local_addr)->sin6_scope_id);
2572	}
2573	if (!dst) {
2574		pr_err("%s - failed to find dst entry!\n", __func__);
2575		goto reject;
2576	}
2577
2578	child_ep = alloc_ep(sizeof(*child_ep), GFP_KERNEL);
2579	if (!child_ep) {
2580		pr_err("%s - failed to allocate ep entry!\n", __func__);
2581		dst_release(dst);
2582		goto reject;
2583	}
2584
2585	err = import_ep(child_ep, iptype, peer_ip, dst, dev, false,
2586			parent_ep->com.dev->rdev.lldi.adapter_type, tos);
2587	if (err) {
2588		pr_err("%s - failed to allocate l2t entry!\n", __func__);
2589		dst_release(dst);
2590		kfree(child_ep);
2591		goto reject;
2592	}
2593
2594	hdrs = ((iptype == 4) ? sizeof(struct iphdr) : sizeof(struct ipv6hdr)) +
2595	       sizeof(struct tcphdr) +
2596	       ((enable_tcp_timestamps && req->tcpopt.tstamp) ? 12 : 0);
2597	if (peer_mss && child_ep->mtu > (peer_mss + hdrs))
2598		child_ep->mtu = peer_mss + hdrs;
2599
2600	skb_queue_head_init(&child_ep->com.ep_skb_list);
2601	if (alloc_ep_skb_list(&child_ep->com.ep_skb_list, CN_MAX_CON_BUF))
2602		goto fail;
2603
2604	state_set(&child_ep->com, CONNECTING);
2605	child_ep->com.dev = dev;
2606	child_ep->com.cm_id = NULL;
2607
2608	if (iptype == 4) {
2609		struct sockaddr_in *sin = (struct sockaddr_in *)
2610			&child_ep->com.local_addr;
2611
2612		sin->sin_family = AF_INET;
2613		sin->sin_port = local_port;
2614		sin->sin_addr.s_addr = *(__be32 *)local_ip;
2615
2616		sin = (struct sockaddr_in *)&child_ep->com.local_addr;
2617		sin->sin_family = AF_INET;
2618		sin->sin_port = ((struct sockaddr_in *)
2619				 &parent_ep->com.local_addr)->sin_port;
2620		sin->sin_addr.s_addr = *(__be32 *)local_ip;
2621
2622		sin = (struct sockaddr_in *)&child_ep->com.remote_addr;
2623		sin->sin_family = AF_INET;
2624		sin->sin_port = peer_port;
2625		sin->sin_addr.s_addr = *(__be32 *)peer_ip;
2626	} else {
2627		sin6 = (struct sockaddr_in6 *)&child_ep->com.local_addr;
2628		sin6->sin6_family = PF_INET6;
2629		sin6->sin6_port = local_port;
2630		memcpy(sin6->sin6_addr.s6_addr, local_ip, 16);
2631
2632		sin6 = (struct sockaddr_in6 *)&child_ep->com.local_addr;
2633		sin6->sin6_family = PF_INET6;
2634		sin6->sin6_port = ((struct sockaddr_in6 *)
2635				   &parent_ep->com.local_addr)->sin6_port;
2636		memcpy(sin6->sin6_addr.s6_addr, local_ip, 16);
2637
2638		sin6 = (struct sockaddr_in6 *)&child_ep->com.remote_addr;
2639		sin6->sin6_family = PF_INET6;
2640		sin6->sin6_port = peer_port;
2641		memcpy(sin6->sin6_addr.s6_addr, peer_ip, 16);
2642	}
2643
2644	c4iw_get_ep(&parent_ep->com);
2645	child_ep->parent_ep = parent_ep;
2646	child_ep->tos = tos;
2647	child_ep->dst = dst;
2648	child_ep->hwtid = hwtid;
2649
2650	pr_debug("tx_chan %u smac_idx %u rss_qid %u\n",
2651		 child_ep->tx_chan, child_ep->smac_idx, child_ep->rss_qid);
2652
2653	timer_setup(&child_ep->timer, ep_timeout, 0);
2654	cxgb4_insert_tid(t, child_ep, hwtid,
2655			 child_ep->com.local_addr.ss_family);
2656	insert_ep_tid(child_ep);
2657	if (accept_cr(child_ep, skb, req)) {
2658		c4iw_put_ep(&parent_ep->com);
2659		release_ep_resources(child_ep);
2660	} else {
2661		set_bit(PASS_ACCEPT_REQ, &child_ep->com.history);
2662	}
2663	if (iptype == 6) {
2664		sin6 = (struct sockaddr_in6 *)&child_ep->com.local_addr;
2665		cxgb4_clip_get(child_ep->com.dev->rdev.lldi.ports[0],
2666			       (const u32 *)&sin6->sin6_addr.s6_addr, 1);
2667	}
2668	goto out;
2669fail:
2670	c4iw_put_ep(&child_ep->com);
2671reject:
2672	reject_cr(dev, hwtid, skb);
2673out:
2674	if (parent_ep)
2675		c4iw_put_ep(&parent_ep->com);
2676	return 0;
2677}
2678
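/*
 * Handle CPL_PASS_ESTABLISH: the passively opened connection is now fully
 * established.  Record the initial sequence numbers and window scale,
 * compute the emss, move to MPA_REQ_WAIT and send the flowc work request.
 */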
2679static int pass_establish(struct c4iw_dev *dev, struct sk_buff *skb)
2680{
2681	struct c4iw_ep *ep;
2682	struct cpl_pass_establish *req = cplhdr(skb);
2683	unsigned int tid = GET_TID(req);
2684	int ret;
2685	u16 tcp_opt = ntohs(req->tcp_opt);
2686
2687	ep = get_ep_from_tid(dev, tid);
2688	pr_debug("ep %p tid %u\n", ep, ep->hwtid);
2689	ep->snd_seq = be32_to_cpu(req->snd_isn);
2690	ep->rcv_seq = be32_to_cpu(req->rcv_isn);
2691	ep->snd_wscale = TCPOPT_SND_WSCALE_G(tcp_opt);
2692
2693	pr_debug("ep %p hwtid %u tcp_opt 0x%02x\n", ep, tid, tcp_opt);
2694
2695	set_emss(ep, tcp_opt);
2696
2697	dst_confirm(ep->dst);
2698	mutex_lock(&ep->com.mutex);
2699	ep->com.state = MPA_REQ_WAIT;
2700	start_ep_timer(ep);
2701	set_bit(PASS_ESTAB, &ep->com.history);
2702	ret = send_flowc(ep);
2703	mutex_unlock(&ep->com.mutex);
2704	if (ret)
2705		c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
2706	c4iw_put_ep(&ep->com);
2707
2708	return 0;
2709}
2710
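/*
 * Handle CPL_PEER_CLOSE: the peer has sent a FIN.  Drive the endpoint state
 * machine towards CLOSING/MORIBUND/DEAD, moving the QP to CLOSING or IDLE
 * and issuing the appropriate upcalls as the half-close completes.
 */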
2711static int peer_close(struct c4iw_dev *dev, struct sk_buff *skb)
2712{
2713	struct cpl_peer_close *hdr = cplhdr(skb);
2714	struct c4iw_ep *ep;
2715	struct c4iw_qp_attributes attrs;
2716	int disconnect = 1;
2717	int release = 0;
2718	unsigned int tid = GET_TID(hdr);
2719	int ret;
2720
2721	ep = get_ep_from_tid(dev, tid);
2722	if (!ep)
2723		return 0;
2724
2725	pr_debug("ep %p tid %u\n", ep, ep->hwtid);
2726	dst_confirm(ep->dst);
2727
2728	set_bit(PEER_CLOSE, &ep->com.history);
2729	mutex_lock(&ep->com.mutex);
2730	switch (ep->com.state) {
2731	case MPA_REQ_WAIT:
2732		__state_set(&ep->com, CLOSING);
2733		break;
2734	case MPA_REQ_SENT:
2735		__state_set(&ep->com, CLOSING);
2736		connect_reply_upcall(ep, -ECONNRESET);
2737		break;
2738	case MPA_REQ_RCVD:
2739
2740		/*
2741		 * We're gonna mark this puppy DEAD, but keep
2742		 * the reference on it until the ULP accepts or
2743		 * rejects the CR. Also wake up anyone waiting
2744		 * in rdma connection migration (see c4iw_accept_cr()).
2745		 */
2746		__state_set(&ep->com, CLOSING);
2747		pr_debug("waking up ep %p tid %u\n", ep, ep->hwtid);
2748		c4iw_wake_up_noref(ep->com.wr_waitp, -ECONNRESET);
2749		break;
2750	case MPA_REP_SENT:
2751		__state_set(&ep->com, CLOSING);
2752		pr_debug("waking up ep %p tid %u\n", ep, ep->hwtid);
2753		c4iw_wake_up_noref(ep->com.wr_waitp, -ECONNRESET);
2754		break;
2755	case FPDU_MODE:
2756		start_ep_timer(ep);
2757		__state_set(&ep->com, CLOSING);
2758		attrs.next_state = C4IW_QP_STATE_CLOSING;
2759		ret = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
2760				       C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
2761		if (ret != -ECONNRESET) {
2762			peer_close_upcall(ep);
2763			disconnect = 1;
2764		}
2765		break;
2766	case ABORTING:
2767		disconnect = 0;
2768		break;
2769	case CLOSING:
2770		__state_set(&ep->com, MORIBUND);
2771		disconnect = 0;
2772		break;
2773	case MORIBUND:
2774		(void)stop_ep_timer(ep);
2775		if (ep->com.cm_id && ep->com.qp) {
2776			attrs.next_state = C4IW_QP_STATE_IDLE;
2777			c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
2778				       C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
2779		}
2780		close_complete_upcall(ep, 0);
2781		__state_set(&ep->com, DEAD);
2782		release = 1;
2783		disconnect = 0;
2784		break;
2785	case DEAD:
2786		disconnect = 0;
2787		break;
2788	default:
2789		WARN_ONCE(1, "Bad endpoint state %u\n", ep->com.state);
2790	}
2791	mutex_unlock(&ep->com.mutex);
2792	if (disconnect)
2793		c4iw_ep_disconnect(ep, 0, GFP_KERNEL);
2794	if (release)
2795		release_ep_resources(ep);
2796	c4iw_put_ep(&ep->com);
2797	return 0;
2798}
2799
2800static void finish_peer_abort(struct c4iw_dev *dev, struct c4iw_ep *ep)
2801{
2802	complete_cached_srq_buffers(ep, ep->srqe_idx);
2803	if (ep->com.cm_id && ep->com.qp) {
2804		struct c4iw_qp_attributes attrs;
2805
2806		attrs.next_state = C4IW_QP_STATE_ERROR;
2807		c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
2808			       C4IW_QP_ATTR_NEXT_STATE,	&attrs, 1);
2809	}
2810	peer_abort_upcall(ep);
2811	release_ep_resources(ep);
2812	c4iw_put_ep(&ep->com);
2813}
2814
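/*
 * Handle CPL_ABORT_REQ_RSS: the connection was aborted.  Negative advice is
 * ignored; otherwise wake up any waiters, move the QP to ERROR where needed,
 * send the abort reply, and either release the endpoint or retry the
 * connection with MPA v1.
 */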
2815static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb)
2816{
2817	struct cpl_abort_req_rss6 *req = cplhdr(skb);
2818	struct c4iw_ep *ep;
2819	struct sk_buff *rpl_skb;
2820	struct c4iw_qp_attributes attrs;
2821	int ret;
2822	int release = 0;
2823	unsigned int tid = GET_TID(req);
2824	u8 status;
2825	u32 srqidx;
2826
2827	u32 len = roundup(sizeof(struct cpl_abort_rpl), 16);
2828
2829	ep = get_ep_from_tid(dev, tid);
2830	if (!ep)
2831		return 0;
2832
2833	status = ABORT_RSS_STATUS_G(be32_to_cpu(req->srqidx_status));
2834
2835	if (cxgb_is_neg_adv(status)) {
		pr_debug("Negative advice on abort - tid %u status %d (%s)\n",
2837			 ep->hwtid, status, neg_adv_str(status));
2838		ep->stats.abort_neg_adv++;
2839		mutex_lock(&dev->rdev.stats.lock);
2840		dev->rdev.stats.neg_adv++;
2841		mutex_unlock(&dev->rdev.stats.lock);
2842		goto deref_ep;
2843	}
2844
2845	pr_debug("ep %p tid %u state %u\n", ep, ep->hwtid,
2846		 ep->com.state);
2847	set_bit(PEER_ABORT, &ep->com.history);
2848
2849	/*
2850	 * Wake up any threads in rdma_init() or rdma_fini().
2851	 * However, this is not needed if com state is just
2852	 * MPA_REQ_SENT
2853	 */
2854	if (ep->com.state != MPA_REQ_SENT)
2855		c4iw_wake_up_noref(ep->com.wr_waitp, -ECONNRESET);
2856
2857	mutex_lock(&ep->com.mutex);
2858	switch (ep->com.state) {
2859	case CONNECTING:
2860		c4iw_put_ep(&ep->parent_ep->com);
2861		break;
2862	case MPA_REQ_WAIT:
2863		(void)stop_ep_timer(ep);
2864		break;
2865	case MPA_REQ_SENT:
2866		(void)stop_ep_timer(ep);
2867		if (status != CPL_ERR_CONN_RESET || mpa_rev == 1 ||
2868		    (mpa_rev == 2 && ep->tried_with_mpa_v1))
2869			connect_reply_upcall(ep, -ECONNRESET);
2870		else {
			/*
			 * We don't send a notification upwards because we
			 * want to retry with mpa_v1 without the upper layers
			 * even knowing it.
			 *
			 * Do some housekeeping so as to re-initiate the
			 * connection.
			 */
2879			pr_info("%s: mpa_rev=%d. Retrying with mpav1\n",
2880				__func__, mpa_rev);
2881			ep->retry_with_mpa_v1 = 1;
2882		}
2883		break;
2884	case MPA_REP_SENT:
2885		break;
2886	case MPA_REQ_RCVD:
2887		break;
2888	case MORIBUND:
2889	case CLOSING:
2890		stop_ep_timer(ep);
2891		fallthrough;
2892	case FPDU_MODE:
2893		if (ep->com.qp && ep->com.qp->srq) {
2894			srqidx = ABORT_RSS_SRQIDX_G(
2895					be32_to_cpu(req->srqidx_status));
2896			if (srqidx) {
2897				complete_cached_srq_buffers(ep, srqidx);
2898			} else {
2899				/* Hold ep ref until finish_peer_abort() */
2900				c4iw_get_ep(&ep->com);
2901				__state_set(&ep->com, ABORTING);
2902				set_bit(PEER_ABORT_IN_PROGRESS, &ep->com.flags);
2903				read_tcb(ep);
				break;
			}
2907		}
2908
2909		if (ep->com.cm_id && ep->com.qp) {
2910			attrs.next_state = C4IW_QP_STATE_ERROR;
2911			ret = c4iw_modify_qp(ep->com.qp->rhp,
2912				     ep->com.qp, C4IW_QP_ATTR_NEXT_STATE,
2913				     &attrs, 1);
2914			if (ret)
2915				pr_err("%s - qp <- error failed!\n", __func__);
2916		}
2917		peer_abort_upcall(ep);
2918		break;
2919	case ABORTING:
2920		break;
2921	case DEAD:
2922		pr_warn("%s PEER_ABORT IN DEAD STATE!!!!\n", __func__);
2923		mutex_unlock(&ep->com.mutex);
2924		goto deref_ep;
2925	default:
2926		WARN_ONCE(1, "Bad endpoint state %u\n", ep->com.state);
2927		break;
2928	}
2929	dst_confirm(ep->dst);
2930	if (ep->com.state != ABORTING) {
2931		__state_set(&ep->com, DEAD);
2932		/* we don't release if we want to retry with mpa_v1 */
2933		if (!ep->retry_with_mpa_v1)
2934			release = 1;
2935	}
2936	mutex_unlock(&ep->com.mutex);
2937
2938	rpl_skb = skb_dequeue(&ep->com.ep_skb_list);
2939	if (WARN_ON(!rpl_skb)) {
2940		release = 1;
2941		goto out;
2942	}
2943
2944	cxgb_mk_abort_rpl(rpl_skb, len, ep->hwtid, ep->txq_idx);
2945
2946	c4iw_ofld_send(&ep->com.dev->rdev, rpl_skb);
2947out:
2948	if (release)
2949		release_ep_resources(ep);
2950	else if (ep->retry_with_mpa_v1) {
2951		if (ep->com.remote_addr.ss_family == AF_INET6) {
2952			struct sockaddr_in6 *sin6 =
2953					(struct sockaddr_in6 *)
2954					&ep->com.local_addr;
2955			cxgb4_clip_release(
2956					ep->com.dev->rdev.lldi.ports[0],
2957					(const u32 *)&sin6->sin6_addr.s6_addr,
2958					1);
2959		}
2960		xa_erase_irq(&ep->com.dev->hwtids, ep->hwtid);
2961		cxgb4_remove_tid(ep->com.dev->rdev.lldi.tids, 0, ep->hwtid,
2962				 ep->com.local_addr.ss_family);
2963		dst_release(ep->dst);
2964		cxgb4_l2t_release(ep->l2t);
2965		c4iw_reconnect(ep);
2966	}
2967
2968deref_ep:
2969	c4iw_put_ep(&ep->com);
2970	/* Dereferencing ep, referenced in peer_abort_intr() */
2971	c4iw_put_ep(&ep->com);
2972	return 0;
2973}
2974
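/*
 * Handle CPL_CLOSE_CON_RPL: our half-close has been acknowledged.  In
 * MORIBUND this completes the close: move the QP to IDLE, signal the
 * close-complete upcall and release the endpoint.
 */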
2975static int close_con_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
2976{
2977	struct c4iw_ep *ep;
2978	struct c4iw_qp_attributes attrs;
2979	struct cpl_close_con_rpl *rpl = cplhdr(skb);
2980	int release = 0;
2981	unsigned int tid = GET_TID(rpl);
2982
2983	ep = get_ep_from_tid(dev, tid);
2984	if (!ep)
2985		return 0;
2986
2987	pr_debug("ep %p tid %u\n", ep, ep->hwtid);
2988
2989	/* The cm_id may be null if we failed to connect */
2990	mutex_lock(&ep->com.mutex);
2991	set_bit(CLOSE_CON_RPL, &ep->com.history);
2992	switch (ep->com.state) {
2993	case CLOSING:
2994		__state_set(&ep->com, MORIBUND);
2995		break;
2996	case MORIBUND:
2997		(void)stop_ep_timer(ep);
2998		if ((ep->com.cm_id) && (ep->com.qp)) {
2999			attrs.next_state = C4IW_QP_STATE_IDLE;
3000			c4iw_modify_qp(ep->com.qp->rhp,
3001					     ep->com.qp,
3002					     C4IW_QP_ATTR_NEXT_STATE,
3003					     &attrs, 1);
3004		}
3005		close_complete_upcall(ep, 0);
3006		__state_set(&ep->com, DEAD);
3007		release = 1;
3008		break;
3009	case ABORTING:
3010	case DEAD:
3011		break;
3012	default:
3013		WARN_ONCE(1, "Bad endpoint state %u\n", ep->com.state);
3014		break;
3015	}
3016	mutex_unlock(&ep->com.mutex);
3017	if (release)
3018		release_ep_resources(ep);
3019	c4iw_put_ep(&ep->com);
3020	return 0;
3021}
3022
3023static int terminate(struct c4iw_dev *dev, struct sk_buff *skb)
3024{
3025	struct cpl_rdma_terminate *rpl = cplhdr(skb);
3026	unsigned int tid = GET_TID(rpl);
3027	struct c4iw_ep *ep;
3028	struct c4iw_qp_attributes attrs;
3029
3030	ep = get_ep_from_tid(dev, tid);
3031
3032	if (ep) {
3033		if (ep->com.qp) {
3034			pr_warn("TERM received tid %u qpid %u\n", tid,
3035				ep->com.qp->wq.sq.qid);
3036			attrs.next_state = C4IW_QP_STATE_TERMINATE;
3037			c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
3038				       C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
3039		}
3040
3041		/* As per draft-hilland-iwarp-verbs-v1.0, sec 6.2.3,
3042		 * when entering the TERM state the RNIC MUST initiate a CLOSE.
3043		 */
3044		c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
3045		c4iw_put_ep(&ep->com);
3046	} else
3047		pr_warn("TERM received tid %u no ep/qp\n", tid);
3048
3049	return 0;
3050}
3051
3052/*
3053 * Upcall from the adapter indicating data has been transmitted.
 * For us it's just the single MPA request or reply.  We can now free
3055 * the skb holding the mpa message.
3056 */
3057static int fw4_ack(struct c4iw_dev *dev, struct sk_buff *skb)
3058{
3059	struct c4iw_ep *ep;
3060	struct cpl_fw4_ack *hdr = cplhdr(skb);
3061	u8 credits = hdr->credits;
3062	unsigned int tid = GET_TID(hdr);
3063
3065	ep = get_ep_from_tid(dev, tid);
3066	if (!ep)
3067		return 0;
3068	pr_debug("ep %p tid %u credits %u\n",
3069		 ep, ep->hwtid, credits);
3070	if (credits == 0) {
3071		pr_debug("0 credit ack ep %p tid %u state %u\n",
3072			 ep, ep->hwtid, state_read(&ep->com));
3073		goto out;
3074	}
3075
3076	dst_confirm(ep->dst);
3077	if (ep->mpa_skb) {
3078		pr_debug("last streaming msg ack ep %p tid %u state %u initiator %u freeing skb\n",
3079			 ep, ep->hwtid, state_read(&ep->com),
3080			 ep->mpa_attr.initiator ? 1 : 0);
3081		mutex_lock(&ep->com.mutex);
3082		kfree_skb(ep->mpa_skb);
3083		ep->mpa_skb = NULL;
3084		if (test_bit(STOP_MPA_TIMER, &ep->com.flags))
3085			stop_ep_timer(ep);
3086		mutex_unlock(&ep->com.mutex);
3087	}
3088out:
3089	c4iw_put_ep(&ep->com);
3090	return 0;
3091}
3092
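/*
 * iw_cm reject handler: send an MPA reject carrying any private data
 * (unless mpa_rev is 0, in which case the connection is simply aborted)
 * and disconnect the endpoint.
 */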
3093int c4iw_reject_cr(struct iw_cm_id *cm_id, const void *pdata, u8 pdata_len)
3094{
3095	int abort;
3096	struct c4iw_ep *ep = to_ep(cm_id);
3097
3098	pr_debug("ep %p tid %u\n", ep, ep->hwtid);
3099
3100	mutex_lock(&ep->com.mutex);
3101	if (ep->com.state != MPA_REQ_RCVD) {
3102		mutex_unlock(&ep->com.mutex);
3103		c4iw_put_ep(&ep->com);
3104		return -ECONNRESET;
3105	}
3106	set_bit(ULP_REJECT, &ep->com.history);
3107	if (mpa_rev == 0)
3108		abort = 1;
3109	else
3110		abort = send_mpa_reject(ep, pdata, pdata_len);
3111	mutex_unlock(&ep->com.mutex);
3112
3113	stop_ep_timer(ep);
3114	c4iw_ep_disconnect(ep, abort != 0, GFP_KERNEL);
3115	c4iw_put_ep(&ep->com);
3116	return 0;
3117}
3118
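/*
 * iw_cm accept handler: validate and negotiate IRD/ORD against what the
 * peer advertised, bind the QP to the endpoint, move the QP to RTS, send
 * the MPA reply and signal the established upcall.
 */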
3119int c4iw_accept_cr(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3120{
3121	int err;
3122	struct c4iw_qp_attributes attrs;
3123	enum c4iw_qp_attr_mask mask;
3124	struct c4iw_ep *ep = to_ep(cm_id);
3125	struct c4iw_dev *h = to_c4iw_dev(cm_id->device);
3126	struct c4iw_qp *qp = get_qhp(h, conn_param->qpn);
3127	int abort = 0;
3128
3129	pr_debug("ep %p tid %u\n", ep, ep->hwtid);
3130
3131	mutex_lock(&ep->com.mutex);
3132	if (ep->com.state != MPA_REQ_RCVD) {
3133		err = -ECONNRESET;
3134		goto err_out;
3135	}
3136
3137	if (!qp) {
3138		err = -EINVAL;
3139		goto err_out;
3140	}
3141
3142	set_bit(ULP_ACCEPT, &ep->com.history);
3143	if ((conn_param->ord > cur_max_read_depth(ep->com.dev)) ||
3144	    (conn_param->ird > cur_max_read_depth(ep->com.dev))) {
3145		err = -EINVAL;
3146		goto err_abort;
3147	}
3148
3149	if (ep->mpa_attr.version == 2 && ep->mpa_attr.enhanced_rdma_conn) {
3150		if (conn_param->ord > ep->ird) {
3151			if (RELAXED_IRD_NEGOTIATION) {
3152				conn_param->ord = ep->ird;
3153			} else {
3154				ep->ird = conn_param->ird;
3155				ep->ord = conn_param->ord;
3156				send_mpa_reject(ep, conn_param->private_data,
3157						conn_param->private_data_len);
3158				err = -ENOMEM;
3159				goto err_abort;
3160			}
3161		}
3162		if (conn_param->ird < ep->ord) {
3163			if (RELAXED_IRD_NEGOTIATION &&
3164			    ep->ord <= h->rdev.lldi.max_ordird_qp) {
3165				conn_param->ird = ep->ord;
3166			} else {
3167				err = -ENOMEM;
3168				goto err_abort;
3169			}
3170		}
3171	}
3172	ep->ird = conn_param->ird;
3173	ep->ord = conn_param->ord;
3174
3175	if (ep->mpa_attr.version == 1) {
3176		if (peer2peer && ep->ird == 0)
3177			ep->ird = 1;
3178	} else {
3179		if (peer2peer &&
3180		    (ep->mpa_attr.p2p_type != FW_RI_INIT_P2PTYPE_DISABLED) &&
3181		    (p2p_type == FW_RI_INIT_P2PTYPE_READ_REQ) && ep->ird == 0)
3182			ep->ird = 1;
3183	}
3184
3185	pr_debug("ird %d ord %d\n", ep->ird, ep->ord);
3186
3187	ep->com.cm_id = cm_id;
3188	ref_cm_id(&ep->com);
3189	ep->com.qp = qp;
3190	ref_qp(ep);
3191
3192	/* bind QP to EP and move to RTS */
3193	attrs.mpa_attr = ep->mpa_attr;
3194	attrs.max_ird = ep->ird;
3195	attrs.max_ord = ep->ord;
3196	attrs.llp_stream_handle = ep;
3197	attrs.next_state = C4IW_QP_STATE_RTS;
3198
3199	/* bind QP and TID with INIT_WR */
3200	mask = C4IW_QP_ATTR_NEXT_STATE |
3201			     C4IW_QP_ATTR_LLP_STREAM_HANDLE |
3202			     C4IW_QP_ATTR_MPA_ATTR |
3203			     C4IW_QP_ATTR_MAX_IRD |
3204			     C4IW_QP_ATTR_MAX_ORD;
3205
3206	err = c4iw_modify_qp(ep->com.qp->rhp,
3207			     ep->com.qp, mask, &attrs, 1);
3208	if (err)
3209		goto err_deref_cm_id;
3210
3211	set_bit(STOP_MPA_TIMER, &ep->com.flags);
3212	err = send_mpa_reply(ep, conn_param->private_data,
3213			     conn_param->private_data_len);
3214	if (err)
3215		goto err_deref_cm_id;
3216
3217	__state_set(&ep->com, FPDU_MODE);
3218	established_upcall(ep);
3219	mutex_unlock(&ep->com.mutex);
3220	c4iw_put_ep(&ep->com);
3221	return 0;
3222err_deref_cm_id:
3223	deref_cm_id(&ep->com);
3224err_abort:
3225	abort = 1;
3226err_out:
3227	mutex_unlock(&ep->com.mutex);
3228	if (abort)
3229		c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
3230	c4iw_put_ep(&ep->com);
3231	return err;
3232}
3233
3234static int pick_local_ipaddrs(struct c4iw_dev *dev, struct iw_cm_id *cm_id)
3235{
3236	struct in_device *ind;
3237	int found = 0;
3238	struct sockaddr_in *laddr = (struct sockaddr_in *)&cm_id->m_local_addr;
3239	struct sockaddr_in *raddr = (struct sockaddr_in *)&cm_id->m_remote_addr;
3240	const struct in_ifaddr *ifa;
3241
3242	ind = in_dev_get(dev->rdev.lldi.ports[0]);
3243	if (!ind)
3244		return -EADDRNOTAVAIL;
3245	rcu_read_lock();
3246	in_dev_for_each_ifa_rcu(ifa, ind) {
3247		if (ifa->ifa_flags & IFA_F_SECONDARY)
3248			continue;
3249		laddr->sin_addr.s_addr = ifa->ifa_address;
3250		raddr->sin_addr.s_addr = ifa->ifa_address;
3251		found = 1;
3252		break;
3253	}
3254	rcu_read_unlock();
3255
3256	in_dev_put(ind);
3257	return found ? 0 : -EADDRNOTAVAIL;
3258}
3259
3260static int get_lladdr(struct net_device *dev, struct in6_addr *addr,
3261		      unsigned char banned_flags)
3262{
3263	struct inet6_dev *idev;
3264	int err = -EADDRNOTAVAIL;
3265
3266	rcu_read_lock();
3267	idev = __in6_dev_get(dev);
3268	if (idev != NULL) {
3269		struct inet6_ifaddr *ifp;
3270
3271		read_lock_bh(&idev->lock);
3272		list_for_each_entry(ifp, &idev->addr_list, if_list) {
3273			if (ifp->scope == IFA_LINK &&
3274			    !(ifp->flags & banned_flags)) {
3275				memcpy(addr, &ifp->addr, 16);
3276				err = 0;
3277				break;
3278			}
3279		}
3280		read_unlock_bh(&idev->lock);
3281	}
3282	rcu_read_unlock();
3283	return err;
3284}
3285
3286static int pick_local_ip6addrs(struct c4iw_dev *dev, struct iw_cm_id *cm_id)
3287{
3288	struct in6_addr addr;
3289	struct sockaddr_in6 *la6 = (struct sockaddr_in6 *)&cm_id->m_local_addr;
3290	struct sockaddr_in6 *ra6 = (struct sockaddr_in6 *)&cm_id->m_remote_addr;
3291
3292	if (!get_lladdr(dev->rdev.lldi.ports[0], &addr, IFA_F_TENTATIVE)) {
3293		memcpy(la6->sin6_addr.s6_addr, &addr, 16);
3294		memcpy(ra6->sin6_addr.s6_addr, &addr, 16);
3295		return 0;
3296	}
3297	return -EADDRNOTAVAIL;
3298}
3299
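/*
 * iw_cm connect handler: allocate an endpoint and an active TID, resolve
 * the route and L2T entry to the destination (handling loopback requests
 * to INADDR_ANY), and send the active open request to the adapter.
 */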
3300int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param)
3301{
3302	struct c4iw_dev *dev = to_c4iw_dev(cm_id->device);
3303	struct c4iw_ep *ep;
3304	int err = 0;
3305	struct sockaddr_in *laddr;
3306	struct sockaddr_in *raddr;
3307	struct sockaddr_in6 *laddr6;
3308	struct sockaddr_in6 *raddr6;
3309	__u8 *ra;
3310	int iptype;
3311
3312	if ((conn_param->ord > cur_max_read_depth(dev)) ||
3313	    (conn_param->ird > cur_max_read_depth(dev))) {
3314		err = -EINVAL;
3315		goto out;
3316	}
3317	ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
3318	if (!ep) {
3319		pr_err("%s - cannot alloc ep\n", __func__);
3320		err = -ENOMEM;
3321		goto out;
3322	}
3323
3324	skb_queue_head_init(&ep->com.ep_skb_list);
3325	if (alloc_ep_skb_list(&ep->com.ep_skb_list, CN_MAX_CON_BUF)) {
3326		err = -ENOMEM;
3327		goto fail1;
3328	}
3329
3330	timer_setup(&ep->timer, ep_timeout, 0);
3331	ep->plen = conn_param->private_data_len;
3332	if (ep->plen)
3333		memcpy(ep->mpa_pkt + sizeof(struct mpa_message),
3334		       conn_param->private_data, ep->plen);
3335	ep->ird = conn_param->ird;
3336	ep->ord = conn_param->ord;
3337
3338	if (peer2peer && ep->ord == 0)
3339		ep->ord = 1;
3340
3341	ep->com.cm_id = cm_id;
3342	ref_cm_id(&ep->com);
3343	cm_id->provider_data = ep;
3344	ep->com.dev = dev;
3345	ep->com.qp = get_qhp(dev, conn_param->qpn);
3346	if (!ep->com.qp) {
3347		pr_warn("%s qpn 0x%x not found!\n", __func__, conn_param->qpn);
3348		err = -EINVAL;
3349		goto fail2;
3350	}
3351	ref_qp(ep);
3352	pr_debug("qpn 0x%x qp %p cm_id %p\n", conn_param->qpn,
3353		 ep->com.qp, cm_id);
3354
3355	/*
3356	 * Allocate an active TID to initiate a TCP connection.
3357	 */
3358	ep->atid = cxgb4_alloc_atid(dev->rdev.lldi.tids, ep);
3359	if (ep->atid == -1) {
3360		pr_err("%s - cannot alloc atid\n", __func__);
3361		err = -ENOMEM;
3362		goto fail2;
3363	}
3364	err = xa_insert_irq(&dev->atids, ep->atid, ep, GFP_KERNEL);
3365	if (err)
3366		goto fail5;
3367
3368	memcpy(&ep->com.local_addr, &cm_id->m_local_addr,
3369	       sizeof(ep->com.local_addr));
3370	memcpy(&ep->com.remote_addr, &cm_id->m_remote_addr,
3371	       sizeof(ep->com.remote_addr));
3372
3373	laddr = (struct sockaddr_in *)&ep->com.local_addr;
3374	raddr = (struct sockaddr_in *)&ep->com.remote_addr;
3375	laddr6 = (struct sockaddr_in6 *)&ep->com.local_addr;
3376	raddr6 = (struct sockaddr_in6 *) &ep->com.remote_addr;
3377
3378	if (cm_id->m_remote_addr.ss_family == AF_INET) {
3379		iptype = 4;
3380		ra = (__u8 *)&raddr->sin_addr;
3381
3382		/*
3383		 * Handle loopback requests to INADDR_ANY.
3384		 */
3385		if (raddr->sin_addr.s_addr == htonl(INADDR_ANY)) {
3386			err = pick_local_ipaddrs(dev, cm_id);
3387			if (err)
3388				goto fail3;
3389		}
3390
3391		/* find a route */
3392		pr_debug("saddr %pI4 sport 0x%x raddr %pI4 rport 0x%x\n",
3393			 &laddr->sin_addr, ntohs(laddr->sin_port),
3394			 ra, ntohs(raddr->sin_port));
3395		ep->dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev,
3396					  laddr->sin_addr.s_addr,
3397					  raddr->sin_addr.s_addr,
3398					  laddr->sin_port,
3399					  raddr->sin_port, cm_id->tos);
3400	} else {
3401		iptype = 6;
3402		ra = (__u8 *)&raddr6->sin6_addr;
3403
3404		/*
3405		 * Handle loopback requests to INADDR_ANY.
3406		 */
3407		if (ipv6_addr_type(&raddr6->sin6_addr) == IPV6_ADDR_ANY) {
3408			err = pick_local_ip6addrs(dev, cm_id);
3409			if (err)
3410				goto fail3;
3411		}
3412
3413		/* find a route */
3414		pr_debug("saddr %pI6 sport 0x%x raddr %pI6 rport 0x%x\n",
3415			 laddr6->sin6_addr.s6_addr,
3416			 ntohs(laddr6->sin6_port),
3417			 raddr6->sin6_addr.s6_addr, ntohs(raddr6->sin6_port));
3418		ep->dst = cxgb_find_route6(&dev->rdev.lldi, get_real_dev,
3419					   laddr6->sin6_addr.s6_addr,
3420					   raddr6->sin6_addr.s6_addr,
3421					   laddr6->sin6_port,
3422					   raddr6->sin6_port, cm_id->tos,
3423					   raddr6->sin6_scope_id);
3424	}
3425	if (!ep->dst) {
3426		pr_err("%s - cannot find route\n", __func__);
3427		err = -EHOSTUNREACH;
3428		goto fail3;
3429	}
3430
3431	err = import_ep(ep, iptype, ra, ep->dst, ep->com.dev, true,
3432			ep->com.dev->rdev.lldi.adapter_type, cm_id->tos);
3433	if (err) {
3434		pr_err("%s - cannot alloc l2e\n", __func__);
3435		goto fail4;
3436	}
3437
3438	pr_debug("txq_idx %u tx_chan %u smac_idx %u rss_qid %u l2t_idx %u\n",
3439		 ep->txq_idx, ep->tx_chan, ep->smac_idx, ep->rss_qid,
3440		 ep->l2t->idx);
3441
3442	state_set(&ep->com, CONNECTING);
3443	ep->tos = cm_id->tos;
3444
3445	/* send connect request to rnic */
3446	err = send_connect(ep);
3447	if (!err)
3448		goto out;
3449
3450	cxgb4_l2t_release(ep->l2t);
3451fail4:
3452	dst_release(ep->dst);
3453fail3:
3454	xa_erase_irq(&ep->com.dev->atids, ep->atid);
3455fail5:
3456	cxgb4_free_atid(ep->com.dev->rdev.lldi.tids, ep->atid);
3457fail2:
3458	skb_queue_purge(&ep->com.ep_skb_list);
3459	deref_cm_id(&ep->com);
3460fail1:
3461	c4iw_put_ep(&ep->com);
3462out:
3463	return err;
3464}
3465
3466static int create_server6(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
3467{
3468	int err;
3469	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)
3470				    &ep->com.local_addr;
3471
3472	if (ipv6_addr_type(&sin6->sin6_addr) != IPV6_ADDR_ANY) {
3473		err = cxgb4_clip_get(ep->com.dev->rdev.lldi.ports[0],
3474				     (const u32 *)&sin6->sin6_addr.s6_addr, 1);
3475		if (err)
3476			return err;
3477	}
3478	c4iw_init_wr_wait(ep->com.wr_waitp);
3479	err = cxgb4_create_server6(ep->com.dev->rdev.lldi.ports[0],
3480				   ep->stid, &sin6->sin6_addr,
3481				   sin6->sin6_port,
3482				   ep->com.dev->rdev.lldi.rxq_ids[0]);
3483	if (!err)
3484		err = c4iw_wait_for_reply(&ep->com.dev->rdev,
3485					  ep->com.wr_waitp,
3486					  0, 0, __func__);
3487	else if (err > 0)
3488		err = net_xmit_errno(err);
3489	if (err) {
3490		cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
3491				   (const u32 *)&sin6->sin6_addr.s6_addr, 1);
3492		pr_err("cxgb4_create_server6/filter failed err %d stid %d laddr %pI6 lport %d\n",
3493		       err, ep->stid,
3494		       sin6->sin6_addr.s6_addr, ntohs(sin6->sin6_port));
3495	}
3496	return err;
3497}
3498
3499static int create_server4(struct c4iw_dev *dev, struct c4iw_listen_ep *ep)
3500{
3501	int err;
3502	struct sockaddr_in *sin = (struct sockaddr_in *)
3503				  &ep->com.local_addr;
3504
3505	if (dev->rdev.lldi.enable_fw_ofld_conn) {
3506		do {
3507			err = cxgb4_create_server_filter(
3508				ep->com.dev->rdev.lldi.ports[0], ep->stid,
3509				sin->sin_addr.s_addr, sin->sin_port, 0,
3510				ep->com.dev->rdev.lldi.rxq_ids[0], 0, 0);
3511			if (err == -EBUSY) {
3512				if (c4iw_fatal_error(&ep->com.dev->rdev)) {
3513					err = -EIO;
3514					break;
3515				}
3516				set_current_state(TASK_UNINTERRUPTIBLE);
3517				schedule_timeout(usecs_to_jiffies(100));
3518			}
3519		} while (err == -EBUSY);
3520	} else {
3521		c4iw_init_wr_wait(ep->com.wr_waitp);
3522		err = cxgb4_create_server(ep->com.dev->rdev.lldi.ports[0],
3523				ep->stid, sin->sin_addr.s_addr, sin->sin_port,
3524				0, ep->com.dev->rdev.lldi.rxq_ids[0]);
3525		if (!err)
3526			err = c4iw_wait_for_reply(&ep->com.dev->rdev,
3527						  ep->com.wr_waitp,
3528						  0, 0, __func__);
3529		else if (err > 0)
3530			err = net_xmit_errno(err);
3531	}
3532	if (err)
		pr_err("cxgb4_create_server/filter failed err %d stid %d laddr %pI4 lport %d\n",
		       err, ep->stid,
		       &sin->sin_addr, ntohs(sin->sin_port));
3536	return err;
3537}
3538
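/*
 * iw_cm listen handler: allocate a server TID (or a filter-capable sftid
 * when firmware offload connections are enabled for IPv4) and program the
 * hardware listening server for the requested local address.
 */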
3539int c4iw_create_listen(struct iw_cm_id *cm_id, int backlog)
3540{
3541	int err = 0;
3542	struct c4iw_dev *dev = to_c4iw_dev(cm_id->device);
3543	struct c4iw_listen_ep *ep;
3544
3545	might_sleep();
3546
3547	ep = alloc_ep(sizeof(*ep), GFP_KERNEL);
3548	if (!ep) {
3549		pr_err("%s - cannot alloc ep\n", __func__);
3550		err = -ENOMEM;
3551		goto fail1;
3552	}
3553	skb_queue_head_init(&ep->com.ep_skb_list);
3554	pr_debug("ep %p\n", ep);
3555	ep->com.cm_id = cm_id;
3556	ref_cm_id(&ep->com);
3557	ep->com.dev = dev;
3558	ep->backlog = backlog;
3559	memcpy(&ep->com.local_addr, &cm_id->m_local_addr,
3560	       sizeof(ep->com.local_addr));
3561
3562	/*
3563	 * Allocate a server TID.
3564	 */
3565	if (dev->rdev.lldi.enable_fw_ofld_conn &&
3566	    ep->com.local_addr.ss_family == AF_INET)
3567		ep->stid = cxgb4_alloc_sftid(dev->rdev.lldi.tids,
3568					     cm_id->m_local_addr.ss_family, ep);
3569	else
3570		ep->stid = cxgb4_alloc_stid(dev->rdev.lldi.tids,
3571					    cm_id->m_local_addr.ss_family, ep);
3572
3573	if (ep->stid == -1) {
3574		pr_err("%s - cannot alloc stid\n", __func__);
3575		err = -ENOMEM;
3576		goto fail2;
3577	}
3578	err = xa_insert_irq(&dev->stids, ep->stid, ep, GFP_KERNEL);
3579	if (err)
3580		goto fail3;
3581
3582	state_set(&ep->com, LISTEN);
3583	if (ep->com.local_addr.ss_family == AF_INET)
3584		err = create_server4(dev, ep);
3585	else
3586		err = create_server6(dev, ep);
3587	if (!err) {
3588		cm_id->provider_data = ep;
3589		goto out;
3590	}
3591	xa_erase_irq(&ep->com.dev->stids, ep->stid);
3592fail3:
3593	cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid,
3594			ep->com.local_addr.ss_family);
3595fail2:
3596	deref_cm_id(&ep->com);
3597	c4iw_put_ep(&ep->com);
3598fail1:
3599out:
3600	return err;
3601}
3602
3603int c4iw_destroy_listen(struct iw_cm_id *cm_id)
3604{
3605	int err;
3606	struct c4iw_listen_ep *ep = to_listen_ep(cm_id);
3607
3608	pr_debug("ep %p\n", ep);
3609
3610	might_sleep();
3611	state_set(&ep->com, DEAD);
3612	if (ep->com.dev->rdev.lldi.enable_fw_ofld_conn &&
3613	    ep->com.local_addr.ss_family == AF_INET) {
3614		err = cxgb4_remove_server_filter(
3615			ep->com.dev->rdev.lldi.ports[0], ep->stid,
3616			ep->com.dev->rdev.lldi.rxq_ids[0], false);
3617	} else {
		struct sockaddr_in6 *sin6;

		c4iw_init_wr_wait(ep->com.wr_waitp);
3620		err = cxgb4_remove_server(
3621				ep->com.dev->rdev.lldi.ports[0], ep->stid,
3622				ep->com.dev->rdev.lldi.rxq_ids[0],
3623				ep->com.local_addr.ss_family == AF_INET6);
3624		if (err)
3625			goto done;
3626		err = c4iw_wait_for_reply(&ep->com.dev->rdev, ep->com.wr_waitp,
3627					  0, 0, __func__);
3628		sin6 = (struct sockaddr_in6 *)&ep->com.local_addr;
3629		cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
3630				   (const u32 *)&sin6->sin6_addr.s6_addr, 1);
3631	}
3632	xa_erase_irq(&ep->com.dev->stids, ep->stid);
3633	cxgb4_free_stid(ep->com.dev->rdev.lldi.tids, ep->stid,
3634			ep->com.local_addr.ss_family);
3635done:
3636	deref_cm_id(&ep->com);
3637	c4iw_put_ep(&ep->com);
3638	return err;
3639}
3640
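/*
 * Initiate a local close or abort of the connection, driving the endpoint
 * state machine and sending either a half-close or an abort to the
 * hardware.  On fatal errors the endpoint resources are released here.
 */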
3641int c4iw_ep_disconnect(struct c4iw_ep *ep, int abrupt, gfp_t gfp)
3642{
3643	int ret = 0;
3644	int close = 0;
3645	int fatal = 0;
3646	struct c4iw_rdev *rdev;
3647
3648	mutex_lock(&ep->com.mutex);
3649
3650	pr_debug("ep %p state %s, abrupt %d\n", ep,
3651		 states[ep->com.state], abrupt);
3652
3653	/*
3654	 * Ref the ep here in case we have fatal errors causing the
3655	 * ep to be released and freed.
3656	 */
3657	c4iw_get_ep(&ep->com);
3658
3659	rdev = &ep->com.dev->rdev;
3660	if (c4iw_fatal_error(rdev)) {
3661		fatal = 1;
3662		close_complete_upcall(ep, -EIO);
3663		ep->com.state = DEAD;
3664	}
3665	switch (ep->com.state) {
3666	case MPA_REQ_WAIT:
3667	case MPA_REQ_SENT:
3668	case MPA_REQ_RCVD:
3669	case MPA_REP_SENT:
3670	case FPDU_MODE:
3671	case CONNECTING:
3672		close = 1;
3673		if (abrupt)
3674			ep->com.state = ABORTING;
3675		else {
3676			ep->com.state = CLOSING;
3677
3678			/*
3679			 * if we close before we see the fw4_ack() then we fix
3680			 * up the timer state since we're reusing it.
3681			 */
3682			if (ep->mpa_skb &&
3683			    test_bit(STOP_MPA_TIMER, &ep->com.flags)) {
3684				clear_bit(STOP_MPA_TIMER, &ep->com.flags);
3685				stop_ep_timer(ep);
3686			}
3687			start_ep_timer(ep);
3688		}
3689		set_bit(CLOSE_SENT, &ep->com.flags);
3690		break;
3691	case CLOSING:
3692		if (!test_and_set_bit(CLOSE_SENT, &ep->com.flags)) {
3693			close = 1;
3694			if (abrupt) {
3695				(void)stop_ep_timer(ep);
3696				ep->com.state = ABORTING;
3697			} else
3698				ep->com.state = MORIBUND;
3699		}
3700		break;
3701	case MORIBUND:
3702	case ABORTING:
3703	case DEAD:
3704		pr_debug("ignoring disconnect ep %p state %u\n",
3705			 ep, ep->com.state);
3706		break;
3707	default:
3708		WARN_ONCE(1, "Bad endpoint state %u\n", ep->com.state);
3709		break;
3710	}
3711
3712	if (close) {
3713		if (abrupt) {
3714			set_bit(EP_DISC_ABORT, &ep->com.history);
3715			ret = send_abort(ep);
3716		} else {
3717			set_bit(EP_DISC_CLOSE, &ep->com.history);
3718			ret = send_halfclose(ep);
3719		}
3720		if (ret) {
3721			set_bit(EP_DISC_FAIL, &ep->com.history);
3722			if (!abrupt) {
3723				stop_ep_timer(ep);
3724				close_complete_upcall(ep, -EIO);
3725			}
3726			if (ep->com.qp) {
3727				struct c4iw_qp_attributes attrs;
3728
3729				attrs.next_state = C4IW_QP_STATE_ERROR;
3730				ret = c4iw_modify_qp(ep->com.qp->rhp,
3731						     ep->com.qp,
3732						     C4IW_QP_ATTR_NEXT_STATE,
3733						     &attrs, 1);
3734				if (ret)
3735					pr_err("%s - qp <- error failed!\n",
3736					       __func__);
3737			}
3738			fatal = 1;
3739		}
3740	}
3741	mutex_unlock(&ep->com.mutex);
3742	c4iw_put_ep(&ep->com);
3743	if (fatal)
3744		release_ep_resources(ep);
3745	return ret;
3746}
3747
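/*
 * Handle the FW_OFLD_CONNECTION_WR reply for an active open.  FW_ENOMEM
 * and FW_EADDRINUSE are retried up to ACT_OPEN_RETRY_COUNT times; any
 * other (or persistent) failure is reported to the ULP and the ep is
 * torn down.
 */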
3748static void active_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb,
3749			struct cpl_fw6_msg_ofld_connection_wr_rpl *req)
3750{
3751	struct c4iw_ep *ep;
3752	int atid = be32_to_cpu(req->tid);
3753
3754	ep = (struct c4iw_ep *)lookup_atid(dev->rdev.lldi.tids,
3755					   (__force u32) req->tid);
3756	if (!ep)
3757		return;
3758
3759	switch (req->retval) {
3760	case FW_ENOMEM:
3761		set_bit(ACT_RETRY_NOMEM, &ep->com.history);
3762		if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) {
3763			send_fw_act_open_req(ep, atid);
3764			return;
3765		}
3766		fallthrough;
3767	case FW_EADDRINUSE:
3768		set_bit(ACT_RETRY_INUSE, &ep->com.history);
3769		if (ep->retry_count++ < ACT_OPEN_RETRY_COUNT) {
3770			send_fw_act_open_req(ep, atid);
3771			return;
3772		}
3773		break;
3774	default:
3775		pr_info("%s unexpected ofld conn wr retval %d\n",
3776		       __func__, req->retval);
3777		break;
3778	}
3779	pr_err("active ofld_connect_wr failure %d atid %d\n",
3780	       req->retval, atid);
3781	mutex_lock(&dev->rdev.stats.lock);
3782	dev->rdev.stats.act_ofld_conn_fails++;
3783	mutex_unlock(&dev->rdev.stats.lock);
3784	connect_reply_upcall(ep, status2errno(req->retval));
3785	state_set(&ep->com, DEAD);
3786	if (ep->com.remote_addr.ss_family == AF_INET6) {
3787		struct sockaddr_in6 *sin6 =
3788			(struct sockaddr_in6 *)&ep->com.local_addr;
3789		cxgb4_clip_release(ep->com.dev->rdev.lldi.ports[0],
3790				   (const u32 *)&sin6->sin6_addr.s6_addr, 1);
3791	}
3792	xa_erase_irq(&dev->atids, atid);
3793	cxgb4_free_atid(dev->rdev.lldi.tids, atid);
3794	dst_release(ep->dst);
3795	cxgb4_l2t_release(ep->l2t);
3796	c4iw_put_ep(&ep->com);
3797}
3798
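/*
 * Handle the FW_OFLD_CONNECTION_WR reply for a passive open.  On success
 * the saved skb is rewritten as a CPL_PASS_ACCEPT_REQ and fed through the
 * normal pass_accept_req() path; on failure it is dropped and the failure
 * counted in the device stats.
 */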
3799static void passive_ofld_conn_reply(struct c4iw_dev *dev, struct sk_buff *skb,
3800			struct cpl_fw6_msg_ofld_connection_wr_rpl *req)
3801{
3802	struct sk_buff *rpl_skb;
3803	struct cpl_pass_accept_req *cpl;
3804	int ret;
3805
3806	rpl_skb = (struct sk_buff *)(unsigned long)req->cookie;
3807	if (req->retval) {
3808		pr_err("%s passive open failure %d\n", __func__, req->retval);
3809		mutex_lock(&dev->rdev.stats.lock);
3810		dev->rdev.stats.pas_ofld_conn_fails++;
3811		mutex_unlock(&dev->rdev.stats.lock);
3812		kfree_skb(rpl_skb);
3813	} else {
3814		cpl = (struct cpl_pass_accept_req *)cplhdr(rpl_skb);
3815		OPCODE_TID(cpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_REQ,
3816					(__force u32) htonl(
3817					(__force u32) req->tid)));
3818		ret = pass_accept_req(dev, rpl_skb);
3819		if (!ret)
3820			kfree_skb(rpl_skb);
3821	}
3823}
3824
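/*
 * Extract a 64-bit field that spans two 32-bit TCB words of a GET_TCB
 * reply; the (31 - word) / 2 indexing accounts for the order in which
 * the TCB words are returned.
 */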
3825static inline u64 t4_tcb_get_field64(__be64 *tcb, u16 word)
3826{
3827	u64 tlo = be64_to_cpu(tcb[((31 - word) / 2)]);
3828	u64 thi = be64_to_cpu(tcb[((31 - word) / 2) - 1]);
3829	u64 t;
3830	u32 shift = 32;
3831
3832	t = (thi << shift) | (tlo >> shift);
3833
3834	return t;
3835}
3836
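/* Extract a masked field from a single 32-bit TCB word of a GET_TCB reply. */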
3837static inline u32 t4_tcb_get_field32(__be64 *tcb, u16 word, u32 mask, u32 shift)
3838{
3839	u32 v;
3840	u64 t = be64_to_cpu(tcb[(31 - word) / 2]);
3841
3842	if (word & 0x1)
3843		shift += 32;
3844	v = (t >> shift) & mask;
3845	return v;
3846}
3847
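/*
 * Process a CPL_GET_TCB_RPL: re-read the TCB while TF_RX_PDU_OUT is set
 * (bounded by a small retry limit), then record the final rq_start in
 * srqe_idx and resume the pending peer/local abort processing.
 */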
3848static int read_tcb_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
3849{
3850	struct cpl_get_tcb_rpl *rpl = cplhdr(skb);
3851	__be64 *tcb = (__be64 *)(rpl + 1);
3852	unsigned int tid = GET_TID(rpl);
3853	struct c4iw_ep *ep;
3854	u64 t_flags_64;
3855	u32 rx_pdu_out;
3856
3857	ep = get_ep_from_tid(dev, tid);
3858	if (!ep)
3859		return 0;
3860	/* Examine the TF_RX_PDU_OUT (bit 49 of the t_flags) in order to
3861	 * determine if there's an Rx PDU feedback event pending.
3862	 *
3863	 * If that bit is set, it means we'll need to re-read the TCB's
3864	 * rq_start value. The final value is the one present in a TCB
3865	 * with the TF_RX_PDU_OUT bit cleared.
3866	 */
3867
3868	t_flags_64 = t4_tcb_get_field64(tcb, TCB_T_FLAGS_W);
3869	rx_pdu_out = (t_flags_64 & TF_RX_PDU_OUT_V(1)) >> TF_RX_PDU_OUT_S;
3870
3871	c4iw_put_ep(&ep->com); /* from get_ep_from_tid() */
3872	c4iw_put_ep(&ep->com); /* from read_tcb() */
3873
3874	/* If TF_RX_PDU_OUT bit is set, re-read the TCB */
3875	if (rx_pdu_out) {
3876		if (++ep->rx_pdu_out_cnt >= 2) {
3877			WARN_ONCE(1, "tcb re-read() reached the guard limit, finishing the cleanup\n");
3878			goto cleanup;
3879		}
3880		read_tcb(ep);
3881		return 0;
3882	}
3883
3884	ep->srqe_idx = t4_tcb_get_field32(tcb, TCB_RQ_START_W, TCB_RQ_START_M,
3885					  TCB_RQ_START_S);
3886cleanup:
3887	pr_debug("ep %p tid %u %016x\n", ep, ep->hwtid, ep->srqe_idx);
3888
3889	if (test_bit(PEER_ABORT_IN_PROGRESS, &ep->com.flags))
3890		finish_peer_abort(dev, ep);
3891	else if (test_bit(ABORT_REQ_IN_PROGRESS, &ep->com.flags))
3892		send_abort_req(ep);
3893	else
3894		WARN_ONCE(1, "unexpected state!");
3895
3896	return 0;
3897}
3898
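/*
 * FW6 messages deferred to the work queue: CQ events and
 * FW_OFLD_CONNECTION_WR replies, the latter dispatched by TCP state to
 * the active or passive handler.
 */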
3899static int deferred_fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb)
3900{
3901	struct cpl_fw6_msg *rpl = cplhdr(skb);
3902	struct cpl_fw6_msg_ofld_connection_wr_rpl *req;
3903
3904	switch (rpl->type) {
3905	case FW6_TYPE_CQE:
3906		c4iw_ev_dispatch(dev, (struct t4_cqe *)&rpl->data[0]);
3907		break;
3908	case FW6_TYPE_OFLD_CONNECTION_WR_RPL:
3909		req = (struct cpl_fw6_msg_ofld_connection_wr_rpl *)rpl->data;
3910		switch (req->t_state) {
3911		case TCP_SYN_SENT:
3912			active_ofld_conn_reply(dev, skb, req);
3913			break;
3914		case TCP_SYN_RECV:
3915			passive_ofld_conn_reply(dev, skb, req);
3916			break;
3917		default:
3918			pr_err("%s unexpected ofld conn wr state %d\n",
3919			       __func__, req->t_state);
3920			break;
3921		}
3922		break;
3923	}
3924	return 0;
3925}
3926
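/*
 * Rewrite a received CPL_RX_PKT (a SYN that hit a filter) in place as a
 * cpl_pass_accept_req, parsing the TCP options from the SYN so the packet
 * can later be fed through the normal passive-open path.
 */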
3927static void build_cpl_pass_accept_req(struct sk_buff *skb, int stid, u8 tos)
3928{
3929	__be32 l2info;
3930	__be16 hdr_len, vlantag, len;
3931	u16 eth_hdr_len;
3932	int tcp_hdr_len, ip_hdr_len;
3933	u8 intf;
3934	struct cpl_rx_pkt *cpl = cplhdr(skb);
3935	struct cpl_pass_accept_req *req;
3936	struct tcp_options_received tmp_opt;
3937	struct c4iw_dev *dev;
3938	enum chip_type type;
3939
3940	dev = *((struct c4iw_dev **) (skb->cb + sizeof(void *)));
3941	/* Store values from cpl_rx_pkt in temporary location. */
3942	vlantag = cpl->vlan;
3943	len = cpl->len;
3944	l2info  = cpl->l2info;
3945	hdr_len = cpl->hdr_len;
3946	intf = cpl->iff;
3947
3948	__skb_pull(skb, sizeof(*req) + sizeof(struct rss_header));
3949
3950	/*
3951	 * We need to parse the TCP options from the SYN packet
3952	 * to generate the cpl_pass_accept_req.
3953	 */
3954	memset(&tmp_opt, 0, sizeof(tmp_opt));
3955	tcp_clear_options(&tmp_opt);
3956	tcp_parse_options(&init_net, skb, &tmp_opt, 0, NULL);
3957
3958	req = __skb_push(skb, sizeof(*req));
3959	memset(req, 0, sizeof(*req));
3960	req->l2info = cpu_to_be16(SYN_INTF_V(intf) |
3961			 SYN_MAC_IDX_V(RX_MACIDX_G(
3962			 be32_to_cpu(l2info))) |
3963			 SYN_XACT_MATCH_F);
3964	type = dev->rdev.lldi.adapter_type;
3965	tcp_hdr_len = RX_TCPHDR_LEN_G(be16_to_cpu(hdr_len));
3966	ip_hdr_len = RX_IPHDR_LEN_G(be16_to_cpu(hdr_len));
3967	req->hdr_len =
3968		cpu_to_be32(SYN_RX_CHAN_V(RX_CHAN_G(be32_to_cpu(l2info))));
3969	if (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) {
3970		eth_hdr_len = is_t4(type) ?
3971				RX_ETHHDR_LEN_G(be32_to_cpu(l2info)) :
3972				RX_T5_ETHHDR_LEN_G(be32_to_cpu(l2info));
3973		req->hdr_len |= cpu_to_be32(TCP_HDR_LEN_V(tcp_hdr_len) |
3974					    IP_HDR_LEN_V(ip_hdr_len) |
3975					    ETH_HDR_LEN_V(eth_hdr_len));
3976	} else { /* T6 and later */
3977		eth_hdr_len = RX_T6_ETHHDR_LEN_G(be32_to_cpu(l2info));
3978		req->hdr_len |= cpu_to_be32(T6_TCP_HDR_LEN_V(tcp_hdr_len) |
3979					    T6_IP_HDR_LEN_V(ip_hdr_len) |
3980					    T6_ETH_HDR_LEN_V(eth_hdr_len));
3981	}
3982	req->vlan = vlantag;
3983	req->len = len;
3984	req->tos_stid = cpu_to_be32(PASS_OPEN_TID_V(stid) |
3985				    PASS_OPEN_TOS_V(tos));
3986	req->tcpopt.mss = htons(tmp_opt.mss_clamp);
3987	if (tmp_opt.wscale_ok)
3988		req->tcpopt.wsf = tmp_opt.snd_wscale;
3989	req->tcpopt.tstamp = tmp_opt.saw_tstamp;
3990	if (tmp_opt.sack_ok)
3991		req->tcpopt.sack = 1;
3992	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_REQ, 0));
3994}
3995
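/*
 * Send a FW_OFLD_CONNECTION_WR asking the firmware to complete the
 * passive open for a filter-redirected SYN.  The original skb is stashed
 * in the cookie so the deferred reply handler can finish the accept.
 */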
3996static void send_fw_pass_open_req(struct c4iw_dev *dev, struct sk_buff *skb,
3997				  __be32 laddr, __be16 lport,
3998				  __be32 raddr, __be16 rport,
3999				  u32 rcv_isn, u32 filter, u16 window,
4000				  u32 rss_qid, u8 port_id)
4001{
4002	struct sk_buff *req_skb;
4003	struct fw_ofld_connection_wr *req;
4004	struct cpl_pass_accept_req *cpl = cplhdr(skb);
4005	int ret;
4006
4007	req_skb = alloc_skb(sizeof(struct fw_ofld_connection_wr), GFP_KERNEL);
4008	if (!req_skb)
4009		return;
4010	req = __skb_put_zero(req_skb, sizeof(*req));
4011	req->op_compl = htonl(WR_OP_V(FW_OFLD_CONNECTION_WR) | FW_WR_COMPL_F);
4012	req->len16_pkd = htonl(FW_WR_LEN16_V(DIV_ROUND_UP(sizeof(*req), 16)));
4013	req->le.version_cpl = htonl(FW_OFLD_CONNECTION_WR_CPL_F);
4014	req->le.filter = (__force __be32) filter;
4015	req->le.lport = lport;
4016	req->le.pport = rport;
4017	req->le.u.ipv4.lip = laddr;
4018	req->le.u.ipv4.pip = raddr;
4019	req->tcb.rcv_nxt = htonl(rcv_isn + 1);
4020	req->tcb.rcv_adv = htons(window);
4021	req->tcb.t_state_to_astid =
4022		 htonl(FW_OFLD_CONNECTION_WR_T_STATE_V(TCP_SYN_RECV) |
4023			FW_OFLD_CONNECTION_WR_RCV_SCALE_V(cpl->tcpopt.wsf) |
4024			FW_OFLD_CONNECTION_WR_ASTID_V(
4025			PASS_OPEN_TID_G(ntohl(cpl->tos_stid))));
4026
4027	/*
4028	 * We store the qid in opt2 which will be used by the firmware
4029	 * to send us the wr response.
4030	 */
4031	req->tcb.opt2 = htonl(RSS_QUEUE_V(rss_qid));
4032
4033	/*
4034	 * We initialize the MSS index in the TCB to 0xF so that, when the
4035	 * driver sends the cpl_pass_accept_rpl, the TCB picks up the correct
4036	 * value.  If this were 0, TP would ignore any value > 0 for the MSS
4037	 * index.
4038	 */
4039	req->tcb.opt0 = cpu_to_be64(MSS_IDX_V(0xF));
4040	req->cookie = (uintptr_t)skb;
4041
4042	set_wr_txq(req_skb, CPL_PRIORITY_CONTROL, port_id);
4043	ret = cxgb4_ofld_send(dev->rdev.lldi.ports[0], req_skb);
4044	if (ret < 0) {
4045		pr_err("%s - cxgb4_ofld_send error %d - dropping\n", __func__,
4046		       ret);
4047		kfree_skb(skb);
4048		kfree_skb(req_skb);
4049	}
4050}
4051
4052/*
4053 * Handler for CPL_RX_PKT messages.  These arrive when a filter, rather
4054 * than a server TID, is being used to redirect a SYN packet.  When a
4055 * packet hits the filter it is steered to the offload queue and the
4056 * driver tries to establish the connection using a firmware work
4057 * request.
4058 */
4059static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb)
4060{
4061	int stid;
4062	unsigned int filter;
4063	struct ethhdr *eh = NULL;
4064	struct vlan_ethhdr *vlan_eh = NULL;
4065	struct iphdr *iph;
4066	struct tcphdr *tcph;
4067	struct rss_header *rss = (void *)skb->data;
4068	struct cpl_rx_pkt *cpl = (void *)skb->data;
4069	struct cpl_pass_accept_req *req = (void *)(rss + 1);
4070	struct l2t_entry *e;
4071	struct dst_entry *dst;
4072	struct c4iw_ep *lep = NULL;
4073	u16 window;
4074	struct port_info *pi;
4075	struct net_device *pdev;
4076	u16 rss_qid, eth_hdr_len;
4077	int step;
4078	struct neighbour *neigh;
4079
4080	/* Drop all non-SYN packets */
4081	if (!(cpl->l2info & cpu_to_be32(RXF_SYN_F)))
4082		goto reject;
4083
4084	/*
4085	 * Drop all packets which did not hit the filter.
4086	 * Unlikely to happen.
4087	 */
4088	if (!(rss->filter_hit && rss->filter_tid))
4089		goto reject;
4090
4091	/*
4092	 * Calculate the server TID from the filter hit index in the cpl_rx_pkt.
4093	 */
4094	stid = (__force int) cpu_to_be32((__force u32) rss->hash_val);
4095
4096	lep = (struct c4iw_ep *)get_ep_from_stid(dev, stid);
4097	if (!lep) {
4098		pr_warn("%s connect request on invalid stid %d\n",
4099			__func__, stid);
4100		goto reject;
4101	}
4102
4103	switch (CHELSIO_CHIP_VERSION(dev->rdev.lldi.adapter_type)) {
4104	case CHELSIO_T4:
4105		eth_hdr_len = RX_ETHHDR_LEN_G(be32_to_cpu(cpl->l2info));
4106		break;
4107	case CHELSIO_T5:
4108		eth_hdr_len = RX_T5_ETHHDR_LEN_G(be32_to_cpu(cpl->l2info));
4109		break;
4110	case CHELSIO_T6:
4111		eth_hdr_len = RX_T6_ETHHDR_LEN_G(be32_to_cpu(cpl->l2info));
4112		break;
4113	default:
4114		pr_err("T%d Chip is not supported\n",
4115		       CHELSIO_CHIP_VERSION(dev->rdev.lldi.adapter_type));
4116		goto reject;
4117	}
4118
4119	if (eth_hdr_len == ETH_HLEN) {
4120		eh = (struct ethhdr *)(req + 1);
4121		iph = (struct iphdr *)(eh + 1);
4122	} else {
4123		vlan_eh = (struct vlan_ethhdr *)(req + 1);
4124		iph = (struct iphdr *)(vlan_eh + 1);
4125		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), ntohs(cpl->vlan));
4126	}
4127
4128	if (iph->version != 0x4)
4129		goto reject;
4130
4131	tcph = (struct tcphdr *)(iph + 1);
4132	skb_set_network_header(skb, (void *)iph - (void *)rss);
4133	skb_set_transport_header(skb, (void *)tcph - (void *)rss);
4134	skb_get(skb);
4135
4136	pr_debug("lip 0x%x lport %u pip 0x%x pport %u tos %d\n",
4137		 ntohl(iph->daddr), ntohs(tcph->dest), ntohl(iph->saddr),
4138		 ntohs(tcph->source), iph->tos);
4139
4140	dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev,
4141			      iph->daddr, iph->saddr, tcph->dest,
4142			      tcph->source, iph->tos);
4143	if (!dst) {
4144		pr_err("%s - failed to find dst entry!\n", __func__);
4145		goto reject;
4146	}
4147	neigh = dst_neigh_lookup_skb(dst, skb);
4148
4149	if (!neigh) {
4150		pr_err("%s - failed to allocate neigh!\n", __func__);
4151		goto free_dst;
4152	}
4153
4154	if (neigh->dev->flags & IFF_LOOPBACK) {
4155		pdev = ip_dev_find(&init_net, iph->daddr);
		if (!pdev) {
			pr_err("%s - failed to find device!\n", __func__);
			neigh_release(neigh);
			goto free_dst;
		}
4156		e = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh,
4157				    pdev, 0);
4158		pi = (struct port_info *)netdev_priv(pdev);
4159		dev_put(pdev);
4160	} else {
4161		pdev = get_real_dev(neigh->dev);
4162		e = cxgb4_l2t_get(dev->rdev.lldi.l2t, neigh,
4163					pdev, 0);
4164		pi = (struct port_info *)netdev_priv(pdev);
4165	}
4166	neigh_release(neigh);
4167	if (!e) {
4168		pr_err("%s - failed to allocate l2t entry!\n",
4169		       __func__);
4170		goto free_dst;
4171	}
4172
4173	step = dev->rdev.lldi.nrxq / dev->rdev.lldi.nchan;
4174	rss_qid = dev->rdev.lldi.rxq_ids[pi->port_id * step];
4175	window = (__force u16) htons((__force u16)tcph->window);
4176
4177	/* Calculate filter portion for LE region. */
4178	filter = (__force unsigned int) cpu_to_be32(cxgb4_select_ntuple(
4179						    dev->rdev.lldi.ports[0],
4180						    e));
4181
4182	/*
4183	 * Synthesize the cpl_pass_accept_req. We have everything except the
4184	 * TID. Once firmware sends a reply with TID we update the TID field
4185	 * in cpl and pass it through the regular cpl_pass_accept_req path.
4186	 */
4187	build_cpl_pass_accept_req(skb, stid, iph->tos);
4188	send_fw_pass_open_req(dev, skb, iph->daddr, tcph->dest, iph->saddr,
4189			      tcph->source, ntohl(tcph->seq), filter, window,
4190			      rss_qid, pi->port_id);
4191	cxgb4_l2t_release(e);
4192free_dst:
4193	dst_release(dst);
4194reject:
4195	if (lep)
4196		c4iw_put_ep(&lep->com);
4197	return 0;
4198}
4199
4200/*
4201 * These are the real handlers that are called from a
4202 * work queue.
4203 */
4204static c4iw_handler_func work_handlers[NUM_CPL_CMDS + NUM_FAKE_CPLS] = {
4205	[CPL_ACT_ESTABLISH] = act_establish,
4206	[CPL_ACT_OPEN_RPL] = act_open_rpl,
4207	[CPL_RX_DATA] = rx_data,
4208	[CPL_ABORT_RPL_RSS] = abort_rpl,
4209	[CPL_ABORT_RPL] = abort_rpl,
4210	[CPL_PASS_OPEN_RPL] = pass_open_rpl,
4211	[CPL_CLOSE_LISTSRV_RPL] = close_listsrv_rpl,
4212	[CPL_PASS_ACCEPT_REQ] = pass_accept_req,
4213	[CPL_PASS_ESTABLISH] = pass_establish,
4214	[CPL_PEER_CLOSE] = peer_close,
4215	[CPL_ABORT_REQ_RSS] = peer_abort,
4216	[CPL_CLOSE_CON_RPL] = close_con_rpl,
4217	[CPL_RDMA_TERMINATE] = terminate,
4218	[CPL_FW4_ACK] = fw4_ack,
4219	[CPL_GET_TCB_RPL] = read_tcb_rpl,
4220	[CPL_FW6_MSG] = deferred_fw6_msg,
4221	[CPL_RX_PKT] = rx_pkt,
4222	[FAKE_CPL_PUT_EP_SAFE] = _put_ep_safe,
4223	[FAKE_CPL_PASS_PUT_EP_SAFE] = _put_pass_ep_safe
4224};
4225
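/*
 * Handle an ep whose timer has fired: deliver the state-appropriate
 * upcall and abort the connection, unless the ep is already being torn
 * down by another thread.
 */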
4226static void process_timeout(struct c4iw_ep *ep)
4227{
4228	struct c4iw_qp_attributes attrs;
4229	int abort = 1;
4230
4231	mutex_lock(&ep->com.mutex);
4232	pr_debug("ep %p tid %u state %d\n", ep, ep->hwtid, ep->com.state);
4233	set_bit(TIMEDOUT, &ep->com.history);
4234	switch (ep->com.state) {
4235	case MPA_REQ_SENT:
4236		connect_reply_upcall(ep, -ETIMEDOUT);
4237		break;
4238	case MPA_REQ_WAIT:
4239	case MPA_REQ_RCVD:
4240	case MPA_REP_SENT:
4241	case FPDU_MODE:
4242		break;
4243	case CLOSING:
4244	case MORIBUND:
4245		if (ep->com.cm_id && ep->com.qp) {
4246			attrs.next_state = C4IW_QP_STATE_ERROR;
4247			c4iw_modify_qp(ep->com.qp->rhp,
4248				     ep->com.qp, C4IW_QP_ATTR_NEXT_STATE,
4249				     &attrs, 1);
4250		}
4251		close_complete_upcall(ep, -ETIMEDOUT);
4252		break;
4253	case ABORTING:
4254	case DEAD:
4255
4256		/*
4257		 * These states are expected if the ep timed out at the same
4258		 * time as another thread was calling stop_ep_timer().
4259		 * So we silently do nothing for these states.
4260		 */
4261		abort = 0;
4262		break;
4263	default:
4264		WARN(1, "%s unexpected state ep %p tid %u state %u\n",
4265			__func__, ep, ep->hwtid, ep->com.state);
4266		abort = 0;
4267	}
4268	mutex_unlock(&ep->com.mutex);
4269	if (abort)
4270		c4iw_ep_disconnect(ep, 1, GFP_KERNEL);
4271	c4iw_put_ep(&ep->com);
4272}
4273
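/*
 * Drain the global timeout_list, dropping timeout_lock around the
 * (potentially sleeping) per-ep timeout processing.
 */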
4274static void process_timedout_eps(void)
4275{
4276	struct c4iw_ep *ep;
4277
4278	spin_lock_irq(&timeout_lock);
4279	while (!list_empty(&timeout_list)) {
4280		struct list_head *tmp;
4281
4282		tmp = timeout_list.next;
4283		list_del(tmp);
4284		tmp->next = NULL;
4285		tmp->prev = NULL;
4286		spin_unlock_irq(&timeout_lock);
4287		ep = list_entry(tmp, struct c4iw_ep, entry);
4288		process_timeout(ep);
4289		spin_lock_irq(&timeout_lock);
4290	}
4291	spin_unlock_irq(&timeout_lock);
4292}
4293
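/*
 * Work-queue handler: process any timed-out eps, then dispatch each
 * queued CPL message to its handler in work_handlers[], freeing the skb
 * unless the handler took ownership of it (returned non-zero).
 */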
4294static void process_work(struct work_struct *work)
4295{
4296	struct sk_buff *skb = NULL;
4297	struct c4iw_dev *dev;
4298	struct cpl_act_establish *rpl;
4299	unsigned int opcode;
4300	int ret;
4301
4302	process_timedout_eps();
4303	while ((skb = skb_dequeue(&rxq))) {
4304		rpl = cplhdr(skb);
4305		dev = *((struct c4iw_dev **) (skb->cb + sizeof(void *)));
4306		opcode = rpl->ot.opcode;
4307
4308		if (opcode >= ARRAY_SIZE(work_handlers) ||
4309		    !work_handlers[opcode]) {
4310			pr_err("No handler for opcode 0x%x.\n", opcode);
4311			kfree_skb(skb);
4312		} else {
4313			ret = work_handlers[opcode](dev, skb);
4314			if (!ret)
4315				kfree_skb(skb);
4316		}
4317		process_timedout_eps();
4318	}
4319}
4320
4321static DECLARE_WORK(skb_work, process_work);
4322
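/*
 * Per-ep timer callback: mark the ep TIMEOUT, add it to timeout_list (at
 * most once) and kick the work queue to process it.
 */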
4323static void ep_timeout(struct timer_list *t)
4324{
4325	struct c4iw_ep *ep = from_timer(ep, t, timer);
4326	int kickit = 0;
4327
4328	spin_lock(&timeout_lock);
4329	if (!test_and_set_bit(TIMEOUT, &ep->com.flags)) {
4330		/*
4331		 * Only insert if it is not already on the list.
4332		 */
4333		if (!ep->entry.next) {
4334			list_add_tail(&ep->entry, &timeout_list);
4335			kickit = 1;
4336		}
4337	}
4338	spin_unlock(&timeout_lock);
4339	if (kickit)
4340		queue_work(workq, &skb_work);
4341}
4342
4343/*
4344 * All the CM events are handled on a work queue to provide a safe context.
4345 */
4346static int sched(struct c4iw_dev *dev, struct sk_buff *skb)
4347{
4348
4349	/*
4350	 * Save dev in the skb->cb area.
4351	 */
4352	*((struct c4iw_dev **) (skb->cb + sizeof(void *))) = dev;
4353
4354	/*
4355	 * Queue the skb and schedule the worker thread.
4356	 */
4357	skb_queue_tail(&rxq, skb);
4358	queue_work(workq, &skb_work);
4359	return 0;
4360}
4361
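/* A SET_TCB_RPL is only interesting if it reports an error; just log it. */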
4362static int set_tcb_rpl(struct c4iw_dev *dev, struct sk_buff *skb)
4363{
4364	struct cpl_set_tcb_rpl *rpl = cplhdr(skb);
4365
4366	if (rpl->status != CPL_ERR_NONE) {
4367		pr_err("Unexpected SET_TCB_RPL status %u for tid %u\n",
4368		       rpl->status, GET_TID(rpl));
4369	}
4370	kfree_skb(skb);
4371	return 0;
4372}
4373
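/*
 * CPL_FW6_MSG handler: complete WR waiters directly; defer CQE and
 * offload-connection replies to the work queue.
 */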
4374static int fw6_msg(struct c4iw_dev *dev, struct sk_buff *skb)
4375{
4376	struct cpl_fw6_msg *rpl = cplhdr(skb);
4377	struct c4iw_wr_wait *wr_waitp;
4378	int ret;
4379
4380	pr_debug("type %u\n", rpl->type);
4381
4382	switch (rpl->type) {
4383	case FW6_TYPE_WR_RPL:
4384		ret = (int)((be64_to_cpu(rpl->data[0]) >> 8) & 0xff);
4385		wr_waitp = (struct c4iw_wr_wait *)(__force unsigned long) rpl->data[1];
4386		pr_debug("wr_waitp %p ret %u\n", wr_waitp, ret);
4387		if (wr_waitp)
4388			c4iw_wake_up_deref(wr_waitp, ret ? -ret : 0);
4389		kfree_skb(skb);
4390		break;
4391	case FW6_TYPE_CQE:
4392	case FW6_TYPE_OFLD_CONNECTION_WR_RPL:
4393		sched(dev, skb);
4394		break;
4395	default:
4396		pr_err("%s unexpected fw6 msg type %u\n",
4397		       __func__, rpl->type);
4398		kfree_skb(skb);
4399		break;
4400	}
4401	return 0;
4402}
4403
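/*
 * Called directly for CPL_ABORT_REQ_RSS: wake up any waiter on the ep
 * (unless the abort is negative advice) so a stuck connection tears down
 * promptly, then defer the real abort processing to the work queue.
 */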
4404static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb)
4405{
4406	struct cpl_abort_req_rss *req = cplhdr(skb);
4407	struct c4iw_ep *ep;
4408	unsigned int tid = GET_TID(req);
4409
4410	ep = get_ep_from_tid(dev, tid);
4411	/* This EP will be dereferenced in peer_abort() */
4412	if (!ep) {
4413		pr_warn("Abort on non-existent endpoint, tid %d\n", tid);
4414		kfree_skb(skb);
4415		return 0;
4416	}
4417	if (cxgb_is_neg_adv(req->status)) {
4418		pr_debug("Negative advice on abort - tid %u status %d (%s)\n",
4419			 ep->hwtid, req->status,
4420			 neg_adv_str(req->status));
4421		goto out;
4422	}
4423	pr_debug("ep %p tid %u state %u\n", ep, ep->hwtid, ep->com.state);
4424
4425	c4iw_wake_up_noref(ep->com.wr_waitp, -ECONNRESET);
4426out:
4427	sched(dev, skb);
4428	return 0;
4429}
4430
4431/*
4432 * Most upcalls from the T4 Core go to sched() to
4433 * schedule the processing on a work queue.
4434 */
4435c4iw_handler_func c4iw_handlers[NUM_CPL_CMDS] = {
4436	[CPL_ACT_ESTABLISH] = sched,
4437	[CPL_ACT_OPEN_RPL] = sched,
4438	[CPL_RX_DATA] = sched,
4439	[CPL_ABORT_RPL_RSS] = sched,
4440	[CPL_ABORT_RPL] = sched,
4441	[CPL_PASS_OPEN_RPL] = sched,
4442	[CPL_CLOSE_LISTSRV_RPL] = sched,
4443	[CPL_PASS_ACCEPT_REQ] = sched,
4444	[CPL_PASS_ESTABLISH] = sched,
4445	[CPL_PEER_CLOSE] = sched,
4446	[CPL_CLOSE_CON_RPL] = sched,
4447	[CPL_ABORT_REQ_RSS] = peer_abort_intr,
4448	[CPL_RDMA_TERMINATE] = sched,
4449	[CPL_FW4_ACK] = sched,
4450	[CPL_SET_TCB_RPL] = set_tcb_rpl,
4451	[CPL_GET_TCB_RPL] = sched,
4452	[CPL_FW6_MSG] = fw6_msg,
4453	[CPL_RX_PKT] = sched
4454};
4455
4456int __init c4iw_cm_init(void)
4457{
4458	spin_lock_init(&timeout_lock);
4459	skb_queue_head_init(&rxq);
4460
4461	workq = alloc_ordered_workqueue("iw_cxgb4", WQ_MEM_RECLAIM);
4462	if (!workq)
4463		return -ENOMEM;
4464
4465	return 0;
4466}
4467
4468void c4iw_cm_term(void)
4469{
4470	WARN_ON(!list_empty(&timeout_list));
4472	destroy_workqueue(workq);
4473}
4474