162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause 262306a36Sopenharmony_ci 362306a36Sopenharmony_ci/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */ 462306a36Sopenharmony_ci/* Copyright (c) 2008-2019, IBM Corporation */ 562306a36Sopenharmony_ci 662306a36Sopenharmony_ci#include <linux/errno.h> 762306a36Sopenharmony_ci#include <linux/types.h> 862306a36Sopenharmony_ci#include <linux/net.h> 962306a36Sopenharmony_ci#include <linux/scatterlist.h> 1062306a36Sopenharmony_ci#include <linux/highmem.h> 1162306a36Sopenharmony_ci#include <net/tcp.h> 1262306a36Sopenharmony_ci 1362306a36Sopenharmony_ci#include <rdma/iw_cm.h> 1462306a36Sopenharmony_ci#include <rdma/ib_verbs.h> 1562306a36Sopenharmony_ci#include <rdma/ib_user_verbs.h> 1662306a36Sopenharmony_ci 1762306a36Sopenharmony_ci#include "siw.h" 1862306a36Sopenharmony_ci#include "siw_verbs.h" 1962306a36Sopenharmony_ci#include "siw_mem.h" 2062306a36Sopenharmony_ci 2162306a36Sopenharmony_ci#define MAX_HDR_INLINE \ 2262306a36Sopenharmony_ci (((uint32_t)(sizeof(struct siw_rreq_pkt) - \ 2362306a36Sopenharmony_ci sizeof(struct iwarp_send))) & 0xF8) 2462306a36Sopenharmony_ci 2562306a36Sopenharmony_cistatic struct page *siw_get_pblpage(struct siw_mem *mem, u64 addr, int *idx) 2662306a36Sopenharmony_ci{ 2762306a36Sopenharmony_ci struct siw_pbl *pbl = mem->pbl; 2862306a36Sopenharmony_ci u64 offset = addr - mem->va; 2962306a36Sopenharmony_ci dma_addr_t paddr = siw_pbl_get_buffer(pbl, offset, NULL, idx); 3062306a36Sopenharmony_ci 3162306a36Sopenharmony_ci if (paddr) 3262306a36Sopenharmony_ci return ib_virt_dma_to_page(paddr); 3362306a36Sopenharmony_ci 3462306a36Sopenharmony_ci return NULL; 3562306a36Sopenharmony_ci} 3662306a36Sopenharmony_ci 3762306a36Sopenharmony_ci/* 3862306a36Sopenharmony_ci * Copy short payload at provided destination payload address 3962306a36Sopenharmony_ci */ 4062306a36Sopenharmony_cistatic int siw_try_1seg(struct siw_iwarp_tx *c_tx, void *paddr) 4162306a36Sopenharmony_ci{ 4262306a36Sopenharmony_ci struct siw_wqe *wqe = &c_tx->wqe_active; 4362306a36Sopenharmony_ci struct siw_sge *sge = &wqe->sqe.sge[0]; 4462306a36Sopenharmony_ci u32 bytes = sge->length; 4562306a36Sopenharmony_ci 4662306a36Sopenharmony_ci if (bytes > MAX_HDR_INLINE || wqe->sqe.num_sge != 1) 4762306a36Sopenharmony_ci return MAX_HDR_INLINE + 1; 4862306a36Sopenharmony_ci 4962306a36Sopenharmony_ci if (!bytes) 5062306a36Sopenharmony_ci return 0; 5162306a36Sopenharmony_ci 5262306a36Sopenharmony_ci if (tx_flags(wqe) & SIW_WQE_INLINE) { 5362306a36Sopenharmony_ci memcpy(paddr, &wqe->sqe.sge[1], bytes); 5462306a36Sopenharmony_ci } else { 5562306a36Sopenharmony_ci struct siw_mem *mem = wqe->mem[0]; 5662306a36Sopenharmony_ci 5762306a36Sopenharmony_ci if (!mem->mem_obj) { 5862306a36Sopenharmony_ci /* Kernel client using kva */ 5962306a36Sopenharmony_ci memcpy(paddr, ib_virt_dma_to_ptr(sge->laddr), bytes); 6062306a36Sopenharmony_ci } else if (c_tx->in_syscall) { 6162306a36Sopenharmony_ci if (copy_from_user(paddr, u64_to_user_ptr(sge->laddr), 6262306a36Sopenharmony_ci bytes)) 6362306a36Sopenharmony_ci return -EFAULT; 6462306a36Sopenharmony_ci } else { 6562306a36Sopenharmony_ci unsigned int off = sge->laddr & ~PAGE_MASK; 6662306a36Sopenharmony_ci struct page *p; 6762306a36Sopenharmony_ci char *buffer; 6862306a36Sopenharmony_ci int pbl_idx = 0; 6962306a36Sopenharmony_ci 7062306a36Sopenharmony_ci if (!mem->is_pbl) 7162306a36Sopenharmony_ci p = siw_get_upage(mem->umem, sge->laddr); 7262306a36Sopenharmony_ci else 7362306a36Sopenharmony_ci p = siw_get_pblpage(mem, sge->laddr, &pbl_idx); 7462306a36Sopenharmony_ci 7562306a36Sopenharmony_ci if (unlikely(!p)) 7662306a36Sopenharmony_ci return -EFAULT; 7762306a36Sopenharmony_ci 7862306a36Sopenharmony_ci buffer = kmap_local_page(p); 7962306a36Sopenharmony_ci 8062306a36Sopenharmony_ci if (likely(PAGE_SIZE - off >= bytes)) { 8162306a36Sopenharmony_ci memcpy(paddr, buffer + off, bytes); 8262306a36Sopenharmony_ci } else { 8362306a36Sopenharmony_ci unsigned long part = bytes - (PAGE_SIZE - off); 8462306a36Sopenharmony_ci 8562306a36Sopenharmony_ci memcpy(paddr, buffer + off, part); 8662306a36Sopenharmony_ci kunmap_local(buffer); 8762306a36Sopenharmony_ci 8862306a36Sopenharmony_ci if (!mem->is_pbl) 8962306a36Sopenharmony_ci p = siw_get_upage(mem->umem, 9062306a36Sopenharmony_ci sge->laddr + part); 9162306a36Sopenharmony_ci else 9262306a36Sopenharmony_ci p = siw_get_pblpage(mem, 9362306a36Sopenharmony_ci sge->laddr + part, 9462306a36Sopenharmony_ci &pbl_idx); 9562306a36Sopenharmony_ci if (unlikely(!p)) 9662306a36Sopenharmony_ci return -EFAULT; 9762306a36Sopenharmony_ci 9862306a36Sopenharmony_ci buffer = kmap_local_page(p); 9962306a36Sopenharmony_ci memcpy(paddr + part, buffer, bytes - part); 10062306a36Sopenharmony_ci } 10162306a36Sopenharmony_ci kunmap_local(buffer); 10262306a36Sopenharmony_ci } 10362306a36Sopenharmony_ci } 10462306a36Sopenharmony_ci return (int)bytes; 10562306a36Sopenharmony_ci} 10662306a36Sopenharmony_ci 10762306a36Sopenharmony_ci#define PKT_FRAGMENTED 1 10862306a36Sopenharmony_ci#define PKT_COMPLETE 0 10962306a36Sopenharmony_ci 11062306a36Sopenharmony_ci/* 11162306a36Sopenharmony_ci * siw_qp_prepare_tx() 11262306a36Sopenharmony_ci * 11362306a36Sopenharmony_ci * Prepare tx state for sending out one fpdu. Builds complete pkt 11462306a36Sopenharmony_ci * if no user data or only immediate data are present. 11562306a36Sopenharmony_ci * 11662306a36Sopenharmony_ci * returns PKT_COMPLETE if complete pkt built, PKT_FRAGMENTED otherwise. 11762306a36Sopenharmony_ci */ 11862306a36Sopenharmony_cistatic int siw_qp_prepare_tx(struct siw_iwarp_tx *c_tx) 11962306a36Sopenharmony_ci{ 12062306a36Sopenharmony_ci struct siw_wqe *wqe = &c_tx->wqe_active; 12162306a36Sopenharmony_ci char *crc = NULL; 12262306a36Sopenharmony_ci int data = 0; 12362306a36Sopenharmony_ci 12462306a36Sopenharmony_ci switch (tx_type(wqe)) { 12562306a36Sopenharmony_ci case SIW_OP_READ: 12662306a36Sopenharmony_ci case SIW_OP_READ_LOCAL_INV: 12762306a36Sopenharmony_ci memcpy(&c_tx->pkt.ctrl, 12862306a36Sopenharmony_ci &iwarp_pktinfo[RDMAP_RDMA_READ_REQ].ctrl, 12962306a36Sopenharmony_ci sizeof(struct iwarp_ctrl)); 13062306a36Sopenharmony_ci 13162306a36Sopenharmony_ci c_tx->pkt.rreq.rsvd = 0; 13262306a36Sopenharmony_ci c_tx->pkt.rreq.ddp_qn = htonl(RDMAP_UNTAGGED_QN_RDMA_READ); 13362306a36Sopenharmony_ci c_tx->pkt.rreq.ddp_msn = 13462306a36Sopenharmony_ci htonl(++c_tx->ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ]); 13562306a36Sopenharmony_ci c_tx->pkt.rreq.ddp_mo = 0; 13662306a36Sopenharmony_ci c_tx->pkt.rreq.sink_stag = htonl(wqe->sqe.sge[0].lkey); 13762306a36Sopenharmony_ci c_tx->pkt.rreq.sink_to = 13862306a36Sopenharmony_ci cpu_to_be64(wqe->sqe.sge[0].laddr); 13962306a36Sopenharmony_ci c_tx->pkt.rreq.source_stag = htonl(wqe->sqe.rkey); 14062306a36Sopenharmony_ci c_tx->pkt.rreq.source_to = cpu_to_be64(wqe->sqe.raddr); 14162306a36Sopenharmony_ci c_tx->pkt.rreq.read_size = htonl(wqe->sqe.sge[0].length); 14262306a36Sopenharmony_ci 14362306a36Sopenharmony_ci c_tx->ctrl_len = sizeof(struct iwarp_rdma_rreq); 14462306a36Sopenharmony_ci crc = (char *)&c_tx->pkt.rreq_pkt.crc; 14562306a36Sopenharmony_ci break; 14662306a36Sopenharmony_ci 14762306a36Sopenharmony_ci case SIW_OP_SEND: 14862306a36Sopenharmony_ci if (tx_flags(wqe) & SIW_WQE_SOLICITED) 14962306a36Sopenharmony_ci memcpy(&c_tx->pkt.ctrl, 15062306a36Sopenharmony_ci &iwarp_pktinfo[RDMAP_SEND_SE].ctrl, 15162306a36Sopenharmony_ci sizeof(struct iwarp_ctrl)); 15262306a36Sopenharmony_ci else 15362306a36Sopenharmony_ci memcpy(&c_tx->pkt.ctrl, &iwarp_pktinfo[RDMAP_SEND].ctrl, 15462306a36Sopenharmony_ci sizeof(struct iwarp_ctrl)); 15562306a36Sopenharmony_ci 15662306a36Sopenharmony_ci c_tx->pkt.send.ddp_qn = RDMAP_UNTAGGED_QN_SEND; 15762306a36Sopenharmony_ci c_tx->pkt.send.ddp_msn = 15862306a36Sopenharmony_ci htonl(++c_tx->ddp_msn[RDMAP_UNTAGGED_QN_SEND]); 15962306a36Sopenharmony_ci c_tx->pkt.send.ddp_mo = 0; 16062306a36Sopenharmony_ci 16162306a36Sopenharmony_ci c_tx->pkt.send_inv.inval_stag = 0; 16262306a36Sopenharmony_ci 16362306a36Sopenharmony_ci c_tx->ctrl_len = sizeof(struct iwarp_send); 16462306a36Sopenharmony_ci 16562306a36Sopenharmony_ci crc = (char *)&c_tx->pkt.send_pkt.crc; 16662306a36Sopenharmony_ci data = siw_try_1seg(c_tx, crc); 16762306a36Sopenharmony_ci break; 16862306a36Sopenharmony_ci 16962306a36Sopenharmony_ci case SIW_OP_SEND_REMOTE_INV: 17062306a36Sopenharmony_ci if (tx_flags(wqe) & SIW_WQE_SOLICITED) 17162306a36Sopenharmony_ci memcpy(&c_tx->pkt.ctrl, 17262306a36Sopenharmony_ci &iwarp_pktinfo[RDMAP_SEND_SE_INVAL].ctrl, 17362306a36Sopenharmony_ci sizeof(struct iwarp_ctrl)); 17462306a36Sopenharmony_ci else 17562306a36Sopenharmony_ci memcpy(&c_tx->pkt.ctrl, 17662306a36Sopenharmony_ci &iwarp_pktinfo[RDMAP_SEND_INVAL].ctrl, 17762306a36Sopenharmony_ci sizeof(struct iwarp_ctrl)); 17862306a36Sopenharmony_ci 17962306a36Sopenharmony_ci c_tx->pkt.send.ddp_qn = RDMAP_UNTAGGED_QN_SEND; 18062306a36Sopenharmony_ci c_tx->pkt.send.ddp_msn = 18162306a36Sopenharmony_ci htonl(++c_tx->ddp_msn[RDMAP_UNTAGGED_QN_SEND]); 18262306a36Sopenharmony_ci c_tx->pkt.send.ddp_mo = 0; 18362306a36Sopenharmony_ci 18462306a36Sopenharmony_ci c_tx->pkt.send_inv.inval_stag = cpu_to_be32(wqe->sqe.rkey); 18562306a36Sopenharmony_ci 18662306a36Sopenharmony_ci c_tx->ctrl_len = sizeof(struct iwarp_send_inv); 18762306a36Sopenharmony_ci 18862306a36Sopenharmony_ci crc = (char *)&c_tx->pkt.send_pkt.crc; 18962306a36Sopenharmony_ci data = siw_try_1seg(c_tx, crc); 19062306a36Sopenharmony_ci break; 19162306a36Sopenharmony_ci 19262306a36Sopenharmony_ci case SIW_OP_WRITE: 19362306a36Sopenharmony_ci memcpy(&c_tx->pkt.ctrl, &iwarp_pktinfo[RDMAP_RDMA_WRITE].ctrl, 19462306a36Sopenharmony_ci sizeof(struct iwarp_ctrl)); 19562306a36Sopenharmony_ci 19662306a36Sopenharmony_ci c_tx->pkt.rwrite.sink_stag = htonl(wqe->sqe.rkey); 19762306a36Sopenharmony_ci c_tx->pkt.rwrite.sink_to = cpu_to_be64(wqe->sqe.raddr); 19862306a36Sopenharmony_ci c_tx->ctrl_len = sizeof(struct iwarp_rdma_write); 19962306a36Sopenharmony_ci 20062306a36Sopenharmony_ci crc = (char *)&c_tx->pkt.write_pkt.crc; 20162306a36Sopenharmony_ci data = siw_try_1seg(c_tx, crc); 20262306a36Sopenharmony_ci break; 20362306a36Sopenharmony_ci 20462306a36Sopenharmony_ci case SIW_OP_READ_RESPONSE: 20562306a36Sopenharmony_ci memcpy(&c_tx->pkt.ctrl, 20662306a36Sopenharmony_ci &iwarp_pktinfo[RDMAP_RDMA_READ_RESP].ctrl, 20762306a36Sopenharmony_ci sizeof(struct iwarp_ctrl)); 20862306a36Sopenharmony_ci 20962306a36Sopenharmony_ci /* NBO */ 21062306a36Sopenharmony_ci c_tx->pkt.rresp.sink_stag = cpu_to_be32(wqe->sqe.rkey); 21162306a36Sopenharmony_ci c_tx->pkt.rresp.sink_to = cpu_to_be64(wqe->sqe.raddr); 21262306a36Sopenharmony_ci 21362306a36Sopenharmony_ci c_tx->ctrl_len = sizeof(struct iwarp_rdma_rresp); 21462306a36Sopenharmony_ci 21562306a36Sopenharmony_ci crc = (char *)&c_tx->pkt.write_pkt.crc; 21662306a36Sopenharmony_ci data = siw_try_1seg(c_tx, crc); 21762306a36Sopenharmony_ci break; 21862306a36Sopenharmony_ci 21962306a36Sopenharmony_ci default: 22062306a36Sopenharmony_ci siw_dbg_qp(tx_qp(c_tx), "stale wqe type %d\n", tx_type(wqe)); 22162306a36Sopenharmony_ci return -EOPNOTSUPP; 22262306a36Sopenharmony_ci } 22362306a36Sopenharmony_ci if (unlikely(data < 0)) 22462306a36Sopenharmony_ci return data; 22562306a36Sopenharmony_ci 22662306a36Sopenharmony_ci c_tx->ctrl_sent = 0; 22762306a36Sopenharmony_ci 22862306a36Sopenharmony_ci if (data <= MAX_HDR_INLINE) { 22962306a36Sopenharmony_ci if (data) { 23062306a36Sopenharmony_ci wqe->processed = data; 23162306a36Sopenharmony_ci 23262306a36Sopenharmony_ci c_tx->pkt.ctrl.mpa_len = 23362306a36Sopenharmony_ci htons(c_tx->ctrl_len + data - MPA_HDR_SIZE); 23462306a36Sopenharmony_ci 23562306a36Sopenharmony_ci /* Add pad, if needed */ 23662306a36Sopenharmony_ci data += -(int)data & 0x3; 23762306a36Sopenharmony_ci /* advance CRC location after payload */ 23862306a36Sopenharmony_ci crc += data; 23962306a36Sopenharmony_ci c_tx->ctrl_len += data; 24062306a36Sopenharmony_ci 24162306a36Sopenharmony_ci if (!(c_tx->pkt.ctrl.ddp_rdmap_ctrl & DDP_FLAG_TAGGED)) 24262306a36Sopenharmony_ci c_tx->pkt.c_untagged.ddp_mo = 0; 24362306a36Sopenharmony_ci else 24462306a36Sopenharmony_ci c_tx->pkt.c_tagged.ddp_to = 24562306a36Sopenharmony_ci cpu_to_be64(wqe->sqe.raddr); 24662306a36Sopenharmony_ci } 24762306a36Sopenharmony_ci 24862306a36Sopenharmony_ci *(u32 *)crc = 0; 24962306a36Sopenharmony_ci /* 25062306a36Sopenharmony_ci * Do complete CRC if enabled and short packet 25162306a36Sopenharmony_ci */ 25262306a36Sopenharmony_ci if (c_tx->mpa_crc_hd) { 25362306a36Sopenharmony_ci crypto_shash_init(c_tx->mpa_crc_hd); 25462306a36Sopenharmony_ci if (crypto_shash_update(c_tx->mpa_crc_hd, 25562306a36Sopenharmony_ci (u8 *)&c_tx->pkt, 25662306a36Sopenharmony_ci c_tx->ctrl_len)) 25762306a36Sopenharmony_ci return -EINVAL; 25862306a36Sopenharmony_ci crypto_shash_final(c_tx->mpa_crc_hd, (u8 *)crc); 25962306a36Sopenharmony_ci } 26062306a36Sopenharmony_ci c_tx->ctrl_len += MPA_CRC_SIZE; 26162306a36Sopenharmony_ci 26262306a36Sopenharmony_ci return PKT_COMPLETE; 26362306a36Sopenharmony_ci } 26462306a36Sopenharmony_ci c_tx->ctrl_len += MPA_CRC_SIZE; 26562306a36Sopenharmony_ci c_tx->sge_idx = 0; 26662306a36Sopenharmony_ci c_tx->sge_off = 0; 26762306a36Sopenharmony_ci c_tx->pbl_idx = 0; 26862306a36Sopenharmony_ci 26962306a36Sopenharmony_ci /* 27062306a36Sopenharmony_ci * Allow direct sending out of user buffer if WR is non signalled 27162306a36Sopenharmony_ci * and payload is over threshold. 27262306a36Sopenharmony_ci * Per RDMA verbs, the application should not change the send buffer 27362306a36Sopenharmony_ci * until the work completed. In iWarp, work completion is only 27462306a36Sopenharmony_ci * local delivery to TCP. TCP may reuse the buffer for 27562306a36Sopenharmony_ci * retransmission. Changing unsent data also breaks the CRC, 27662306a36Sopenharmony_ci * if applied. 27762306a36Sopenharmony_ci */ 27862306a36Sopenharmony_ci if (c_tx->zcopy_tx && wqe->bytes >= SENDPAGE_THRESH && 27962306a36Sopenharmony_ci !(tx_flags(wqe) & SIW_WQE_SIGNALLED)) 28062306a36Sopenharmony_ci c_tx->use_sendpage = 1; 28162306a36Sopenharmony_ci else 28262306a36Sopenharmony_ci c_tx->use_sendpage = 0; 28362306a36Sopenharmony_ci 28462306a36Sopenharmony_ci return PKT_FRAGMENTED; 28562306a36Sopenharmony_ci} 28662306a36Sopenharmony_ci 28762306a36Sopenharmony_ci/* 28862306a36Sopenharmony_ci * Send out one complete control type FPDU, or header of FPDU carrying 28962306a36Sopenharmony_ci * data. Used for fixed sized packets like Read.Requests or zero length 29062306a36Sopenharmony_ci * SENDs, WRITEs, READ.Responses, or header only. 29162306a36Sopenharmony_ci */ 29262306a36Sopenharmony_cistatic int siw_tx_ctrl(struct siw_iwarp_tx *c_tx, struct socket *s, 29362306a36Sopenharmony_ci int flags) 29462306a36Sopenharmony_ci{ 29562306a36Sopenharmony_ci struct msghdr msg = { .msg_flags = flags }; 29662306a36Sopenharmony_ci struct kvec iov = { .iov_base = 29762306a36Sopenharmony_ci (char *)&c_tx->pkt.ctrl + c_tx->ctrl_sent, 29862306a36Sopenharmony_ci .iov_len = c_tx->ctrl_len - c_tx->ctrl_sent }; 29962306a36Sopenharmony_ci 30062306a36Sopenharmony_ci int rv = kernel_sendmsg(s, &msg, &iov, 1, 30162306a36Sopenharmony_ci c_tx->ctrl_len - c_tx->ctrl_sent); 30262306a36Sopenharmony_ci 30362306a36Sopenharmony_ci if (rv >= 0) { 30462306a36Sopenharmony_ci c_tx->ctrl_sent += rv; 30562306a36Sopenharmony_ci 30662306a36Sopenharmony_ci if (c_tx->ctrl_sent == c_tx->ctrl_len) 30762306a36Sopenharmony_ci rv = 0; 30862306a36Sopenharmony_ci else 30962306a36Sopenharmony_ci rv = -EAGAIN; 31062306a36Sopenharmony_ci } 31162306a36Sopenharmony_ci return rv; 31262306a36Sopenharmony_ci} 31362306a36Sopenharmony_ci 31462306a36Sopenharmony_ci/* 31562306a36Sopenharmony_ci * 0copy TCP transmit interface: Use MSG_SPLICE_PAGES. 31662306a36Sopenharmony_ci * 31762306a36Sopenharmony_ci * Using sendpage to push page by page appears to be less efficient 31862306a36Sopenharmony_ci * than using sendmsg, even if data are copied. 31962306a36Sopenharmony_ci * 32062306a36Sopenharmony_ci * A general performance limitation might be the extra four bytes 32162306a36Sopenharmony_ci * trailer checksum segment to be pushed after user data. 32262306a36Sopenharmony_ci */ 32362306a36Sopenharmony_cistatic int siw_tcp_sendpages(struct socket *s, struct page **page, int offset, 32462306a36Sopenharmony_ci size_t size) 32562306a36Sopenharmony_ci{ 32662306a36Sopenharmony_ci struct bio_vec bvec; 32762306a36Sopenharmony_ci struct msghdr msg = { 32862306a36Sopenharmony_ci .msg_flags = (MSG_MORE | MSG_DONTWAIT | MSG_SPLICE_PAGES), 32962306a36Sopenharmony_ci }; 33062306a36Sopenharmony_ci struct sock *sk = s->sk; 33162306a36Sopenharmony_ci int i = 0, rv = 0, sent = 0; 33262306a36Sopenharmony_ci 33362306a36Sopenharmony_ci while (size) { 33462306a36Sopenharmony_ci size_t bytes = min_t(size_t, PAGE_SIZE - offset, size); 33562306a36Sopenharmony_ci 33662306a36Sopenharmony_ci if (size + offset <= PAGE_SIZE) 33762306a36Sopenharmony_ci msg.msg_flags &= ~MSG_MORE; 33862306a36Sopenharmony_ci 33962306a36Sopenharmony_ci tcp_rate_check_app_limited(sk); 34062306a36Sopenharmony_ci bvec_set_page(&bvec, page[i], bytes, offset); 34162306a36Sopenharmony_ci iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, size); 34262306a36Sopenharmony_ci 34362306a36Sopenharmony_citry_page_again: 34462306a36Sopenharmony_ci lock_sock(sk); 34562306a36Sopenharmony_ci rv = tcp_sendmsg_locked(sk, &msg, size); 34662306a36Sopenharmony_ci release_sock(sk); 34762306a36Sopenharmony_ci 34862306a36Sopenharmony_ci if (rv > 0) { 34962306a36Sopenharmony_ci size -= rv; 35062306a36Sopenharmony_ci sent += rv; 35162306a36Sopenharmony_ci if (rv != bytes) { 35262306a36Sopenharmony_ci offset += rv; 35362306a36Sopenharmony_ci bytes -= rv; 35462306a36Sopenharmony_ci goto try_page_again; 35562306a36Sopenharmony_ci } 35662306a36Sopenharmony_ci offset = 0; 35762306a36Sopenharmony_ci } else { 35862306a36Sopenharmony_ci if (rv == -EAGAIN || rv == 0) 35962306a36Sopenharmony_ci break; 36062306a36Sopenharmony_ci return rv; 36162306a36Sopenharmony_ci } 36262306a36Sopenharmony_ci i++; 36362306a36Sopenharmony_ci } 36462306a36Sopenharmony_ci return sent; 36562306a36Sopenharmony_ci} 36662306a36Sopenharmony_ci 36762306a36Sopenharmony_ci/* 36862306a36Sopenharmony_ci * siw_0copy_tx() 36962306a36Sopenharmony_ci * 37062306a36Sopenharmony_ci * Pushes list of pages to TCP socket. If pages from multiple 37162306a36Sopenharmony_ci * SGE's, all referenced pages of each SGE are pushed in one 37262306a36Sopenharmony_ci * shot. 37362306a36Sopenharmony_ci */ 37462306a36Sopenharmony_cistatic int siw_0copy_tx(struct socket *s, struct page **page, 37562306a36Sopenharmony_ci struct siw_sge *sge, unsigned int offset, 37662306a36Sopenharmony_ci unsigned int size) 37762306a36Sopenharmony_ci{ 37862306a36Sopenharmony_ci int i = 0, sent = 0, rv; 37962306a36Sopenharmony_ci int sge_bytes = min(sge->length - offset, size); 38062306a36Sopenharmony_ci 38162306a36Sopenharmony_ci offset = (sge->laddr + offset) & ~PAGE_MASK; 38262306a36Sopenharmony_ci 38362306a36Sopenharmony_ci while (sent != size) { 38462306a36Sopenharmony_ci rv = siw_tcp_sendpages(s, &page[i], offset, sge_bytes); 38562306a36Sopenharmony_ci if (rv >= 0) { 38662306a36Sopenharmony_ci sent += rv; 38762306a36Sopenharmony_ci if (size == sent || sge_bytes > rv) 38862306a36Sopenharmony_ci break; 38962306a36Sopenharmony_ci 39062306a36Sopenharmony_ci i += PAGE_ALIGN(sge_bytes + offset) >> PAGE_SHIFT; 39162306a36Sopenharmony_ci sge++; 39262306a36Sopenharmony_ci sge_bytes = min(sge->length, size - sent); 39362306a36Sopenharmony_ci offset = sge->laddr & ~PAGE_MASK; 39462306a36Sopenharmony_ci } else { 39562306a36Sopenharmony_ci sent = rv; 39662306a36Sopenharmony_ci break; 39762306a36Sopenharmony_ci } 39862306a36Sopenharmony_ci } 39962306a36Sopenharmony_ci return sent; 40062306a36Sopenharmony_ci} 40162306a36Sopenharmony_ci 40262306a36Sopenharmony_ci#define MAX_TRAILER (MPA_CRC_SIZE + 4) 40362306a36Sopenharmony_ci 40462306a36Sopenharmony_cistatic void siw_unmap_pages(struct kvec *iov, unsigned long kmap_mask, int len) 40562306a36Sopenharmony_ci{ 40662306a36Sopenharmony_ci int i; 40762306a36Sopenharmony_ci 40862306a36Sopenharmony_ci /* 40962306a36Sopenharmony_ci * Work backwards through the array to honor the kmap_local_page() 41062306a36Sopenharmony_ci * ordering requirements. 41162306a36Sopenharmony_ci */ 41262306a36Sopenharmony_ci for (i = (len-1); i >= 0; i--) { 41362306a36Sopenharmony_ci if (kmap_mask & BIT(i)) { 41462306a36Sopenharmony_ci unsigned long addr = (unsigned long)iov[i].iov_base; 41562306a36Sopenharmony_ci 41662306a36Sopenharmony_ci kunmap_local((void *)(addr & PAGE_MASK)); 41762306a36Sopenharmony_ci } 41862306a36Sopenharmony_ci } 41962306a36Sopenharmony_ci} 42062306a36Sopenharmony_ci 42162306a36Sopenharmony_ci/* 42262306a36Sopenharmony_ci * siw_tx_hdt() tries to push a complete packet to TCP where all 42362306a36Sopenharmony_ci * packet fragments are referenced by the elements of one iovec. 42462306a36Sopenharmony_ci * For the data portion, each involved page must be referenced by 42562306a36Sopenharmony_ci * one extra element. All sge's data can be non-aligned to page 42662306a36Sopenharmony_ci * boundaries. Two more elements are referencing iWARP header 42762306a36Sopenharmony_ci * and trailer: 42862306a36Sopenharmony_ci * MAX_ARRAY = 64KB/PAGE_SIZE + 1 + (2 * (SIW_MAX_SGE - 1) + HDR + TRL 42962306a36Sopenharmony_ci */ 43062306a36Sopenharmony_ci#define MAX_ARRAY ((0xffff / PAGE_SIZE) + 1 + (2 * (SIW_MAX_SGE - 1) + 2)) 43162306a36Sopenharmony_ci 43262306a36Sopenharmony_ci/* 43362306a36Sopenharmony_ci * Write out iov referencing hdr, data and trailer of current FPDU. 43462306a36Sopenharmony_ci * Update transmit state dependent on write return status 43562306a36Sopenharmony_ci */ 43662306a36Sopenharmony_cistatic int siw_tx_hdt(struct siw_iwarp_tx *c_tx, struct socket *s) 43762306a36Sopenharmony_ci{ 43862306a36Sopenharmony_ci struct siw_wqe *wqe = &c_tx->wqe_active; 43962306a36Sopenharmony_ci struct siw_sge *sge = &wqe->sqe.sge[c_tx->sge_idx]; 44062306a36Sopenharmony_ci struct kvec iov[MAX_ARRAY]; 44162306a36Sopenharmony_ci struct page *page_array[MAX_ARRAY]; 44262306a36Sopenharmony_ci struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_EOR }; 44362306a36Sopenharmony_ci 44462306a36Sopenharmony_ci int seg = 0, do_crc = c_tx->do_crc, is_kva = 0, rv; 44562306a36Sopenharmony_ci unsigned int data_len = c_tx->bytes_unsent, hdr_len = 0, trl_len = 0, 44662306a36Sopenharmony_ci sge_off = c_tx->sge_off, sge_idx = c_tx->sge_idx, 44762306a36Sopenharmony_ci pbl_idx = c_tx->pbl_idx; 44862306a36Sopenharmony_ci unsigned long kmap_mask = 0L; 44962306a36Sopenharmony_ci 45062306a36Sopenharmony_ci if (c_tx->state == SIW_SEND_HDR) { 45162306a36Sopenharmony_ci if (c_tx->use_sendpage) { 45262306a36Sopenharmony_ci rv = siw_tx_ctrl(c_tx, s, MSG_DONTWAIT | MSG_MORE); 45362306a36Sopenharmony_ci if (rv) 45462306a36Sopenharmony_ci goto done; 45562306a36Sopenharmony_ci 45662306a36Sopenharmony_ci c_tx->state = SIW_SEND_DATA; 45762306a36Sopenharmony_ci } else { 45862306a36Sopenharmony_ci iov[0].iov_base = 45962306a36Sopenharmony_ci (char *)&c_tx->pkt.ctrl + c_tx->ctrl_sent; 46062306a36Sopenharmony_ci iov[0].iov_len = hdr_len = 46162306a36Sopenharmony_ci c_tx->ctrl_len - c_tx->ctrl_sent; 46262306a36Sopenharmony_ci seg = 1; 46362306a36Sopenharmony_ci } 46462306a36Sopenharmony_ci } 46562306a36Sopenharmony_ci 46662306a36Sopenharmony_ci wqe->processed += data_len; 46762306a36Sopenharmony_ci 46862306a36Sopenharmony_ci while (data_len) { /* walk the list of SGE's */ 46962306a36Sopenharmony_ci unsigned int sge_len = min(sge->length - sge_off, data_len); 47062306a36Sopenharmony_ci unsigned int fp_off = (sge->laddr + sge_off) & ~PAGE_MASK; 47162306a36Sopenharmony_ci struct siw_mem *mem; 47262306a36Sopenharmony_ci 47362306a36Sopenharmony_ci if (!(tx_flags(wqe) & SIW_WQE_INLINE)) { 47462306a36Sopenharmony_ci mem = wqe->mem[sge_idx]; 47562306a36Sopenharmony_ci is_kva = mem->mem_obj == NULL ? 1 : 0; 47662306a36Sopenharmony_ci } else { 47762306a36Sopenharmony_ci is_kva = 1; 47862306a36Sopenharmony_ci } 47962306a36Sopenharmony_ci if (is_kva && !c_tx->use_sendpage) { 48062306a36Sopenharmony_ci /* 48162306a36Sopenharmony_ci * tx from kernel virtual address: either inline data 48262306a36Sopenharmony_ci * or memory region with assigned kernel buffer 48362306a36Sopenharmony_ci */ 48462306a36Sopenharmony_ci iov[seg].iov_base = 48562306a36Sopenharmony_ci ib_virt_dma_to_ptr(sge->laddr + sge_off); 48662306a36Sopenharmony_ci iov[seg].iov_len = sge_len; 48762306a36Sopenharmony_ci 48862306a36Sopenharmony_ci if (do_crc) 48962306a36Sopenharmony_ci crypto_shash_update(c_tx->mpa_crc_hd, 49062306a36Sopenharmony_ci iov[seg].iov_base, 49162306a36Sopenharmony_ci sge_len); 49262306a36Sopenharmony_ci sge_off += sge_len; 49362306a36Sopenharmony_ci data_len -= sge_len; 49462306a36Sopenharmony_ci seg++; 49562306a36Sopenharmony_ci goto sge_done; 49662306a36Sopenharmony_ci } 49762306a36Sopenharmony_ci 49862306a36Sopenharmony_ci while (sge_len) { 49962306a36Sopenharmony_ci size_t plen = min((int)PAGE_SIZE - fp_off, sge_len); 50062306a36Sopenharmony_ci void *kaddr; 50162306a36Sopenharmony_ci 50262306a36Sopenharmony_ci if (!is_kva) { 50362306a36Sopenharmony_ci struct page *p; 50462306a36Sopenharmony_ci 50562306a36Sopenharmony_ci if (mem->is_pbl) 50662306a36Sopenharmony_ci p = siw_get_pblpage( 50762306a36Sopenharmony_ci mem, sge->laddr + sge_off, 50862306a36Sopenharmony_ci &pbl_idx); 50962306a36Sopenharmony_ci else 51062306a36Sopenharmony_ci p = siw_get_upage(mem->umem, 51162306a36Sopenharmony_ci sge->laddr + sge_off); 51262306a36Sopenharmony_ci if (unlikely(!p)) { 51362306a36Sopenharmony_ci siw_unmap_pages(iov, kmap_mask, seg); 51462306a36Sopenharmony_ci wqe->processed -= c_tx->bytes_unsent; 51562306a36Sopenharmony_ci rv = -EFAULT; 51662306a36Sopenharmony_ci goto done_crc; 51762306a36Sopenharmony_ci } 51862306a36Sopenharmony_ci page_array[seg] = p; 51962306a36Sopenharmony_ci 52062306a36Sopenharmony_ci if (!c_tx->use_sendpage) { 52162306a36Sopenharmony_ci void *kaddr = kmap_local_page(p); 52262306a36Sopenharmony_ci 52362306a36Sopenharmony_ci /* Remember for later kunmap() */ 52462306a36Sopenharmony_ci kmap_mask |= BIT(seg); 52562306a36Sopenharmony_ci iov[seg].iov_base = kaddr + fp_off; 52662306a36Sopenharmony_ci iov[seg].iov_len = plen; 52762306a36Sopenharmony_ci 52862306a36Sopenharmony_ci if (do_crc) 52962306a36Sopenharmony_ci crypto_shash_update( 53062306a36Sopenharmony_ci c_tx->mpa_crc_hd, 53162306a36Sopenharmony_ci iov[seg].iov_base, 53262306a36Sopenharmony_ci plen); 53362306a36Sopenharmony_ci } else if (do_crc) { 53462306a36Sopenharmony_ci kaddr = kmap_local_page(p); 53562306a36Sopenharmony_ci crypto_shash_update(c_tx->mpa_crc_hd, 53662306a36Sopenharmony_ci kaddr + fp_off, 53762306a36Sopenharmony_ci plen); 53862306a36Sopenharmony_ci kunmap_local(kaddr); 53962306a36Sopenharmony_ci } 54062306a36Sopenharmony_ci } else { 54162306a36Sopenharmony_ci /* 54262306a36Sopenharmony_ci * Cast to an uintptr_t to preserve all 64 bits 54362306a36Sopenharmony_ci * in sge->laddr. 54462306a36Sopenharmony_ci */ 54562306a36Sopenharmony_ci u64 va = sge->laddr + sge_off; 54662306a36Sopenharmony_ci 54762306a36Sopenharmony_ci page_array[seg] = ib_virt_dma_to_page(va); 54862306a36Sopenharmony_ci if (do_crc) 54962306a36Sopenharmony_ci crypto_shash_update( 55062306a36Sopenharmony_ci c_tx->mpa_crc_hd, 55162306a36Sopenharmony_ci ib_virt_dma_to_ptr(va), 55262306a36Sopenharmony_ci plen); 55362306a36Sopenharmony_ci } 55462306a36Sopenharmony_ci 55562306a36Sopenharmony_ci sge_len -= plen; 55662306a36Sopenharmony_ci sge_off += plen; 55762306a36Sopenharmony_ci data_len -= plen; 55862306a36Sopenharmony_ci fp_off = 0; 55962306a36Sopenharmony_ci 56062306a36Sopenharmony_ci if (++seg >= (int)MAX_ARRAY) { 56162306a36Sopenharmony_ci siw_dbg_qp(tx_qp(c_tx), "to many fragments\n"); 56262306a36Sopenharmony_ci siw_unmap_pages(iov, kmap_mask, seg-1); 56362306a36Sopenharmony_ci wqe->processed -= c_tx->bytes_unsent; 56462306a36Sopenharmony_ci rv = -EMSGSIZE; 56562306a36Sopenharmony_ci goto done_crc; 56662306a36Sopenharmony_ci } 56762306a36Sopenharmony_ci } 56862306a36Sopenharmony_cisge_done: 56962306a36Sopenharmony_ci /* Update SGE variables at end of SGE */ 57062306a36Sopenharmony_ci if (sge_off == sge->length && 57162306a36Sopenharmony_ci (data_len != 0 || wqe->processed < wqe->bytes)) { 57262306a36Sopenharmony_ci sge_idx++; 57362306a36Sopenharmony_ci sge++; 57462306a36Sopenharmony_ci sge_off = 0; 57562306a36Sopenharmony_ci } 57662306a36Sopenharmony_ci } 57762306a36Sopenharmony_ci /* trailer */ 57862306a36Sopenharmony_ci if (likely(c_tx->state != SIW_SEND_TRAILER)) { 57962306a36Sopenharmony_ci iov[seg].iov_base = &c_tx->trailer.pad[4 - c_tx->pad]; 58062306a36Sopenharmony_ci iov[seg].iov_len = trl_len = MAX_TRAILER - (4 - c_tx->pad); 58162306a36Sopenharmony_ci } else { 58262306a36Sopenharmony_ci iov[seg].iov_base = &c_tx->trailer.pad[c_tx->ctrl_sent]; 58362306a36Sopenharmony_ci iov[seg].iov_len = trl_len = MAX_TRAILER - c_tx->ctrl_sent; 58462306a36Sopenharmony_ci } 58562306a36Sopenharmony_ci 58662306a36Sopenharmony_ci if (c_tx->pad) { 58762306a36Sopenharmony_ci *(u32 *)c_tx->trailer.pad = 0; 58862306a36Sopenharmony_ci if (do_crc) 58962306a36Sopenharmony_ci crypto_shash_update(c_tx->mpa_crc_hd, 59062306a36Sopenharmony_ci (u8 *)&c_tx->trailer.crc - c_tx->pad, 59162306a36Sopenharmony_ci c_tx->pad); 59262306a36Sopenharmony_ci } 59362306a36Sopenharmony_ci if (!c_tx->mpa_crc_hd) 59462306a36Sopenharmony_ci c_tx->trailer.crc = 0; 59562306a36Sopenharmony_ci else if (do_crc) 59662306a36Sopenharmony_ci crypto_shash_final(c_tx->mpa_crc_hd, (u8 *)&c_tx->trailer.crc); 59762306a36Sopenharmony_ci 59862306a36Sopenharmony_ci data_len = c_tx->bytes_unsent; 59962306a36Sopenharmony_ci 60062306a36Sopenharmony_ci if (c_tx->use_sendpage) { 60162306a36Sopenharmony_ci rv = siw_0copy_tx(s, page_array, &wqe->sqe.sge[c_tx->sge_idx], 60262306a36Sopenharmony_ci c_tx->sge_off, data_len); 60362306a36Sopenharmony_ci if (rv == data_len) { 60462306a36Sopenharmony_ci rv = kernel_sendmsg(s, &msg, &iov[seg], 1, trl_len); 60562306a36Sopenharmony_ci if (rv > 0) 60662306a36Sopenharmony_ci rv += data_len; 60762306a36Sopenharmony_ci else 60862306a36Sopenharmony_ci rv = data_len; 60962306a36Sopenharmony_ci } 61062306a36Sopenharmony_ci } else { 61162306a36Sopenharmony_ci rv = kernel_sendmsg(s, &msg, iov, seg + 1, 61262306a36Sopenharmony_ci hdr_len + data_len + trl_len); 61362306a36Sopenharmony_ci siw_unmap_pages(iov, kmap_mask, seg); 61462306a36Sopenharmony_ci } 61562306a36Sopenharmony_ci if (rv < (int)hdr_len) { 61662306a36Sopenharmony_ci /* Not even complete hdr pushed or negative rv */ 61762306a36Sopenharmony_ci wqe->processed -= data_len; 61862306a36Sopenharmony_ci if (rv >= 0) { 61962306a36Sopenharmony_ci c_tx->ctrl_sent += rv; 62062306a36Sopenharmony_ci rv = -EAGAIN; 62162306a36Sopenharmony_ci } 62262306a36Sopenharmony_ci goto done_crc; 62362306a36Sopenharmony_ci } 62462306a36Sopenharmony_ci rv -= hdr_len; 62562306a36Sopenharmony_ci 62662306a36Sopenharmony_ci if (rv >= (int)data_len) { 62762306a36Sopenharmony_ci /* all user data pushed to TCP or no data to push */ 62862306a36Sopenharmony_ci if (data_len > 0 && wqe->processed < wqe->bytes) { 62962306a36Sopenharmony_ci /* Save the current state for next tx */ 63062306a36Sopenharmony_ci c_tx->sge_idx = sge_idx; 63162306a36Sopenharmony_ci c_tx->sge_off = sge_off; 63262306a36Sopenharmony_ci c_tx->pbl_idx = pbl_idx; 63362306a36Sopenharmony_ci } 63462306a36Sopenharmony_ci rv -= data_len; 63562306a36Sopenharmony_ci 63662306a36Sopenharmony_ci if (rv == trl_len) /* all pushed */ 63762306a36Sopenharmony_ci rv = 0; 63862306a36Sopenharmony_ci else { 63962306a36Sopenharmony_ci c_tx->state = SIW_SEND_TRAILER; 64062306a36Sopenharmony_ci c_tx->ctrl_len = MAX_TRAILER; 64162306a36Sopenharmony_ci c_tx->ctrl_sent = rv + 4 - c_tx->pad; 64262306a36Sopenharmony_ci c_tx->bytes_unsent = 0; 64362306a36Sopenharmony_ci rv = -EAGAIN; 64462306a36Sopenharmony_ci } 64562306a36Sopenharmony_ci 64662306a36Sopenharmony_ci } else if (data_len > 0) { 64762306a36Sopenharmony_ci /* Maybe some user data pushed to TCP */ 64862306a36Sopenharmony_ci c_tx->state = SIW_SEND_DATA; 64962306a36Sopenharmony_ci wqe->processed -= data_len - rv; 65062306a36Sopenharmony_ci 65162306a36Sopenharmony_ci if (rv) { 65262306a36Sopenharmony_ci /* 65362306a36Sopenharmony_ci * Some bytes out. Recompute tx state based 65462306a36Sopenharmony_ci * on old state and bytes pushed 65562306a36Sopenharmony_ci */ 65662306a36Sopenharmony_ci unsigned int sge_unsent; 65762306a36Sopenharmony_ci 65862306a36Sopenharmony_ci c_tx->bytes_unsent -= rv; 65962306a36Sopenharmony_ci sge = &wqe->sqe.sge[c_tx->sge_idx]; 66062306a36Sopenharmony_ci sge_unsent = sge->length - c_tx->sge_off; 66162306a36Sopenharmony_ci 66262306a36Sopenharmony_ci while (sge_unsent <= rv) { 66362306a36Sopenharmony_ci rv -= sge_unsent; 66462306a36Sopenharmony_ci c_tx->sge_idx++; 66562306a36Sopenharmony_ci c_tx->sge_off = 0; 66662306a36Sopenharmony_ci sge++; 66762306a36Sopenharmony_ci sge_unsent = sge->length; 66862306a36Sopenharmony_ci } 66962306a36Sopenharmony_ci c_tx->sge_off += rv; 67062306a36Sopenharmony_ci } 67162306a36Sopenharmony_ci rv = -EAGAIN; 67262306a36Sopenharmony_ci } 67362306a36Sopenharmony_cidone_crc: 67462306a36Sopenharmony_ci c_tx->do_crc = 0; 67562306a36Sopenharmony_cidone: 67662306a36Sopenharmony_ci return rv; 67762306a36Sopenharmony_ci} 67862306a36Sopenharmony_ci 67962306a36Sopenharmony_cistatic void siw_update_tcpseg(struct siw_iwarp_tx *c_tx, 68062306a36Sopenharmony_ci struct socket *s) 68162306a36Sopenharmony_ci{ 68262306a36Sopenharmony_ci struct tcp_sock *tp = tcp_sk(s->sk); 68362306a36Sopenharmony_ci 68462306a36Sopenharmony_ci if (tp->gso_segs) { 68562306a36Sopenharmony_ci if (c_tx->gso_seg_limit == 0) 68662306a36Sopenharmony_ci c_tx->tcp_seglen = tp->mss_cache * tp->gso_segs; 68762306a36Sopenharmony_ci else 68862306a36Sopenharmony_ci c_tx->tcp_seglen = 68962306a36Sopenharmony_ci tp->mss_cache * 69062306a36Sopenharmony_ci min_t(u16, c_tx->gso_seg_limit, tp->gso_segs); 69162306a36Sopenharmony_ci } else { 69262306a36Sopenharmony_ci c_tx->tcp_seglen = tp->mss_cache; 69362306a36Sopenharmony_ci } 69462306a36Sopenharmony_ci /* Loopback may give odd numbers */ 69562306a36Sopenharmony_ci c_tx->tcp_seglen &= 0xfffffff8; 69662306a36Sopenharmony_ci} 69762306a36Sopenharmony_ci 69862306a36Sopenharmony_ci/* 69962306a36Sopenharmony_ci * siw_prepare_fpdu() 70062306a36Sopenharmony_ci * 70162306a36Sopenharmony_ci * Prepares transmit context to send out one FPDU if FPDU will contain 70262306a36Sopenharmony_ci * user data and user data are not immediate data. 70362306a36Sopenharmony_ci * Computes maximum FPDU length to fill up TCP MSS if possible. 70462306a36Sopenharmony_ci * 70562306a36Sopenharmony_ci * @qp: QP from which to transmit 70662306a36Sopenharmony_ci * @wqe: Current WQE causing transmission 70762306a36Sopenharmony_ci * 70862306a36Sopenharmony_ci * TODO: Take into account real available sendspace on socket 70962306a36Sopenharmony_ci * to avoid header misalignment due to send pausing within 71062306a36Sopenharmony_ci * fpdu transmission 71162306a36Sopenharmony_ci */ 71262306a36Sopenharmony_cistatic void siw_prepare_fpdu(struct siw_qp *qp, struct siw_wqe *wqe) 71362306a36Sopenharmony_ci{ 71462306a36Sopenharmony_ci struct siw_iwarp_tx *c_tx = &qp->tx_ctx; 71562306a36Sopenharmony_ci int data_len; 71662306a36Sopenharmony_ci 71762306a36Sopenharmony_ci c_tx->ctrl_len = 71862306a36Sopenharmony_ci iwarp_pktinfo[__rdmap_get_opcode(&c_tx->pkt.ctrl)].hdr_len; 71962306a36Sopenharmony_ci c_tx->ctrl_sent = 0; 72062306a36Sopenharmony_ci 72162306a36Sopenharmony_ci /* 72262306a36Sopenharmony_ci * Update target buffer offset if any 72362306a36Sopenharmony_ci */ 72462306a36Sopenharmony_ci if (!(c_tx->pkt.ctrl.ddp_rdmap_ctrl & DDP_FLAG_TAGGED)) 72562306a36Sopenharmony_ci /* Untagged message */ 72662306a36Sopenharmony_ci c_tx->pkt.c_untagged.ddp_mo = cpu_to_be32(wqe->processed); 72762306a36Sopenharmony_ci else /* Tagged message */ 72862306a36Sopenharmony_ci c_tx->pkt.c_tagged.ddp_to = 72962306a36Sopenharmony_ci cpu_to_be64(wqe->sqe.raddr + wqe->processed); 73062306a36Sopenharmony_ci 73162306a36Sopenharmony_ci data_len = wqe->bytes - wqe->processed; 73262306a36Sopenharmony_ci if (data_len + c_tx->ctrl_len + MPA_CRC_SIZE > c_tx->tcp_seglen) { 73362306a36Sopenharmony_ci /* Trim DDP payload to fit into current TCP segment */ 73462306a36Sopenharmony_ci data_len = c_tx->tcp_seglen - (c_tx->ctrl_len + MPA_CRC_SIZE); 73562306a36Sopenharmony_ci c_tx->pkt.ctrl.ddp_rdmap_ctrl &= ~DDP_FLAG_LAST; 73662306a36Sopenharmony_ci c_tx->pad = 0; 73762306a36Sopenharmony_ci } else { 73862306a36Sopenharmony_ci c_tx->pkt.ctrl.ddp_rdmap_ctrl |= DDP_FLAG_LAST; 73962306a36Sopenharmony_ci c_tx->pad = -data_len & 0x3; 74062306a36Sopenharmony_ci } 74162306a36Sopenharmony_ci c_tx->bytes_unsent = data_len; 74262306a36Sopenharmony_ci 74362306a36Sopenharmony_ci c_tx->pkt.ctrl.mpa_len = 74462306a36Sopenharmony_ci htons(c_tx->ctrl_len + data_len - MPA_HDR_SIZE); 74562306a36Sopenharmony_ci 74662306a36Sopenharmony_ci /* 74762306a36Sopenharmony_ci * Init MPA CRC computation 74862306a36Sopenharmony_ci */ 74962306a36Sopenharmony_ci if (c_tx->mpa_crc_hd) { 75062306a36Sopenharmony_ci crypto_shash_init(c_tx->mpa_crc_hd); 75162306a36Sopenharmony_ci crypto_shash_update(c_tx->mpa_crc_hd, (u8 *)&c_tx->pkt, 75262306a36Sopenharmony_ci c_tx->ctrl_len); 75362306a36Sopenharmony_ci c_tx->do_crc = 1; 75462306a36Sopenharmony_ci } 75562306a36Sopenharmony_ci} 75662306a36Sopenharmony_ci 75762306a36Sopenharmony_ci/* 75862306a36Sopenharmony_ci * siw_check_sgl_tx() 75962306a36Sopenharmony_ci * 76062306a36Sopenharmony_ci * Check permissions for a list of SGE's (SGL). 76162306a36Sopenharmony_ci * A successful check will have all memory referenced 76262306a36Sopenharmony_ci * for transmission resolved and assigned to the WQE. 76362306a36Sopenharmony_ci * 76462306a36Sopenharmony_ci * @pd: Protection Domain SGL should belong to 76562306a36Sopenharmony_ci * @wqe: WQE to be checked 76662306a36Sopenharmony_ci * @perms: requested access permissions 76762306a36Sopenharmony_ci * 76862306a36Sopenharmony_ci */ 76962306a36Sopenharmony_ci 77062306a36Sopenharmony_cistatic int siw_check_sgl_tx(struct ib_pd *pd, struct siw_wqe *wqe, 77162306a36Sopenharmony_ci enum ib_access_flags perms) 77262306a36Sopenharmony_ci{ 77362306a36Sopenharmony_ci struct siw_sge *sge = &wqe->sqe.sge[0]; 77462306a36Sopenharmony_ci int i, len, num_sge = wqe->sqe.num_sge; 77562306a36Sopenharmony_ci 77662306a36Sopenharmony_ci if (unlikely(num_sge > SIW_MAX_SGE)) 77762306a36Sopenharmony_ci return -EINVAL; 77862306a36Sopenharmony_ci 77962306a36Sopenharmony_ci for (i = 0, len = 0; num_sge; num_sge--, i++, sge++) { 78062306a36Sopenharmony_ci /* 78162306a36Sopenharmony_ci * rdma verbs: do not check stag for a zero length sge 78262306a36Sopenharmony_ci */ 78362306a36Sopenharmony_ci if (sge->length) { 78462306a36Sopenharmony_ci int rv = siw_check_sge(pd, sge, &wqe->mem[i], perms, 0, 78562306a36Sopenharmony_ci sge->length); 78662306a36Sopenharmony_ci 78762306a36Sopenharmony_ci if (unlikely(rv != E_ACCESS_OK)) 78862306a36Sopenharmony_ci return rv; 78962306a36Sopenharmony_ci } 79062306a36Sopenharmony_ci len += sge->length; 79162306a36Sopenharmony_ci } 79262306a36Sopenharmony_ci return len; 79362306a36Sopenharmony_ci} 79462306a36Sopenharmony_ci 79562306a36Sopenharmony_ci/* 79662306a36Sopenharmony_ci * siw_qp_sq_proc_tx() 79762306a36Sopenharmony_ci * 79862306a36Sopenharmony_ci * Process one WQE which needs transmission on the wire. 79962306a36Sopenharmony_ci */ 80062306a36Sopenharmony_cistatic int siw_qp_sq_proc_tx(struct siw_qp *qp, struct siw_wqe *wqe) 80162306a36Sopenharmony_ci{ 80262306a36Sopenharmony_ci struct siw_iwarp_tx *c_tx = &qp->tx_ctx; 80362306a36Sopenharmony_ci struct socket *s = qp->attrs.sk; 80462306a36Sopenharmony_ci int rv = 0, burst_len = qp->tx_ctx.burst; 80562306a36Sopenharmony_ci enum rdmap_ecode ecode = RDMAP_ECODE_CATASTROPHIC_STREAM; 80662306a36Sopenharmony_ci 80762306a36Sopenharmony_ci if (unlikely(wqe->wr_status == SIW_WR_IDLE)) 80862306a36Sopenharmony_ci return 0; 80962306a36Sopenharmony_ci 81062306a36Sopenharmony_ci if (!burst_len) 81162306a36Sopenharmony_ci burst_len = SQ_USER_MAXBURST; 81262306a36Sopenharmony_ci 81362306a36Sopenharmony_ci if (wqe->wr_status == SIW_WR_QUEUED) { 81462306a36Sopenharmony_ci if (!(wqe->sqe.flags & SIW_WQE_INLINE)) { 81562306a36Sopenharmony_ci if (tx_type(wqe) == SIW_OP_READ_RESPONSE) 81662306a36Sopenharmony_ci wqe->sqe.num_sge = 1; 81762306a36Sopenharmony_ci 81862306a36Sopenharmony_ci if (tx_type(wqe) != SIW_OP_READ && 81962306a36Sopenharmony_ci tx_type(wqe) != SIW_OP_READ_LOCAL_INV) { 82062306a36Sopenharmony_ci /* 82162306a36Sopenharmony_ci * Reference memory to be tx'd w/o checking 82262306a36Sopenharmony_ci * access for LOCAL_READ permission, since 82362306a36Sopenharmony_ci * not defined in RDMA core. 82462306a36Sopenharmony_ci */ 82562306a36Sopenharmony_ci rv = siw_check_sgl_tx(qp->pd, wqe, 0); 82662306a36Sopenharmony_ci if (rv < 0) { 82762306a36Sopenharmony_ci if (tx_type(wqe) == 82862306a36Sopenharmony_ci SIW_OP_READ_RESPONSE) 82962306a36Sopenharmony_ci ecode = siw_rdmap_error(-rv); 83062306a36Sopenharmony_ci rv = -EINVAL; 83162306a36Sopenharmony_ci goto tx_error; 83262306a36Sopenharmony_ci } 83362306a36Sopenharmony_ci wqe->bytes = rv; 83462306a36Sopenharmony_ci } else { 83562306a36Sopenharmony_ci wqe->bytes = 0; 83662306a36Sopenharmony_ci } 83762306a36Sopenharmony_ci } else { 83862306a36Sopenharmony_ci wqe->bytes = wqe->sqe.sge[0].length; 83962306a36Sopenharmony_ci if (!rdma_is_kernel_res(&qp->base_qp.res)) { 84062306a36Sopenharmony_ci if (wqe->bytes > SIW_MAX_INLINE) { 84162306a36Sopenharmony_ci rv = -EINVAL; 84262306a36Sopenharmony_ci goto tx_error; 84362306a36Sopenharmony_ci } 84462306a36Sopenharmony_ci wqe->sqe.sge[0].laddr = 84562306a36Sopenharmony_ci (u64)(uintptr_t)&wqe->sqe.sge[1]; 84662306a36Sopenharmony_ci } 84762306a36Sopenharmony_ci } 84862306a36Sopenharmony_ci wqe->wr_status = SIW_WR_INPROGRESS; 84962306a36Sopenharmony_ci wqe->processed = 0; 85062306a36Sopenharmony_ci 85162306a36Sopenharmony_ci siw_update_tcpseg(c_tx, s); 85262306a36Sopenharmony_ci 85362306a36Sopenharmony_ci rv = siw_qp_prepare_tx(c_tx); 85462306a36Sopenharmony_ci if (rv == PKT_FRAGMENTED) { 85562306a36Sopenharmony_ci c_tx->state = SIW_SEND_HDR; 85662306a36Sopenharmony_ci siw_prepare_fpdu(qp, wqe); 85762306a36Sopenharmony_ci } else if (rv == PKT_COMPLETE) { 85862306a36Sopenharmony_ci c_tx->state = SIW_SEND_SHORT_FPDU; 85962306a36Sopenharmony_ci } else { 86062306a36Sopenharmony_ci goto tx_error; 86162306a36Sopenharmony_ci } 86262306a36Sopenharmony_ci } 86362306a36Sopenharmony_ci 86462306a36Sopenharmony_cinext_segment: 86562306a36Sopenharmony_ci siw_dbg_qp(qp, "wr type %d, state %d, data %u, sent %u, id %llx\n", 86662306a36Sopenharmony_ci tx_type(wqe), wqe->wr_status, wqe->bytes, wqe->processed, 86762306a36Sopenharmony_ci wqe->sqe.id); 86862306a36Sopenharmony_ci 86962306a36Sopenharmony_ci if (--burst_len == 0) { 87062306a36Sopenharmony_ci rv = -EINPROGRESS; 87162306a36Sopenharmony_ci goto tx_done; 87262306a36Sopenharmony_ci } 87362306a36Sopenharmony_ci if (c_tx->state == SIW_SEND_SHORT_FPDU) { 87462306a36Sopenharmony_ci enum siw_opcode tx_type = tx_type(wqe); 87562306a36Sopenharmony_ci unsigned int msg_flags; 87662306a36Sopenharmony_ci 87762306a36Sopenharmony_ci if (siw_sq_empty(qp) || !siw_tcp_nagle || burst_len == 1) 87862306a36Sopenharmony_ci /* 87962306a36Sopenharmony_ci * End current TCP segment, if SQ runs empty, 88062306a36Sopenharmony_ci * or siw_tcp_nagle is not set, or we bail out 88162306a36Sopenharmony_ci * soon due to no burst credit left. 88262306a36Sopenharmony_ci */ 88362306a36Sopenharmony_ci msg_flags = MSG_DONTWAIT; 88462306a36Sopenharmony_ci else 88562306a36Sopenharmony_ci msg_flags = MSG_DONTWAIT | MSG_MORE; 88662306a36Sopenharmony_ci 88762306a36Sopenharmony_ci rv = siw_tx_ctrl(c_tx, s, msg_flags); 88862306a36Sopenharmony_ci 88962306a36Sopenharmony_ci if (!rv && tx_type != SIW_OP_READ && 89062306a36Sopenharmony_ci tx_type != SIW_OP_READ_LOCAL_INV) 89162306a36Sopenharmony_ci wqe->processed = wqe->bytes; 89262306a36Sopenharmony_ci 89362306a36Sopenharmony_ci goto tx_done; 89462306a36Sopenharmony_ci 89562306a36Sopenharmony_ci } else { 89662306a36Sopenharmony_ci rv = siw_tx_hdt(c_tx, s); 89762306a36Sopenharmony_ci } 89862306a36Sopenharmony_ci if (!rv) { 89962306a36Sopenharmony_ci /* 90062306a36Sopenharmony_ci * One segment sent. Processing completed if last 90162306a36Sopenharmony_ci * segment, Do next segment otherwise. 90262306a36Sopenharmony_ci */ 90362306a36Sopenharmony_ci if (unlikely(c_tx->tx_suspend)) { 90462306a36Sopenharmony_ci /* 90562306a36Sopenharmony_ci * Verbs, 6.4.: Try stopping sending after a full 90662306a36Sopenharmony_ci * DDP segment if the connection goes down 90762306a36Sopenharmony_ci * (== peer halfclose) 90862306a36Sopenharmony_ci */ 90962306a36Sopenharmony_ci rv = -ECONNABORTED; 91062306a36Sopenharmony_ci goto tx_done; 91162306a36Sopenharmony_ci } 91262306a36Sopenharmony_ci if (c_tx->pkt.ctrl.ddp_rdmap_ctrl & DDP_FLAG_LAST) { 91362306a36Sopenharmony_ci siw_dbg_qp(qp, "WQE completed\n"); 91462306a36Sopenharmony_ci goto tx_done; 91562306a36Sopenharmony_ci } 91662306a36Sopenharmony_ci c_tx->state = SIW_SEND_HDR; 91762306a36Sopenharmony_ci 91862306a36Sopenharmony_ci siw_update_tcpseg(c_tx, s); 91962306a36Sopenharmony_ci 92062306a36Sopenharmony_ci siw_prepare_fpdu(qp, wqe); 92162306a36Sopenharmony_ci goto next_segment; 92262306a36Sopenharmony_ci } 92362306a36Sopenharmony_citx_done: 92462306a36Sopenharmony_ci qp->tx_ctx.burst = burst_len; 92562306a36Sopenharmony_ci return rv; 92662306a36Sopenharmony_ci 92762306a36Sopenharmony_citx_error: 92862306a36Sopenharmony_ci if (ecode != RDMAP_ECODE_CATASTROPHIC_STREAM) 92962306a36Sopenharmony_ci siw_init_terminate(qp, TERM_ERROR_LAYER_RDMAP, 93062306a36Sopenharmony_ci RDMAP_ETYPE_REMOTE_PROTECTION, ecode, 1); 93162306a36Sopenharmony_ci else 93262306a36Sopenharmony_ci siw_init_terminate(qp, TERM_ERROR_LAYER_RDMAP, 93362306a36Sopenharmony_ci RDMAP_ETYPE_CATASTROPHIC, 93462306a36Sopenharmony_ci RDMAP_ECODE_UNSPECIFIED, 1); 93562306a36Sopenharmony_ci return rv; 93662306a36Sopenharmony_ci} 93762306a36Sopenharmony_ci 93862306a36Sopenharmony_cistatic int siw_fastreg_mr(struct ib_pd *pd, struct siw_sqe *sqe) 93962306a36Sopenharmony_ci{ 94062306a36Sopenharmony_ci struct ib_mr *base_mr = (struct ib_mr *)(uintptr_t)sqe->base_mr; 94162306a36Sopenharmony_ci struct siw_device *sdev = to_siw_dev(pd->device); 94262306a36Sopenharmony_ci struct siw_mem *mem; 94362306a36Sopenharmony_ci int rv = 0; 94462306a36Sopenharmony_ci 94562306a36Sopenharmony_ci siw_dbg_pd(pd, "STag 0x%08x\n", sqe->rkey); 94662306a36Sopenharmony_ci 94762306a36Sopenharmony_ci if (unlikely(!base_mr)) { 94862306a36Sopenharmony_ci pr_warn("siw: fastreg: STag 0x%08x unknown\n", sqe->rkey); 94962306a36Sopenharmony_ci return -EINVAL; 95062306a36Sopenharmony_ci } 95162306a36Sopenharmony_ci 95262306a36Sopenharmony_ci if (unlikely(base_mr->rkey >> 8 != sqe->rkey >> 8)) { 95362306a36Sopenharmony_ci pr_warn("siw: fastreg: STag 0x%08x: bad MR\n", sqe->rkey); 95462306a36Sopenharmony_ci return -EINVAL; 95562306a36Sopenharmony_ci } 95662306a36Sopenharmony_ci 95762306a36Sopenharmony_ci mem = siw_mem_id2obj(sdev, sqe->rkey >> 8); 95862306a36Sopenharmony_ci if (unlikely(!mem)) { 95962306a36Sopenharmony_ci pr_warn("siw: fastreg: STag 0x%08x unknown\n", sqe->rkey); 96062306a36Sopenharmony_ci return -EINVAL; 96162306a36Sopenharmony_ci } 96262306a36Sopenharmony_ci 96362306a36Sopenharmony_ci if (unlikely(mem->pd != pd)) { 96462306a36Sopenharmony_ci pr_warn("siw: fastreg: PD mismatch\n"); 96562306a36Sopenharmony_ci rv = -EINVAL; 96662306a36Sopenharmony_ci goto out; 96762306a36Sopenharmony_ci } 96862306a36Sopenharmony_ci if (unlikely(mem->stag_valid)) { 96962306a36Sopenharmony_ci pr_warn("siw: fastreg: STag 0x%08x already valid\n", sqe->rkey); 97062306a36Sopenharmony_ci rv = -EINVAL; 97162306a36Sopenharmony_ci goto out; 97262306a36Sopenharmony_ci } 97362306a36Sopenharmony_ci /* Refresh STag since user may have changed key part */ 97462306a36Sopenharmony_ci mem->stag = sqe->rkey; 97562306a36Sopenharmony_ci mem->perms = sqe->access; 97662306a36Sopenharmony_ci 97762306a36Sopenharmony_ci siw_dbg_mem(mem, "STag 0x%08x now valid\n", sqe->rkey); 97862306a36Sopenharmony_ci mem->va = base_mr->iova; 97962306a36Sopenharmony_ci mem->stag_valid = 1; 98062306a36Sopenharmony_ciout: 98162306a36Sopenharmony_ci siw_mem_put(mem); 98262306a36Sopenharmony_ci return rv; 98362306a36Sopenharmony_ci} 98462306a36Sopenharmony_ci 98562306a36Sopenharmony_cistatic int siw_qp_sq_proc_local(struct siw_qp *qp, struct siw_wqe *wqe) 98662306a36Sopenharmony_ci{ 98762306a36Sopenharmony_ci int rv; 98862306a36Sopenharmony_ci 98962306a36Sopenharmony_ci switch (tx_type(wqe)) { 99062306a36Sopenharmony_ci case SIW_OP_REG_MR: 99162306a36Sopenharmony_ci rv = siw_fastreg_mr(qp->pd, &wqe->sqe); 99262306a36Sopenharmony_ci break; 99362306a36Sopenharmony_ci 99462306a36Sopenharmony_ci case SIW_OP_INVAL_STAG: 99562306a36Sopenharmony_ci rv = siw_invalidate_stag(qp->pd, wqe->sqe.rkey); 99662306a36Sopenharmony_ci break; 99762306a36Sopenharmony_ci 99862306a36Sopenharmony_ci default: 99962306a36Sopenharmony_ci rv = -EINVAL; 100062306a36Sopenharmony_ci } 100162306a36Sopenharmony_ci return rv; 100262306a36Sopenharmony_ci} 100362306a36Sopenharmony_ci 100462306a36Sopenharmony_ci/* 100562306a36Sopenharmony_ci * siw_qp_sq_process() 100662306a36Sopenharmony_ci * 100762306a36Sopenharmony_ci * Core TX path routine for RDMAP/DDP/MPA using a TCP kernel socket. 100862306a36Sopenharmony_ci * Sends RDMAP payload for the current SQ WR @wqe of @qp in one or more 100962306a36Sopenharmony_ci * MPA FPDUs, each containing a DDP segment. 101062306a36Sopenharmony_ci * 101162306a36Sopenharmony_ci * SQ processing may occur in user context as a result of posting 101262306a36Sopenharmony_ci * new WQE's or from siw_sq_work_handler() context. Processing in 101362306a36Sopenharmony_ci * user context is limited to non-kernel verbs users. 101462306a36Sopenharmony_ci * 101562306a36Sopenharmony_ci * SQ processing may get paused anytime, possibly in the middle of a WR 101662306a36Sopenharmony_ci * or FPDU, if insufficient send space is available. SQ processing 101762306a36Sopenharmony_ci * gets resumed from siw_sq_work_handler(), if send space becomes 101862306a36Sopenharmony_ci * available again. 101962306a36Sopenharmony_ci * 102062306a36Sopenharmony_ci * Must be called with the QP state read-locked. 102162306a36Sopenharmony_ci * 102262306a36Sopenharmony_ci * Note: 102362306a36Sopenharmony_ci * An outbound RREQ can be satisfied by the corresponding RRESP 102462306a36Sopenharmony_ci * _before_ it gets assigned to the ORQ. This happens regularly 102562306a36Sopenharmony_ci * in RDMA READ via loopback case. Since both outbound RREQ and 102662306a36Sopenharmony_ci * inbound RRESP can be handled by the same CPU, locking the ORQ 102762306a36Sopenharmony_ci * is dead-lock prone and thus not an option. With that, the 102862306a36Sopenharmony_ci * RREQ gets assigned to the ORQ _before_ being sent - see 102962306a36Sopenharmony_ci * siw_activate_tx() - and pulled back in case of send failure. 103062306a36Sopenharmony_ci */ 103162306a36Sopenharmony_ciint siw_qp_sq_process(struct siw_qp *qp) 103262306a36Sopenharmony_ci{ 103362306a36Sopenharmony_ci struct siw_wqe *wqe = tx_wqe(qp); 103462306a36Sopenharmony_ci enum siw_opcode tx_type; 103562306a36Sopenharmony_ci unsigned long flags; 103662306a36Sopenharmony_ci int rv = 0; 103762306a36Sopenharmony_ci 103862306a36Sopenharmony_ci siw_dbg_qp(qp, "enter for type %d\n", tx_type(wqe)); 103962306a36Sopenharmony_ci 104062306a36Sopenharmony_cinext_wqe: 104162306a36Sopenharmony_ci /* 104262306a36Sopenharmony_ci * Stop QP processing if SQ state changed 104362306a36Sopenharmony_ci */ 104462306a36Sopenharmony_ci if (unlikely(qp->tx_ctx.tx_suspend)) { 104562306a36Sopenharmony_ci siw_dbg_qp(qp, "tx suspended\n"); 104662306a36Sopenharmony_ci goto done; 104762306a36Sopenharmony_ci } 104862306a36Sopenharmony_ci tx_type = tx_type(wqe); 104962306a36Sopenharmony_ci 105062306a36Sopenharmony_ci if (tx_type <= SIW_OP_READ_RESPONSE) 105162306a36Sopenharmony_ci rv = siw_qp_sq_proc_tx(qp, wqe); 105262306a36Sopenharmony_ci else 105362306a36Sopenharmony_ci rv = siw_qp_sq_proc_local(qp, wqe); 105462306a36Sopenharmony_ci 105562306a36Sopenharmony_ci if (!rv) { 105662306a36Sopenharmony_ci /* 105762306a36Sopenharmony_ci * WQE processing done 105862306a36Sopenharmony_ci */ 105962306a36Sopenharmony_ci switch (tx_type) { 106062306a36Sopenharmony_ci case SIW_OP_SEND: 106162306a36Sopenharmony_ci case SIW_OP_SEND_REMOTE_INV: 106262306a36Sopenharmony_ci case SIW_OP_WRITE: 106362306a36Sopenharmony_ci siw_wqe_put_mem(wqe, tx_type); 106462306a36Sopenharmony_ci fallthrough; 106562306a36Sopenharmony_ci 106662306a36Sopenharmony_ci case SIW_OP_INVAL_STAG: 106762306a36Sopenharmony_ci case SIW_OP_REG_MR: 106862306a36Sopenharmony_ci if (tx_flags(wqe) & SIW_WQE_SIGNALLED) 106962306a36Sopenharmony_ci siw_sqe_complete(qp, &wqe->sqe, wqe->bytes, 107062306a36Sopenharmony_ci SIW_WC_SUCCESS); 107162306a36Sopenharmony_ci break; 107262306a36Sopenharmony_ci 107362306a36Sopenharmony_ci case SIW_OP_READ: 107462306a36Sopenharmony_ci case SIW_OP_READ_LOCAL_INV: 107562306a36Sopenharmony_ci /* 107662306a36Sopenharmony_ci * already enqueued to ORQ queue 107762306a36Sopenharmony_ci */ 107862306a36Sopenharmony_ci break; 107962306a36Sopenharmony_ci 108062306a36Sopenharmony_ci case SIW_OP_READ_RESPONSE: 108162306a36Sopenharmony_ci siw_wqe_put_mem(wqe, tx_type); 108262306a36Sopenharmony_ci break; 108362306a36Sopenharmony_ci 108462306a36Sopenharmony_ci default: 108562306a36Sopenharmony_ci WARN(1, "undefined WQE type %d\n", tx_type); 108662306a36Sopenharmony_ci rv = -EINVAL; 108762306a36Sopenharmony_ci goto done; 108862306a36Sopenharmony_ci } 108962306a36Sopenharmony_ci 109062306a36Sopenharmony_ci spin_lock_irqsave(&qp->sq_lock, flags); 109162306a36Sopenharmony_ci wqe->wr_status = SIW_WR_IDLE; 109262306a36Sopenharmony_ci rv = siw_activate_tx(qp); 109362306a36Sopenharmony_ci spin_unlock_irqrestore(&qp->sq_lock, flags); 109462306a36Sopenharmony_ci 109562306a36Sopenharmony_ci if (rv <= 0) 109662306a36Sopenharmony_ci goto done; 109762306a36Sopenharmony_ci 109862306a36Sopenharmony_ci goto next_wqe; 109962306a36Sopenharmony_ci 110062306a36Sopenharmony_ci } else if (rv == -EAGAIN) { 110162306a36Sopenharmony_ci siw_dbg_qp(qp, "sq paused: hd/tr %d of %d, data %d\n", 110262306a36Sopenharmony_ci qp->tx_ctx.ctrl_sent, qp->tx_ctx.ctrl_len, 110362306a36Sopenharmony_ci qp->tx_ctx.bytes_unsent); 110462306a36Sopenharmony_ci rv = 0; 110562306a36Sopenharmony_ci goto done; 110662306a36Sopenharmony_ci } else if (rv == -EINPROGRESS) { 110762306a36Sopenharmony_ci rv = siw_sq_start(qp); 110862306a36Sopenharmony_ci goto done; 110962306a36Sopenharmony_ci } else { 111062306a36Sopenharmony_ci /* 111162306a36Sopenharmony_ci * WQE processing failed. 111262306a36Sopenharmony_ci * Verbs 8.3.2: 111362306a36Sopenharmony_ci * o It turns any WQE into a signalled WQE. 111462306a36Sopenharmony_ci * o Local catastrophic error must be surfaced 111562306a36Sopenharmony_ci * o QP must be moved into Terminate state: done by code 111662306a36Sopenharmony_ci * doing socket state change processing 111762306a36Sopenharmony_ci * 111862306a36Sopenharmony_ci * o TODO: Termination message must be sent. 111962306a36Sopenharmony_ci * o TODO: Implement more precise work completion errors, 112062306a36Sopenharmony_ci * see enum ib_wc_status in ib_verbs.h 112162306a36Sopenharmony_ci */ 112262306a36Sopenharmony_ci siw_dbg_qp(qp, "wqe type %d processing failed: %d\n", 112362306a36Sopenharmony_ci tx_type(wqe), rv); 112462306a36Sopenharmony_ci 112562306a36Sopenharmony_ci spin_lock_irqsave(&qp->sq_lock, flags); 112662306a36Sopenharmony_ci /* 112762306a36Sopenharmony_ci * RREQ may have already been completed by inbound RRESP! 112862306a36Sopenharmony_ci */ 112962306a36Sopenharmony_ci if ((tx_type == SIW_OP_READ || 113062306a36Sopenharmony_ci tx_type == SIW_OP_READ_LOCAL_INV) && qp->attrs.orq_size) { 113162306a36Sopenharmony_ci /* Cleanup pending entry in ORQ */ 113262306a36Sopenharmony_ci qp->orq_put--; 113362306a36Sopenharmony_ci qp->orq[qp->orq_put % qp->attrs.orq_size].flags = 0; 113462306a36Sopenharmony_ci } 113562306a36Sopenharmony_ci spin_unlock_irqrestore(&qp->sq_lock, flags); 113662306a36Sopenharmony_ci /* 113762306a36Sopenharmony_ci * immediately suspends further TX processing 113862306a36Sopenharmony_ci */ 113962306a36Sopenharmony_ci if (!qp->tx_ctx.tx_suspend) 114062306a36Sopenharmony_ci siw_qp_cm_drop(qp, 0); 114162306a36Sopenharmony_ci 114262306a36Sopenharmony_ci switch (tx_type) { 114362306a36Sopenharmony_ci case SIW_OP_SEND: 114462306a36Sopenharmony_ci case SIW_OP_SEND_REMOTE_INV: 114562306a36Sopenharmony_ci case SIW_OP_SEND_WITH_IMM: 114662306a36Sopenharmony_ci case SIW_OP_WRITE: 114762306a36Sopenharmony_ci case SIW_OP_READ: 114862306a36Sopenharmony_ci case SIW_OP_READ_LOCAL_INV: 114962306a36Sopenharmony_ci siw_wqe_put_mem(wqe, tx_type); 115062306a36Sopenharmony_ci fallthrough; 115162306a36Sopenharmony_ci 115262306a36Sopenharmony_ci case SIW_OP_INVAL_STAG: 115362306a36Sopenharmony_ci case SIW_OP_REG_MR: 115462306a36Sopenharmony_ci siw_sqe_complete(qp, &wqe->sqe, wqe->bytes, 115562306a36Sopenharmony_ci SIW_WC_LOC_QP_OP_ERR); 115662306a36Sopenharmony_ci 115762306a36Sopenharmony_ci siw_qp_event(qp, IB_EVENT_QP_FATAL); 115862306a36Sopenharmony_ci 115962306a36Sopenharmony_ci break; 116062306a36Sopenharmony_ci 116162306a36Sopenharmony_ci case SIW_OP_READ_RESPONSE: 116262306a36Sopenharmony_ci siw_dbg_qp(qp, "proc. read.response failed: %d\n", rv); 116362306a36Sopenharmony_ci 116462306a36Sopenharmony_ci siw_qp_event(qp, IB_EVENT_QP_REQ_ERR); 116562306a36Sopenharmony_ci 116662306a36Sopenharmony_ci siw_wqe_put_mem(wqe, SIW_OP_READ_RESPONSE); 116762306a36Sopenharmony_ci 116862306a36Sopenharmony_ci break; 116962306a36Sopenharmony_ci 117062306a36Sopenharmony_ci default: 117162306a36Sopenharmony_ci WARN(1, "undefined WQE type %d\n", tx_type); 117262306a36Sopenharmony_ci rv = -EINVAL; 117362306a36Sopenharmony_ci } 117462306a36Sopenharmony_ci wqe->wr_status = SIW_WR_IDLE; 117562306a36Sopenharmony_ci } 117662306a36Sopenharmony_cidone: 117762306a36Sopenharmony_ci return rv; 117862306a36Sopenharmony_ci} 117962306a36Sopenharmony_ci 118062306a36Sopenharmony_cistatic void siw_sq_resume(struct siw_qp *qp) 118162306a36Sopenharmony_ci{ 118262306a36Sopenharmony_ci if (down_read_trylock(&qp->state_lock)) { 118362306a36Sopenharmony_ci if (likely(qp->attrs.state == SIW_QP_STATE_RTS && 118462306a36Sopenharmony_ci !qp->tx_ctx.tx_suspend)) { 118562306a36Sopenharmony_ci int rv = siw_qp_sq_process(qp); 118662306a36Sopenharmony_ci 118762306a36Sopenharmony_ci up_read(&qp->state_lock); 118862306a36Sopenharmony_ci 118962306a36Sopenharmony_ci if (unlikely(rv < 0)) { 119062306a36Sopenharmony_ci siw_dbg_qp(qp, "SQ task failed: err %d\n", rv); 119162306a36Sopenharmony_ci 119262306a36Sopenharmony_ci if (!qp->tx_ctx.tx_suspend) 119362306a36Sopenharmony_ci siw_qp_cm_drop(qp, 0); 119462306a36Sopenharmony_ci } 119562306a36Sopenharmony_ci } else { 119662306a36Sopenharmony_ci up_read(&qp->state_lock); 119762306a36Sopenharmony_ci } 119862306a36Sopenharmony_ci } else { 119962306a36Sopenharmony_ci siw_dbg_qp(qp, "Resume SQ while QP locked\n"); 120062306a36Sopenharmony_ci } 120162306a36Sopenharmony_ci siw_qp_put(qp); 120262306a36Sopenharmony_ci} 120362306a36Sopenharmony_ci 120462306a36Sopenharmony_cistruct tx_task_t { 120562306a36Sopenharmony_ci struct llist_head active; 120662306a36Sopenharmony_ci wait_queue_head_t waiting; 120762306a36Sopenharmony_ci}; 120862306a36Sopenharmony_ci 120962306a36Sopenharmony_cistatic DEFINE_PER_CPU(struct tx_task_t, siw_tx_task_g); 121062306a36Sopenharmony_ci 121162306a36Sopenharmony_ciint siw_create_tx_threads(void) 121262306a36Sopenharmony_ci{ 121362306a36Sopenharmony_ci int cpu, assigned = 0; 121462306a36Sopenharmony_ci 121562306a36Sopenharmony_ci for_each_online_cpu(cpu) { 121662306a36Sopenharmony_ci struct tx_task_t *tx_task; 121762306a36Sopenharmony_ci 121862306a36Sopenharmony_ci /* Skip HT cores */ 121962306a36Sopenharmony_ci if (cpu % cpumask_weight(topology_sibling_cpumask(cpu))) 122062306a36Sopenharmony_ci continue; 122162306a36Sopenharmony_ci 122262306a36Sopenharmony_ci tx_task = &per_cpu(siw_tx_task_g, cpu); 122362306a36Sopenharmony_ci init_llist_head(&tx_task->active); 122462306a36Sopenharmony_ci init_waitqueue_head(&tx_task->waiting); 122562306a36Sopenharmony_ci 122662306a36Sopenharmony_ci siw_tx_thread[cpu] = 122762306a36Sopenharmony_ci kthread_run_on_cpu(siw_run_sq, 122862306a36Sopenharmony_ci (unsigned long *)(long)cpu, 122962306a36Sopenharmony_ci cpu, "siw_tx/%u"); 123062306a36Sopenharmony_ci if (IS_ERR(siw_tx_thread[cpu])) { 123162306a36Sopenharmony_ci siw_tx_thread[cpu] = NULL; 123262306a36Sopenharmony_ci continue; 123362306a36Sopenharmony_ci } 123462306a36Sopenharmony_ci assigned++; 123562306a36Sopenharmony_ci } 123662306a36Sopenharmony_ci return assigned; 123762306a36Sopenharmony_ci} 123862306a36Sopenharmony_ci 123962306a36Sopenharmony_civoid siw_stop_tx_threads(void) 124062306a36Sopenharmony_ci{ 124162306a36Sopenharmony_ci int cpu; 124262306a36Sopenharmony_ci 124362306a36Sopenharmony_ci for_each_possible_cpu(cpu) { 124462306a36Sopenharmony_ci if (siw_tx_thread[cpu]) { 124562306a36Sopenharmony_ci kthread_stop(siw_tx_thread[cpu]); 124662306a36Sopenharmony_ci wake_up(&per_cpu(siw_tx_task_g, cpu).waiting); 124762306a36Sopenharmony_ci siw_tx_thread[cpu] = NULL; 124862306a36Sopenharmony_ci } 124962306a36Sopenharmony_ci } 125062306a36Sopenharmony_ci} 125162306a36Sopenharmony_ci 125262306a36Sopenharmony_ciint siw_run_sq(void *data) 125362306a36Sopenharmony_ci{ 125462306a36Sopenharmony_ci const int nr_cpu = (unsigned int)(long)data; 125562306a36Sopenharmony_ci struct llist_node *active; 125662306a36Sopenharmony_ci struct siw_qp *qp; 125762306a36Sopenharmony_ci struct tx_task_t *tx_task = &per_cpu(siw_tx_task_g, nr_cpu); 125862306a36Sopenharmony_ci 125962306a36Sopenharmony_ci while (1) { 126062306a36Sopenharmony_ci struct llist_node *fifo_list = NULL; 126162306a36Sopenharmony_ci 126262306a36Sopenharmony_ci wait_event_interruptible(tx_task->waiting, 126362306a36Sopenharmony_ci !llist_empty(&tx_task->active) || 126462306a36Sopenharmony_ci kthread_should_stop()); 126562306a36Sopenharmony_ci 126662306a36Sopenharmony_ci if (kthread_should_stop()) 126762306a36Sopenharmony_ci break; 126862306a36Sopenharmony_ci 126962306a36Sopenharmony_ci active = llist_del_all(&tx_task->active); 127062306a36Sopenharmony_ci /* 127162306a36Sopenharmony_ci * llist_del_all returns a list with newest entry first. 127262306a36Sopenharmony_ci * Re-order list for fairness among QP's. 127362306a36Sopenharmony_ci */ 127462306a36Sopenharmony_ci fifo_list = llist_reverse_order(active); 127562306a36Sopenharmony_ci while (fifo_list) { 127662306a36Sopenharmony_ci qp = container_of(fifo_list, struct siw_qp, tx_list); 127762306a36Sopenharmony_ci fifo_list = llist_next(fifo_list); 127862306a36Sopenharmony_ci qp->tx_list.next = NULL; 127962306a36Sopenharmony_ci 128062306a36Sopenharmony_ci siw_sq_resume(qp); 128162306a36Sopenharmony_ci } 128262306a36Sopenharmony_ci } 128362306a36Sopenharmony_ci active = llist_del_all(&tx_task->active); 128462306a36Sopenharmony_ci if (active) { 128562306a36Sopenharmony_ci llist_for_each_entry(qp, active, tx_list) { 128662306a36Sopenharmony_ci qp->tx_list.next = NULL; 128762306a36Sopenharmony_ci siw_sq_resume(qp); 128862306a36Sopenharmony_ci } 128962306a36Sopenharmony_ci } 129062306a36Sopenharmony_ci return 0; 129162306a36Sopenharmony_ci} 129262306a36Sopenharmony_ci 129362306a36Sopenharmony_ciint siw_sq_start(struct siw_qp *qp) 129462306a36Sopenharmony_ci{ 129562306a36Sopenharmony_ci if (tx_wqe(qp)->wr_status == SIW_WR_IDLE) 129662306a36Sopenharmony_ci return 0; 129762306a36Sopenharmony_ci 129862306a36Sopenharmony_ci if (unlikely(!cpu_online(qp->tx_cpu))) { 129962306a36Sopenharmony_ci siw_put_tx_cpu(qp->tx_cpu); 130062306a36Sopenharmony_ci qp->tx_cpu = siw_get_tx_cpu(qp->sdev); 130162306a36Sopenharmony_ci if (qp->tx_cpu < 0) { 130262306a36Sopenharmony_ci pr_warn("siw: no tx cpu available\n"); 130362306a36Sopenharmony_ci 130462306a36Sopenharmony_ci return -EIO; 130562306a36Sopenharmony_ci } 130662306a36Sopenharmony_ci } 130762306a36Sopenharmony_ci siw_qp_get(qp); 130862306a36Sopenharmony_ci 130962306a36Sopenharmony_ci llist_add(&qp->tx_list, &per_cpu(siw_tx_task_g, qp->tx_cpu).active); 131062306a36Sopenharmony_ci 131162306a36Sopenharmony_ci wake_up(&per_cpu(siw_tx_task_g, qp->tx_cpu).waiting); 131262306a36Sopenharmony_ci 131362306a36Sopenharmony_ci return 0; 131462306a36Sopenharmony_ci} 1315