18c2ecf20Sopenharmony_ci/* 28c2ecf20Sopenharmony_ci * Copyright (c) 2006, 2020 Oracle and/or its affiliates. 38c2ecf20Sopenharmony_ci * 48c2ecf20Sopenharmony_ci * This software is available to you under a choice of one of two 58c2ecf20Sopenharmony_ci * licenses. You may choose to be licensed under the terms of the GNU 68c2ecf20Sopenharmony_ci * General Public License (GPL) Version 2, available from the file 78c2ecf20Sopenharmony_ci * COPYING in the main directory of this source tree, or the 88c2ecf20Sopenharmony_ci * OpenIB.org BSD license below: 98c2ecf20Sopenharmony_ci * 108c2ecf20Sopenharmony_ci * Redistribution and use in source and binary forms, with or 118c2ecf20Sopenharmony_ci * without modification, are permitted provided that the following 128c2ecf20Sopenharmony_ci * conditions are met: 138c2ecf20Sopenharmony_ci * 148c2ecf20Sopenharmony_ci * - Redistributions of source code must retain the above 158c2ecf20Sopenharmony_ci * copyright notice, this list of conditions and the following 168c2ecf20Sopenharmony_ci * disclaimer. 178c2ecf20Sopenharmony_ci * 188c2ecf20Sopenharmony_ci * - Redistributions in binary form must reproduce the above 198c2ecf20Sopenharmony_ci * copyright notice, this list of conditions and the following 208c2ecf20Sopenharmony_ci * disclaimer in the documentation and/or other materials 218c2ecf20Sopenharmony_ci * provided with the distribution. 228c2ecf20Sopenharmony_ci * 238c2ecf20Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 248c2ecf20Sopenharmony_ci * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 258c2ecf20Sopenharmony_ci * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 268c2ecf20Sopenharmony_ci * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 278c2ecf20Sopenharmony_ci * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 288c2ecf20Sopenharmony_ci * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 298c2ecf20Sopenharmony_ci * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 308c2ecf20Sopenharmony_ci * SOFTWARE. 318c2ecf20Sopenharmony_ci * 328c2ecf20Sopenharmony_ci */ 338c2ecf20Sopenharmony_ci#include <linux/kernel.h> 348c2ecf20Sopenharmony_ci#include <linux/slab.h> 358c2ecf20Sopenharmony_ci#include <linux/export.h> 368c2ecf20Sopenharmony_ci#include <linux/skbuff.h> 378c2ecf20Sopenharmony_ci#include <linux/list.h> 388c2ecf20Sopenharmony_ci#include <linux/errqueue.h> 398c2ecf20Sopenharmony_ci 408c2ecf20Sopenharmony_ci#include "rds.h" 418c2ecf20Sopenharmony_ci 428c2ecf20Sopenharmony_cistatic unsigned int rds_exthdr_size[__RDS_EXTHDR_MAX] = { 438c2ecf20Sopenharmony_ci[RDS_EXTHDR_NONE] = 0, 448c2ecf20Sopenharmony_ci[RDS_EXTHDR_VERSION] = sizeof(struct rds_ext_header_version), 458c2ecf20Sopenharmony_ci[RDS_EXTHDR_RDMA] = sizeof(struct rds_ext_header_rdma), 468c2ecf20Sopenharmony_ci[RDS_EXTHDR_RDMA_DEST] = sizeof(struct rds_ext_header_rdma_dest), 478c2ecf20Sopenharmony_ci[RDS_EXTHDR_NPATHS] = sizeof(u16), 488c2ecf20Sopenharmony_ci[RDS_EXTHDR_GEN_NUM] = sizeof(u32), 498c2ecf20Sopenharmony_ci}; 508c2ecf20Sopenharmony_ci 518c2ecf20Sopenharmony_civoid rds_message_addref(struct rds_message *rm) 528c2ecf20Sopenharmony_ci{ 538c2ecf20Sopenharmony_ci rdsdebug("addref rm %p ref %d\n", rm, refcount_read(&rm->m_refcount)); 548c2ecf20Sopenharmony_ci refcount_inc(&rm->m_refcount); 558c2ecf20Sopenharmony_ci} 568c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(rds_message_addref); 578c2ecf20Sopenharmony_ci 588c2ecf20Sopenharmony_cistatic inline bool rds_zcookie_add(struct rds_msg_zcopy_info *info, u32 cookie) 598c2ecf20Sopenharmony_ci{ 608c2ecf20Sopenharmony_ci struct rds_zcopy_cookies *ck = &info->zcookies; 618c2ecf20Sopenharmony_ci int ncookies = ck->num; 628c2ecf20Sopenharmony_ci 638c2ecf20Sopenharmony_ci if (ncookies == RDS_MAX_ZCOOKIES) 648c2ecf20Sopenharmony_ci return false; 658c2ecf20Sopenharmony_ci ck->cookies[ncookies] = cookie; 668c2ecf20Sopenharmony_ci ck->num = ++ncookies; 678c2ecf20Sopenharmony_ci return true; 688c2ecf20Sopenharmony_ci} 698c2ecf20Sopenharmony_ci 708c2ecf20Sopenharmony_cistatic struct rds_msg_zcopy_info *rds_info_from_znotifier(struct rds_znotifier *znotif) 718c2ecf20Sopenharmony_ci{ 728c2ecf20Sopenharmony_ci return container_of(znotif, struct rds_msg_zcopy_info, znotif); 738c2ecf20Sopenharmony_ci} 748c2ecf20Sopenharmony_ci 758c2ecf20Sopenharmony_civoid rds_notify_msg_zcopy_purge(struct rds_msg_zcopy_queue *q) 768c2ecf20Sopenharmony_ci{ 778c2ecf20Sopenharmony_ci unsigned long flags; 788c2ecf20Sopenharmony_ci LIST_HEAD(copy); 798c2ecf20Sopenharmony_ci struct rds_msg_zcopy_info *info, *tmp; 808c2ecf20Sopenharmony_ci 818c2ecf20Sopenharmony_ci spin_lock_irqsave(&q->lock, flags); 828c2ecf20Sopenharmony_ci list_splice(&q->zcookie_head, ©); 838c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&q->zcookie_head); 848c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&q->lock, flags); 858c2ecf20Sopenharmony_ci 868c2ecf20Sopenharmony_ci list_for_each_entry_safe(info, tmp, ©, rs_zcookie_next) { 878c2ecf20Sopenharmony_ci list_del(&info->rs_zcookie_next); 888c2ecf20Sopenharmony_ci kfree(info); 898c2ecf20Sopenharmony_ci } 908c2ecf20Sopenharmony_ci} 918c2ecf20Sopenharmony_ci 928c2ecf20Sopenharmony_cistatic void rds_rm_zerocopy_callback(struct rds_sock *rs, 938c2ecf20Sopenharmony_ci struct rds_znotifier *znotif) 948c2ecf20Sopenharmony_ci{ 958c2ecf20Sopenharmony_ci struct rds_msg_zcopy_info *info; 968c2ecf20Sopenharmony_ci struct rds_msg_zcopy_queue *q; 978c2ecf20Sopenharmony_ci u32 cookie = znotif->z_cookie; 988c2ecf20Sopenharmony_ci struct rds_zcopy_cookies *ck; 998c2ecf20Sopenharmony_ci struct list_head *head; 1008c2ecf20Sopenharmony_ci unsigned long flags; 1018c2ecf20Sopenharmony_ci 1028c2ecf20Sopenharmony_ci mm_unaccount_pinned_pages(&znotif->z_mmp); 1038c2ecf20Sopenharmony_ci q = &rs->rs_zcookie_queue; 1048c2ecf20Sopenharmony_ci spin_lock_irqsave(&q->lock, flags); 1058c2ecf20Sopenharmony_ci head = &q->zcookie_head; 1068c2ecf20Sopenharmony_ci if (!list_empty(head)) { 1078c2ecf20Sopenharmony_ci info = list_first_entry(head, struct rds_msg_zcopy_info, 1088c2ecf20Sopenharmony_ci rs_zcookie_next); 1098c2ecf20Sopenharmony_ci if (rds_zcookie_add(info, cookie)) { 1108c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&q->lock, flags); 1118c2ecf20Sopenharmony_ci kfree(rds_info_from_znotifier(znotif)); 1128c2ecf20Sopenharmony_ci /* caller invokes rds_wake_sk_sleep() */ 1138c2ecf20Sopenharmony_ci return; 1148c2ecf20Sopenharmony_ci } 1158c2ecf20Sopenharmony_ci } 1168c2ecf20Sopenharmony_ci 1178c2ecf20Sopenharmony_ci info = rds_info_from_znotifier(znotif); 1188c2ecf20Sopenharmony_ci ck = &info->zcookies; 1198c2ecf20Sopenharmony_ci memset(ck, 0, sizeof(*ck)); 1208c2ecf20Sopenharmony_ci WARN_ON(!rds_zcookie_add(info, cookie)); 1218c2ecf20Sopenharmony_ci list_add_tail(&info->rs_zcookie_next, &q->zcookie_head); 1228c2ecf20Sopenharmony_ci 1238c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&q->lock, flags); 1248c2ecf20Sopenharmony_ci /* caller invokes rds_wake_sk_sleep() */ 1258c2ecf20Sopenharmony_ci} 1268c2ecf20Sopenharmony_ci 1278c2ecf20Sopenharmony_ci/* 1288c2ecf20Sopenharmony_ci * This relies on dma_map_sg() not touching sg[].page during merging. 1298c2ecf20Sopenharmony_ci */ 1308c2ecf20Sopenharmony_cistatic void rds_message_purge(struct rds_message *rm) 1318c2ecf20Sopenharmony_ci{ 1328c2ecf20Sopenharmony_ci unsigned long i, flags; 1338c2ecf20Sopenharmony_ci bool zcopy = false; 1348c2ecf20Sopenharmony_ci 1358c2ecf20Sopenharmony_ci if (unlikely(test_bit(RDS_MSG_PAGEVEC, &rm->m_flags))) 1368c2ecf20Sopenharmony_ci return; 1378c2ecf20Sopenharmony_ci 1388c2ecf20Sopenharmony_ci spin_lock_irqsave(&rm->m_rs_lock, flags); 1398c2ecf20Sopenharmony_ci if (rm->m_rs) { 1408c2ecf20Sopenharmony_ci struct rds_sock *rs = rm->m_rs; 1418c2ecf20Sopenharmony_ci 1428c2ecf20Sopenharmony_ci if (rm->data.op_mmp_znotifier) { 1438c2ecf20Sopenharmony_ci zcopy = true; 1448c2ecf20Sopenharmony_ci rds_rm_zerocopy_callback(rs, rm->data.op_mmp_znotifier); 1458c2ecf20Sopenharmony_ci rds_wake_sk_sleep(rs); 1468c2ecf20Sopenharmony_ci rm->data.op_mmp_znotifier = NULL; 1478c2ecf20Sopenharmony_ci } 1488c2ecf20Sopenharmony_ci sock_put(rds_rs_to_sk(rs)); 1498c2ecf20Sopenharmony_ci rm->m_rs = NULL; 1508c2ecf20Sopenharmony_ci } 1518c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&rm->m_rs_lock, flags); 1528c2ecf20Sopenharmony_ci 1538c2ecf20Sopenharmony_ci for (i = 0; i < rm->data.op_nents; i++) { 1548c2ecf20Sopenharmony_ci /* XXX will have to put_page for page refs */ 1558c2ecf20Sopenharmony_ci if (!zcopy) 1568c2ecf20Sopenharmony_ci __free_page(sg_page(&rm->data.op_sg[i])); 1578c2ecf20Sopenharmony_ci else 1588c2ecf20Sopenharmony_ci put_page(sg_page(&rm->data.op_sg[i])); 1598c2ecf20Sopenharmony_ci } 1608c2ecf20Sopenharmony_ci rm->data.op_nents = 0; 1618c2ecf20Sopenharmony_ci 1628c2ecf20Sopenharmony_ci if (rm->rdma.op_active) 1638c2ecf20Sopenharmony_ci rds_rdma_free_op(&rm->rdma); 1648c2ecf20Sopenharmony_ci if (rm->rdma.op_rdma_mr) 1658c2ecf20Sopenharmony_ci kref_put(&rm->rdma.op_rdma_mr->r_kref, __rds_put_mr_final); 1668c2ecf20Sopenharmony_ci 1678c2ecf20Sopenharmony_ci if (rm->atomic.op_active) 1688c2ecf20Sopenharmony_ci rds_atomic_free_op(&rm->atomic); 1698c2ecf20Sopenharmony_ci if (rm->atomic.op_rdma_mr) 1708c2ecf20Sopenharmony_ci kref_put(&rm->atomic.op_rdma_mr->r_kref, __rds_put_mr_final); 1718c2ecf20Sopenharmony_ci} 1728c2ecf20Sopenharmony_ci 1738c2ecf20Sopenharmony_civoid rds_message_put(struct rds_message *rm) 1748c2ecf20Sopenharmony_ci{ 1758c2ecf20Sopenharmony_ci rdsdebug("put rm %p ref %d\n", rm, refcount_read(&rm->m_refcount)); 1768c2ecf20Sopenharmony_ci WARN(!refcount_read(&rm->m_refcount), "danger refcount zero on %p\n", rm); 1778c2ecf20Sopenharmony_ci if (refcount_dec_and_test(&rm->m_refcount)) { 1788c2ecf20Sopenharmony_ci BUG_ON(!list_empty(&rm->m_sock_item)); 1798c2ecf20Sopenharmony_ci BUG_ON(!list_empty(&rm->m_conn_item)); 1808c2ecf20Sopenharmony_ci rds_message_purge(rm); 1818c2ecf20Sopenharmony_ci 1828c2ecf20Sopenharmony_ci kfree(rm); 1838c2ecf20Sopenharmony_ci } 1848c2ecf20Sopenharmony_ci} 1858c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(rds_message_put); 1868c2ecf20Sopenharmony_ci 1878c2ecf20Sopenharmony_civoid rds_message_populate_header(struct rds_header *hdr, __be16 sport, 1888c2ecf20Sopenharmony_ci __be16 dport, u64 seq) 1898c2ecf20Sopenharmony_ci{ 1908c2ecf20Sopenharmony_ci hdr->h_flags = 0; 1918c2ecf20Sopenharmony_ci hdr->h_sport = sport; 1928c2ecf20Sopenharmony_ci hdr->h_dport = dport; 1938c2ecf20Sopenharmony_ci hdr->h_sequence = cpu_to_be64(seq); 1948c2ecf20Sopenharmony_ci hdr->h_exthdr[0] = RDS_EXTHDR_NONE; 1958c2ecf20Sopenharmony_ci} 1968c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(rds_message_populate_header); 1978c2ecf20Sopenharmony_ci 1988c2ecf20Sopenharmony_ciint rds_message_add_extension(struct rds_header *hdr, unsigned int type, 1998c2ecf20Sopenharmony_ci const void *data, unsigned int len) 2008c2ecf20Sopenharmony_ci{ 2018c2ecf20Sopenharmony_ci unsigned int ext_len = sizeof(u8) + len; 2028c2ecf20Sopenharmony_ci unsigned char *dst; 2038c2ecf20Sopenharmony_ci 2048c2ecf20Sopenharmony_ci /* For now, refuse to add more than one extension header */ 2058c2ecf20Sopenharmony_ci if (hdr->h_exthdr[0] != RDS_EXTHDR_NONE) 2068c2ecf20Sopenharmony_ci return 0; 2078c2ecf20Sopenharmony_ci 2088c2ecf20Sopenharmony_ci if (type >= __RDS_EXTHDR_MAX || len != rds_exthdr_size[type]) 2098c2ecf20Sopenharmony_ci return 0; 2108c2ecf20Sopenharmony_ci 2118c2ecf20Sopenharmony_ci if (ext_len >= RDS_HEADER_EXT_SPACE) 2128c2ecf20Sopenharmony_ci return 0; 2138c2ecf20Sopenharmony_ci dst = hdr->h_exthdr; 2148c2ecf20Sopenharmony_ci 2158c2ecf20Sopenharmony_ci *dst++ = type; 2168c2ecf20Sopenharmony_ci memcpy(dst, data, len); 2178c2ecf20Sopenharmony_ci 2188c2ecf20Sopenharmony_ci dst[len] = RDS_EXTHDR_NONE; 2198c2ecf20Sopenharmony_ci return 1; 2208c2ecf20Sopenharmony_ci} 2218c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(rds_message_add_extension); 2228c2ecf20Sopenharmony_ci 2238c2ecf20Sopenharmony_ci/* 2248c2ecf20Sopenharmony_ci * If a message has extension headers, retrieve them here. 2258c2ecf20Sopenharmony_ci * Call like this: 2268c2ecf20Sopenharmony_ci * 2278c2ecf20Sopenharmony_ci * unsigned int pos = 0; 2288c2ecf20Sopenharmony_ci * 2298c2ecf20Sopenharmony_ci * while (1) { 2308c2ecf20Sopenharmony_ci * buflen = sizeof(buffer); 2318c2ecf20Sopenharmony_ci * type = rds_message_next_extension(hdr, &pos, buffer, &buflen); 2328c2ecf20Sopenharmony_ci * if (type == RDS_EXTHDR_NONE) 2338c2ecf20Sopenharmony_ci * break; 2348c2ecf20Sopenharmony_ci * ... 2358c2ecf20Sopenharmony_ci * } 2368c2ecf20Sopenharmony_ci */ 2378c2ecf20Sopenharmony_ciint rds_message_next_extension(struct rds_header *hdr, 2388c2ecf20Sopenharmony_ci unsigned int *pos, void *buf, unsigned int *buflen) 2398c2ecf20Sopenharmony_ci{ 2408c2ecf20Sopenharmony_ci unsigned int offset, ext_type, ext_len; 2418c2ecf20Sopenharmony_ci u8 *src = hdr->h_exthdr; 2428c2ecf20Sopenharmony_ci 2438c2ecf20Sopenharmony_ci offset = *pos; 2448c2ecf20Sopenharmony_ci if (offset >= RDS_HEADER_EXT_SPACE) 2458c2ecf20Sopenharmony_ci goto none; 2468c2ecf20Sopenharmony_ci 2478c2ecf20Sopenharmony_ci /* Get the extension type and length. For now, the 2488c2ecf20Sopenharmony_ci * length is implied by the extension type. */ 2498c2ecf20Sopenharmony_ci ext_type = src[offset++]; 2508c2ecf20Sopenharmony_ci 2518c2ecf20Sopenharmony_ci if (ext_type == RDS_EXTHDR_NONE || ext_type >= __RDS_EXTHDR_MAX) 2528c2ecf20Sopenharmony_ci goto none; 2538c2ecf20Sopenharmony_ci ext_len = rds_exthdr_size[ext_type]; 2548c2ecf20Sopenharmony_ci if (offset + ext_len > RDS_HEADER_EXT_SPACE) 2558c2ecf20Sopenharmony_ci goto none; 2568c2ecf20Sopenharmony_ci 2578c2ecf20Sopenharmony_ci *pos = offset + ext_len; 2588c2ecf20Sopenharmony_ci if (ext_len < *buflen) 2598c2ecf20Sopenharmony_ci *buflen = ext_len; 2608c2ecf20Sopenharmony_ci memcpy(buf, src + offset, *buflen); 2618c2ecf20Sopenharmony_ci return ext_type; 2628c2ecf20Sopenharmony_ci 2638c2ecf20Sopenharmony_cinone: 2648c2ecf20Sopenharmony_ci *pos = RDS_HEADER_EXT_SPACE; 2658c2ecf20Sopenharmony_ci *buflen = 0; 2668c2ecf20Sopenharmony_ci return RDS_EXTHDR_NONE; 2678c2ecf20Sopenharmony_ci} 2688c2ecf20Sopenharmony_ci 2698c2ecf20Sopenharmony_ciint rds_message_add_rdma_dest_extension(struct rds_header *hdr, u32 r_key, u32 offset) 2708c2ecf20Sopenharmony_ci{ 2718c2ecf20Sopenharmony_ci struct rds_ext_header_rdma_dest ext_hdr; 2728c2ecf20Sopenharmony_ci 2738c2ecf20Sopenharmony_ci ext_hdr.h_rdma_rkey = cpu_to_be32(r_key); 2748c2ecf20Sopenharmony_ci ext_hdr.h_rdma_offset = cpu_to_be32(offset); 2758c2ecf20Sopenharmony_ci return rds_message_add_extension(hdr, RDS_EXTHDR_RDMA_DEST, &ext_hdr, sizeof(ext_hdr)); 2768c2ecf20Sopenharmony_ci} 2778c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(rds_message_add_rdma_dest_extension); 2788c2ecf20Sopenharmony_ci 2798c2ecf20Sopenharmony_ci/* 2808c2ecf20Sopenharmony_ci * Each rds_message is allocated with extra space for the scatterlist entries 2818c2ecf20Sopenharmony_ci * rds ops will need. This is to minimize memory allocation count. Then, each rds op 2828c2ecf20Sopenharmony_ci * can grab SGs when initializing its part of the rds_message. 2838c2ecf20Sopenharmony_ci */ 2848c2ecf20Sopenharmony_cistruct rds_message *rds_message_alloc(unsigned int extra_len, gfp_t gfp) 2858c2ecf20Sopenharmony_ci{ 2868c2ecf20Sopenharmony_ci struct rds_message *rm; 2878c2ecf20Sopenharmony_ci 2888c2ecf20Sopenharmony_ci if (extra_len > KMALLOC_MAX_SIZE - sizeof(struct rds_message)) 2898c2ecf20Sopenharmony_ci return NULL; 2908c2ecf20Sopenharmony_ci 2918c2ecf20Sopenharmony_ci rm = kzalloc(sizeof(struct rds_message) + extra_len, gfp); 2928c2ecf20Sopenharmony_ci if (!rm) 2938c2ecf20Sopenharmony_ci goto out; 2948c2ecf20Sopenharmony_ci 2958c2ecf20Sopenharmony_ci rm->m_used_sgs = 0; 2968c2ecf20Sopenharmony_ci rm->m_total_sgs = extra_len / sizeof(struct scatterlist); 2978c2ecf20Sopenharmony_ci 2988c2ecf20Sopenharmony_ci refcount_set(&rm->m_refcount, 1); 2998c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&rm->m_sock_item); 3008c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&rm->m_conn_item); 3018c2ecf20Sopenharmony_ci spin_lock_init(&rm->m_rs_lock); 3028c2ecf20Sopenharmony_ci init_waitqueue_head(&rm->m_flush_wait); 3038c2ecf20Sopenharmony_ci 3048c2ecf20Sopenharmony_ciout: 3058c2ecf20Sopenharmony_ci return rm; 3068c2ecf20Sopenharmony_ci} 3078c2ecf20Sopenharmony_ci 3088c2ecf20Sopenharmony_ci/* 3098c2ecf20Sopenharmony_ci * RDS ops use this to grab SG entries from the rm's sg pool. 3108c2ecf20Sopenharmony_ci */ 3118c2ecf20Sopenharmony_cistruct scatterlist *rds_message_alloc_sgs(struct rds_message *rm, int nents) 3128c2ecf20Sopenharmony_ci{ 3138c2ecf20Sopenharmony_ci struct scatterlist *sg_first = (struct scatterlist *) &rm[1]; 3148c2ecf20Sopenharmony_ci struct scatterlist *sg_ret; 3158c2ecf20Sopenharmony_ci 3168c2ecf20Sopenharmony_ci if (nents <= 0) { 3178c2ecf20Sopenharmony_ci pr_warn("rds: alloc sgs failed! nents <= 0\n"); 3188c2ecf20Sopenharmony_ci return ERR_PTR(-EINVAL); 3198c2ecf20Sopenharmony_ci } 3208c2ecf20Sopenharmony_ci 3218c2ecf20Sopenharmony_ci if (rm->m_used_sgs + nents > rm->m_total_sgs) { 3228c2ecf20Sopenharmony_ci pr_warn("rds: alloc sgs failed! total %d used %d nents %d\n", 3238c2ecf20Sopenharmony_ci rm->m_total_sgs, rm->m_used_sgs, nents); 3248c2ecf20Sopenharmony_ci return ERR_PTR(-ENOMEM); 3258c2ecf20Sopenharmony_ci } 3268c2ecf20Sopenharmony_ci 3278c2ecf20Sopenharmony_ci sg_ret = &sg_first[rm->m_used_sgs]; 3288c2ecf20Sopenharmony_ci sg_init_table(sg_ret, nents); 3298c2ecf20Sopenharmony_ci rm->m_used_sgs += nents; 3308c2ecf20Sopenharmony_ci 3318c2ecf20Sopenharmony_ci return sg_ret; 3328c2ecf20Sopenharmony_ci} 3338c2ecf20Sopenharmony_ci 3348c2ecf20Sopenharmony_cistruct rds_message *rds_message_map_pages(unsigned long *page_addrs, unsigned int total_len) 3358c2ecf20Sopenharmony_ci{ 3368c2ecf20Sopenharmony_ci struct rds_message *rm; 3378c2ecf20Sopenharmony_ci unsigned int i; 3388c2ecf20Sopenharmony_ci int num_sgs = DIV_ROUND_UP(total_len, PAGE_SIZE); 3398c2ecf20Sopenharmony_ci int extra_bytes = num_sgs * sizeof(struct scatterlist); 3408c2ecf20Sopenharmony_ci 3418c2ecf20Sopenharmony_ci rm = rds_message_alloc(extra_bytes, GFP_NOWAIT); 3428c2ecf20Sopenharmony_ci if (!rm) 3438c2ecf20Sopenharmony_ci return ERR_PTR(-ENOMEM); 3448c2ecf20Sopenharmony_ci 3458c2ecf20Sopenharmony_ci set_bit(RDS_MSG_PAGEVEC, &rm->m_flags); 3468c2ecf20Sopenharmony_ci rm->m_inc.i_hdr.h_len = cpu_to_be32(total_len); 3478c2ecf20Sopenharmony_ci rm->data.op_nents = DIV_ROUND_UP(total_len, PAGE_SIZE); 3488c2ecf20Sopenharmony_ci rm->data.op_sg = rds_message_alloc_sgs(rm, num_sgs); 3498c2ecf20Sopenharmony_ci if (IS_ERR(rm->data.op_sg)) { 3508c2ecf20Sopenharmony_ci void *err = ERR_CAST(rm->data.op_sg); 3518c2ecf20Sopenharmony_ci rds_message_put(rm); 3528c2ecf20Sopenharmony_ci return err; 3538c2ecf20Sopenharmony_ci } 3548c2ecf20Sopenharmony_ci 3558c2ecf20Sopenharmony_ci for (i = 0; i < rm->data.op_nents; ++i) { 3568c2ecf20Sopenharmony_ci sg_set_page(&rm->data.op_sg[i], 3578c2ecf20Sopenharmony_ci virt_to_page(page_addrs[i]), 3588c2ecf20Sopenharmony_ci PAGE_SIZE, 0); 3598c2ecf20Sopenharmony_ci } 3608c2ecf20Sopenharmony_ci 3618c2ecf20Sopenharmony_ci return rm; 3628c2ecf20Sopenharmony_ci} 3638c2ecf20Sopenharmony_ci 3648c2ecf20Sopenharmony_cistatic int rds_message_zcopy_from_user(struct rds_message *rm, struct iov_iter *from) 3658c2ecf20Sopenharmony_ci{ 3668c2ecf20Sopenharmony_ci struct scatterlist *sg; 3678c2ecf20Sopenharmony_ci int ret = 0; 3688c2ecf20Sopenharmony_ci int length = iov_iter_count(from); 3698c2ecf20Sopenharmony_ci int total_copied = 0; 3708c2ecf20Sopenharmony_ci struct rds_msg_zcopy_info *info; 3718c2ecf20Sopenharmony_ci 3728c2ecf20Sopenharmony_ci rm->m_inc.i_hdr.h_len = cpu_to_be32(iov_iter_count(from)); 3738c2ecf20Sopenharmony_ci 3748c2ecf20Sopenharmony_ci /* 3758c2ecf20Sopenharmony_ci * now allocate and copy in the data payload. 3768c2ecf20Sopenharmony_ci */ 3778c2ecf20Sopenharmony_ci sg = rm->data.op_sg; 3788c2ecf20Sopenharmony_ci 3798c2ecf20Sopenharmony_ci info = kzalloc(sizeof(*info), GFP_KERNEL); 3808c2ecf20Sopenharmony_ci if (!info) 3818c2ecf20Sopenharmony_ci return -ENOMEM; 3828c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&info->rs_zcookie_next); 3838c2ecf20Sopenharmony_ci rm->data.op_mmp_znotifier = &info->znotif; 3848c2ecf20Sopenharmony_ci if (mm_account_pinned_pages(&rm->data.op_mmp_znotifier->z_mmp, 3858c2ecf20Sopenharmony_ci length)) { 3868c2ecf20Sopenharmony_ci ret = -ENOMEM; 3878c2ecf20Sopenharmony_ci goto err; 3888c2ecf20Sopenharmony_ci } 3898c2ecf20Sopenharmony_ci while (iov_iter_count(from)) { 3908c2ecf20Sopenharmony_ci struct page *pages; 3918c2ecf20Sopenharmony_ci size_t start; 3928c2ecf20Sopenharmony_ci ssize_t copied; 3938c2ecf20Sopenharmony_ci 3948c2ecf20Sopenharmony_ci copied = iov_iter_get_pages(from, &pages, PAGE_SIZE, 3958c2ecf20Sopenharmony_ci 1, &start); 3968c2ecf20Sopenharmony_ci if (copied < 0) { 3978c2ecf20Sopenharmony_ci struct mmpin *mmp; 3988c2ecf20Sopenharmony_ci int i; 3998c2ecf20Sopenharmony_ci 4008c2ecf20Sopenharmony_ci for (i = 0; i < rm->data.op_nents; i++) 4018c2ecf20Sopenharmony_ci put_page(sg_page(&rm->data.op_sg[i])); 4028c2ecf20Sopenharmony_ci mmp = &rm->data.op_mmp_znotifier->z_mmp; 4038c2ecf20Sopenharmony_ci mm_unaccount_pinned_pages(mmp); 4048c2ecf20Sopenharmony_ci ret = -EFAULT; 4058c2ecf20Sopenharmony_ci goto err; 4068c2ecf20Sopenharmony_ci } 4078c2ecf20Sopenharmony_ci total_copied += copied; 4088c2ecf20Sopenharmony_ci iov_iter_advance(from, copied); 4098c2ecf20Sopenharmony_ci length -= copied; 4108c2ecf20Sopenharmony_ci sg_set_page(sg, pages, copied, start); 4118c2ecf20Sopenharmony_ci rm->data.op_nents++; 4128c2ecf20Sopenharmony_ci sg++; 4138c2ecf20Sopenharmony_ci } 4148c2ecf20Sopenharmony_ci WARN_ON_ONCE(length != 0); 4158c2ecf20Sopenharmony_ci return ret; 4168c2ecf20Sopenharmony_cierr: 4178c2ecf20Sopenharmony_ci kfree(info); 4188c2ecf20Sopenharmony_ci rm->data.op_mmp_znotifier = NULL; 4198c2ecf20Sopenharmony_ci return ret; 4208c2ecf20Sopenharmony_ci} 4218c2ecf20Sopenharmony_ci 4228c2ecf20Sopenharmony_ciint rds_message_copy_from_user(struct rds_message *rm, struct iov_iter *from, 4238c2ecf20Sopenharmony_ci bool zcopy) 4248c2ecf20Sopenharmony_ci{ 4258c2ecf20Sopenharmony_ci unsigned long to_copy, nbytes; 4268c2ecf20Sopenharmony_ci unsigned long sg_off; 4278c2ecf20Sopenharmony_ci struct scatterlist *sg; 4288c2ecf20Sopenharmony_ci int ret = 0; 4298c2ecf20Sopenharmony_ci 4308c2ecf20Sopenharmony_ci rm->m_inc.i_hdr.h_len = cpu_to_be32(iov_iter_count(from)); 4318c2ecf20Sopenharmony_ci 4328c2ecf20Sopenharmony_ci /* now allocate and copy in the data payload. */ 4338c2ecf20Sopenharmony_ci sg = rm->data.op_sg; 4348c2ecf20Sopenharmony_ci sg_off = 0; /* Dear gcc, sg->page will be null from kzalloc. */ 4358c2ecf20Sopenharmony_ci 4368c2ecf20Sopenharmony_ci if (zcopy) 4378c2ecf20Sopenharmony_ci return rds_message_zcopy_from_user(rm, from); 4388c2ecf20Sopenharmony_ci 4398c2ecf20Sopenharmony_ci while (iov_iter_count(from)) { 4408c2ecf20Sopenharmony_ci if (!sg_page(sg)) { 4418c2ecf20Sopenharmony_ci ret = rds_page_remainder_alloc(sg, iov_iter_count(from), 4428c2ecf20Sopenharmony_ci GFP_HIGHUSER); 4438c2ecf20Sopenharmony_ci if (ret) 4448c2ecf20Sopenharmony_ci return ret; 4458c2ecf20Sopenharmony_ci rm->data.op_nents++; 4468c2ecf20Sopenharmony_ci sg_off = 0; 4478c2ecf20Sopenharmony_ci } 4488c2ecf20Sopenharmony_ci 4498c2ecf20Sopenharmony_ci to_copy = min_t(unsigned long, iov_iter_count(from), 4508c2ecf20Sopenharmony_ci sg->length - sg_off); 4518c2ecf20Sopenharmony_ci 4528c2ecf20Sopenharmony_ci rds_stats_add(s_copy_from_user, to_copy); 4538c2ecf20Sopenharmony_ci nbytes = copy_page_from_iter(sg_page(sg), sg->offset + sg_off, 4548c2ecf20Sopenharmony_ci to_copy, from); 4558c2ecf20Sopenharmony_ci if (nbytes != to_copy) 4568c2ecf20Sopenharmony_ci return -EFAULT; 4578c2ecf20Sopenharmony_ci 4588c2ecf20Sopenharmony_ci sg_off += to_copy; 4598c2ecf20Sopenharmony_ci 4608c2ecf20Sopenharmony_ci if (sg_off == sg->length) 4618c2ecf20Sopenharmony_ci sg++; 4628c2ecf20Sopenharmony_ci } 4638c2ecf20Sopenharmony_ci 4648c2ecf20Sopenharmony_ci return ret; 4658c2ecf20Sopenharmony_ci} 4668c2ecf20Sopenharmony_ci 4678c2ecf20Sopenharmony_ciint rds_message_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to) 4688c2ecf20Sopenharmony_ci{ 4698c2ecf20Sopenharmony_ci struct rds_message *rm; 4708c2ecf20Sopenharmony_ci struct scatterlist *sg; 4718c2ecf20Sopenharmony_ci unsigned long to_copy; 4728c2ecf20Sopenharmony_ci unsigned long vec_off; 4738c2ecf20Sopenharmony_ci int copied; 4748c2ecf20Sopenharmony_ci int ret; 4758c2ecf20Sopenharmony_ci u32 len; 4768c2ecf20Sopenharmony_ci 4778c2ecf20Sopenharmony_ci rm = container_of(inc, struct rds_message, m_inc); 4788c2ecf20Sopenharmony_ci len = be32_to_cpu(rm->m_inc.i_hdr.h_len); 4798c2ecf20Sopenharmony_ci 4808c2ecf20Sopenharmony_ci sg = rm->data.op_sg; 4818c2ecf20Sopenharmony_ci vec_off = 0; 4828c2ecf20Sopenharmony_ci copied = 0; 4838c2ecf20Sopenharmony_ci 4848c2ecf20Sopenharmony_ci while (iov_iter_count(to) && copied < len) { 4858c2ecf20Sopenharmony_ci to_copy = min_t(unsigned long, iov_iter_count(to), 4868c2ecf20Sopenharmony_ci sg->length - vec_off); 4878c2ecf20Sopenharmony_ci to_copy = min_t(unsigned long, to_copy, len - copied); 4888c2ecf20Sopenharmony_ci 4898c2ecf20Sopenharmony_ci rds_stats_add(s_copy_to_user, to_copy); 4908c2ecf20Sopenharmony_ci ret = copy_page_to_iter(sg_page(sg), sg->offset + vec_off, 4918c2ecf20Sopenharmony_ci to_copy, to); 4928c2ecf20Sopenharmony_ci if (ret != to_copy) 4938c2ecf20Sopenharmony_ci return -EFAULT; 4948c2ecf20Sopenharmony_ci 4958c2ecf20Sopenharmony_ci vec_off += to_copy; 4968c2ecf20Sopenharmony_ci copied += to_copy; 4978c2ecf20Sopenharmony_ci 4988c2ecf20Sopenharmony_ci if (vec_off == sg->length) { 4998c2ecf20Sopenharmony_ci vec_off = 0; 5008c2ecf20Sopenharmony_ci sg++; 5018c2ecf20Sopenharmony_ci } 5028c2ecf20Sopenharmony_ci } 5038c2ecf20Sopenharmony_ci 5048c2ecf20Sopenharmony_ci return copied; 5058c2ecf20Sopenharmony_ci} 5068c2ecf20Sopenharmony_ci 5078c2ecf20Sopenharmony_ci/* 5088c2ecf20Sopenharmony_ci * If the message is still on the send queue, wait until the transport 5098c2ecf20Sopenharmony_ci * is done with it. This is particularly important for RDMA operations. 5108c2ecf20Sopenharmony_ci */ 5118c2ecf20Sopenharmony_civoid rds_message_wait(struct rds_message *rm) 5128c2ecf20Sopenharmony_ci{ 5138c2ecf20Sopenharmony_ci wait_event_interruptible(rm->m_flush_wait, 5148c2ecf20Sopenharmony_ci !test_bit(RDS_MSG_MAPPED, &rm->m_flags)); 5158c2ecf20Sopenharmony_ci} 5168c2ecf20Sopenharmony_ci 5178c2ecf20Sopenharmony_civoid rds_message_unmapped(struct rds_message *rm) 5188c2ecf20Sopenharmony_ci{ 5198c2ecf20Sopenharmony_ci clear_bit(RDS_MSG_MAPPED, &rm->m_flags); 5208c2ecf20Sopenharmony_ci wake_up_interruptible(&rm->m_flush_wait); 5218c2ecf20Sopenharmony_ci} 5228c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(rds_message_unmapped); 523