/*
 * Copyright (c) 2006 Oracle. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */
#include <linux/percpu.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/proc_fs.h>
#include <linux/export.h>

#include "rds.h"

/*
 * This file implements a getsockopt() call which copies a set of fixed
 * sized structs into a user-specified buffer as a means of providing
 * read-only information about RDS.
 *
 * For a given information source there are a given number of fixed sized
 * structs at a given time. The structs are only copied if the user-specified
 * buffer is big enough. The destination pages that make up the buffer
 * are pinned for the duration of the copy.
508c2ecf20Sopenharmony_ci * 518c2ecf20Sopenharmony_ci * This gives us the following benefits: 528c2ecf20Sopenharmony_ci * 538c2ecf20Sopenharmony_ci * - simple implementation, no copy "position" across multiple calls 548c2ecf20Sopenharmony_ci * - consistent snapshot of an info source 558c2ecf20Sopenharmony_ci * - atomic copy works well with whatever locking info source has 568c2ecf20Sopenharmony_ci * - one portable tool to get rds info across implementations 578c2ecf20Sopenharmony_ci * - long-lived tool can get info without allocating 588c2ecf20Sopenharmony_ci * 598c2ecf20Sopenharmony_ci * at the following costs: 608c2ecf20Sopenharmony_ci * 618c2ecf20Sopenharmony_ci * - info source copy must be pinned, may be "large" 628c2ecf20Sopenharmony_ci */ 638c2ecf20Sopenharmony_ci 648c2ecf20Sopenharmony_cistruct rds_info_iterator { 658c2ecf20Sopenharmony_ci struct page **pages; 668c2ecf20Sopenharmony_ci void *addr; 678c2ecf20Sopenharmony_ci unsigned long offset; 688c2ecf20Sopenharmony_ci}; 698c2ecf20Sopenharmony_ci 708c2ecf20Sopenharmony_cistatic DEFINE_SPINLOCK(rds_info_lock); 718c2ecf20Sopenharmony_cistatic rds_info_func rds_info_funcs[RDS_INFO_LAST - RDS_INFO_FIRST + 1]; 728c2ecf20Sopenharmony_ci 738c2ecf20Sopenharmony_civoid rds_info_register_func(int optname, rds_info_func func) 748c2ecf20Sopenharmony_ci{ 758c2ecf20Sopenharmony_ci int offset = optname - RDS_INFO_FIRST; 768c2ecf20Sopenharmony_ci 778c2ecf20Sopenharmony_ci BUG_ON(optname < RDS_INFO_FIRST || optname > RDS_INFO_LAST); 788c2ecf20Sopenharmony_ci 798c2ecf20Sopenharmony_ci spin_lock(&rds_info_lock); 808c2ecf20Sopenharmony_ci BUG_ON(rds_info_funcs[offset]); 818c2ecf20Sopenharmony_ci rds_info_funcs[offset] = func; 828c2ecf20Sopenharmony_ci spin_unlock(&rds_info_lock); 838c2ecf20Sopenharmony_ci} 848c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(rds_info_register_func); 858c2ecf20Sopenharmony_ci 868c2ecf20Sopenharmony_civoid rds_info_deregister_func(int optname, rds_info_func func) 878c2ecf20Sopenharmony_ci{ 
888c2ecf20Sopenharmony_ci int offset = optname - RDS_INFO_FIRST; 898c2ecf20Sopenharmony_ci 908c2ecf20Sopenharmony_ci BUG_ON(optname < RDS_INFO_FIRST || optname > RDS_INFO_LAST); 918c2ecf20Sopenharmony_ci 928c2ecf20Sopenharmony_ci spin_lock(&rds_info_lock); 938c2ecf20Sopenharmony_ci BUG_ON(rds_info_funcs[offset] != func); 948c2ecf20Sopenharmony_ci rds_info_funcs[offset] = NULL; 958c2ecf20Sopenharmony_ci spin_unlock(&rds_info_lock); 968c2ecf20Sopenharmony_ci} 978c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(rds_info_deregister_func); 988c2ecf20Sopenharmony_ci 998c2ecf20Sopenharmony_ci/* 1008c2ecf20Sopenharmony_ci * Typically we hold an atomic kmap across multiple rds_info_copy() calls 1018c2ecf20Sopenharmony_ci * because the kmap is so expensive. This must be called before using blocking 1028c2ecf20Sopenharmony_ci * operations while holding the mapping and as the iterator is torn down. 1038c2ecf20Sopenharmony_ci */ 1048c2ecf20Sopenharmony_civoid rds_info_iter_unmap(struct rds_info_iterator *iter) 1058c2ecf20Sopenharmony_ci{ 1068c2ecf20Sopenharmony_ci if (iter->addr) { 1078c2ecf20Sopenharmony_ci kunmap_atomic(iter->addr); 1088c2ecf20Sopenharmony_ci iter->addr = NULL; 1098c2ecf20Sopenharmony_ci } 1108c2ecf20Sopenharmony_ci} 1118c2ecf20Sopenharmony_ci 1128c2ecf20Sopenharmony_ci/* 1138c2ecf20Sopenharmony_ci * get_user_pages() called flush_dcache_page() on the pages for us. 
1148c2ecf20Sopenharmony_ci */ 1158c2ecf20Sopenharmony_civoid rds_info_copy(struct rds_info_iterator *iter, void *data, 1168c2ecf20Sopenharmony_ci unsigned long bytes) 1178c2ecf20Sopenharmony_ci{ 1188c2ecf20Sopenharmony_ci unsigned long this; 1198c2ecf20Sopenharmony_ci 1208c2ecf20Sopenharmony_ci while (bytes) { 1218c2ecf20Sopenharmony_ci if (!iter->addr) 1228c2ecf20Sopenharmony_ci iter->addr = kmap_atomic(*iter->pages); 1238c2ecf20Sopenharmony_ci 1248c2ecf20Sopenharmony_ci this = min(bytes, PAGE_SIZE - iter->offset); 1258c2ecf20Sopenharmony_ci 1268c2ecf20Sopenharmony_ci rdsdebug("page %p addr %p offset %lu this %lu data %p " 1278c2ecf20Sopenharmony_ci "bytes %lu\n", *iter->pages, iter->addr, 1288c2ecf20Sopenharmony_ci iter->offset, this, data, bytes); 1298c2ecf20Sopenharmony_ci 1308c2ecf20Sopenharmony_ci memcpy(iter->addr + iter->offset, data, this); 1318c2ecf20Sopenharmony_ci 1328c2ecf20Sopenharmony_ci data += this; 1338c2ecf20Sopenharmony_ci bytes -= this; 1348c2ecf20Sopenharmony_ci iter->offset += this; 1358c2ecf20Sopenharmony_ci 1368c2ecf20Sopenharmony_ci if (iter->offset == PAGE_SIZE) { 1378c2ecf20Sopenharmony_ci kunmap_atomic(iter->addr); 1388c2ecf20Sopenharmony_ci iter->addr = NULL; 1398c2ecf20Sopenharmony_ci iter->offset = 0; 1408c2ecf20Sopenharmony_ci iter->pages++; 1418c2ecf20Sopenharmony_ci } 1428c2ecf20Sopenharmony_ci } 1438c2ecf20Sopenharmony_ci} 1448c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(rds_info_copy); 1458c2ecf20Sopenharmony_ci 1468c2ecf20Sopenharmony_ci/* 1478c2ecf20Sopenharmony_ci * @optval points to the userspace buffer that the information snapshot 1488c2ecf20Sopenharmony_ci * will be copied into. 1498c2ecf20Sopenharmony_ci * 1508c2ecf20Sopenharmony_ci * @optlen on input is the size of the buffer in userspace. @optlen 1518c2ecf20Sopenharmony_ci * on output is the size of the requested snapshot in bytes. 
1528c2ecf20Sopenharmony_ci * 1538c2ecf20Sopenharmony_ci * This function returns -errno if there is a failure, particularly -ENOSPC 1548c2ecf20Sopenharmony_ci * if the given userspace buffer was not large enough to fit the snapshot. 1558c2ecf20Sopenharmony_ci * On success it returns the positive number of bytes of each array element 1568c2ecf20Sopenharmony_ci * in the snapshot. 1578c2ecf20Sopenharmony_ci */ 1588c2ecf20Sopenharmony_ciint rds_info_getsockopt(struct socket *sock, int optname, char __user *optval, 1598c2ecf20Sopenharmony_ci int __user *optlen) 1608c2ecf20Sopenharmony_ci{ 1618c2ecf20Sopenharmony_ci struct rds_info_iterator iter; 1628c2ecf20Sopenharmony_ci struct rds_info_lengths lens; 1638c2ecf20Sopenharmony_ci unsigned long nr_pages = 0; 1648c2ecf20Sopenharmony_ci unsigned long start; 1658c2ecf20Sopenharmony_ci rds_info_func func; 1668c2ecf20Sopenharmony_ci struct page **pages = NULL; 1678c2ecf20Sopenharmony_ci int ret; 1688c2ecf20Sopenharmony_ci int len; 1698c2ecf20Sopenharmony_ci int total; 1708c2ecf20Sopenharmony_ci 1718c2ecf20Sopenharmony_ci if (get_user(len, optlen)) { 1728c2ecf20Sopenharmony_ci ret = -EFAULT; 1738c2ecf20Sopenharmony_ci goto out; 1748c2ecf20Sopenharmony_ci } 1758c2ecf20Sopenharmony_ci 1768c2ecf20Sopenharmony_ci /* check for all kinds of wrapping and the like */ 1778c2ecf20Sopenharmony_ci start = (unsigned long)optval; 1788c2ecf20Sopenharmony_ci if (len < 0 || len > INT_MAX - PAGE_SIZE + 1 || start + len < start) { 1798c2ecf20Sopenharmony_ci ret = -EINVAL; 1808c2ecf20Sopenharmony_ci goto out; 1818c2ecf20Sopenharmony_ci } 1828c2ecf20Sopenharmony_ci 1838c2ecf20Sopenharmony_ci /* a 0 len call is just trying to probe its length */ 1848c2ecf20Sopenharmony_ci if (len == 0) 1858c2ecf20Sopenharmony_ci goto call_func; 1868c2ecf20Sopenharmony_ci 1878c2ecf20Sopenharmony_ci nr_pages = (PAGE_ALIGN(start + len) - (start & PAGE_MASK)) 1888c2ecf20Sopenharmony_ci >> PAGE_SHIFT; 1898c2ecf20Sopenharmony_ci 1908c2ecf20Sopenharmony_ci pages = 
kmalloc_array(nr_pages, sizeof(struct page *), GFP_KERNEL); 1918c2ecf20Sopenharmony_ci if (!pages) { 1928c2ecf20Sopenharmony_ci ret = -ENOMEM; 1938c2ecf20Sopenharmony_ci goto out; 1948c2ecf20Sopenharmony_ci } 1958c2ecf20Sopenharmony_ci ret = pin_user_pages_fast(start, nr_pages, FOLL_WRITE, pages); 1968c2ecf20Sopenharmony_ci if (ret != nr_pages) { 1978c2ecf20Sopenharmony_ci if (ret > 0) 1988c2ecf20Sopenharmony_ci nr_pages = ret; 1998c2ecf20Sopenharmony_ci else 2008c2ecf20Sopenharmony_ci nr_pages = 0; 2018c2ecf20Sopenharmony_ci ret = -EAGAIN; /* XXX ? */ 2028c2ecf20Sopenharmony_ci goto out; 2038c2ecf20Sopenharmony_ci } 2048c2ecf20Sopenharmony_ci 2058c2ecf20Sopenharmony_ci rdsdebug("len %d nr_pages %lu\n", len, nr_pages); 2068c2ecf20Sopenharmony_ci 2078c2ecf20Sopenharmony_cicall_func: 2088c2ecf20Sopenharmony_ci func = rds_info_funcs[optname - RDS_INFO_FIRST]; 2098c2ecf20Sopenharmony_ci if (!func) { 2108c2ecf20Sopenharmony_ci ret = -ENOPROTOOPT; 2118c2ecf20Sopenharmony_ci goto out; 2128c2ecf20Sopenharmony_ci } 2138c2ecf20Sopenharmony_ci 2148c2ecf20Sopenharmony_ci iter.pages = pages; 2158c2ecf20Sopenharmony_ci iter.addr = NULL; 2168c2ecf20Sopenharmony_ci iter.offset = start & (PAGE_SIZE - 1); 2178c2ecf20Sopenharmony_ci 2188c2ecf20Sopenharmony_ci func(sock, len, &iter, &lens); 2198c2ecf20Sopenharmony_ci BUG_ON(lens.each == 0); 2208c2ecf20Sopenharmony_ci 2218c2ecf20Sopenharmony_ci total = lens.nr * lens.each; 2228c2ecf20Sopenharmony_ci 2238c2ecf20Sopenharmony_ci rds_info_iter_unmap(&iter); 2248c2ecf20Sopenharmony_ci 2258c2ecf20Sopenharmony_ci if (total > len) { 2268c2ecf20Sopenharmony_ci len = total; 2278c2ecf20Sopenharmony_ci ret = -ENOSPC; 2288c2ecf20Sopenharmony_ci } else { 2298c2ecf20Sopenharmony_ci len = total; 2308c2ecf20Sopenharmony_ci ret = lens.each; 2318c2ecf20Sopenharmony_ci } 2328c2ecf20Sopenharmony_ci 2338c2ecf20Sopenharmony_ci if (put_user(len, optlen)) 2348c2ecf20Sopenharmony_ci ret = -EFAULT; 2358c2ecf20Sopenharmony_ci 2368c2ecf20Sopenharmony_ciout: 
2378c2ecf20Sopenharmony_ci if (pages) 2388c2ecf20Sopenharmony_ci unpin_user_pages(pages, nr_pages); 2398c2ecf20Sopenharmony_ci kfree(pages); 2408c2ecf20Sopenharmony_ci 2418c2ecf20Sopenharmony_ci return ret; 2428c2ecf20Sopenharmony_ci} 243