/*
 * Copyright (c) 2006 Oracle.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */
338c2ecf20Sopenharmony_ci#include <linux/percpu.h>
348c2ecf20Sopenharmony_ci#include <linux/seq_file.h>
358c2ecf20Sopenharmony_ci#include <linux/slab.h>
368c2ecf20Sopenharmony_ci#include <linux/proc_fs.h>
378c2ecf20Sopenharmony_ci#include <linux/export.h>
388c2ecf20Sopenharmony_ci
398c2ecf20Sopenharmony_ci#include "rds.h"
408c2ecf20Sopenharmony_ci
/*
 * This file implements a getsockopt() call which copies a set of fixed
 * sized structs into a user-specified buffer as a means of providing
 * read-only information about RDS.
 *
 * For a given information source there are a given number of fixed sized
 * structs at a given time.  The structs are only copied if the user-specified
 * buffer is big enough.  The destination pages that make up the buffer
 * are pinned for the duration of the copy.
 *
 * This gives us the following benefits:
 *
 * - simple implementation, no copy "position" across multiple calls
 * - consistent snapshot of an info source
 * - atomic copy works well with whatever locking info source has
 * - one portable tool to get rds info across implementations
 * - long-lived tool can get info without allocating
 *
 * at the following costs:
 *
 * - info source copy must be pinned, may be "large"
 */
638c2ecf20Sopenharmony_ci
/*
 * Write cursor used while an info snapshot is copied into the caller's
 * pinned pages.
 */
struct rds_info_iterator {
	/* Page currently being filled; stepped forward each time a page fills. */
	struct page **pages;
	/* Atomic kmap of *pages, or NULL while no mapping is held. */
	void *addr;
	/* Byte offset inside *pages at which the next copy will land. */
	unsigned long offset;
};
698c2ecf20Sopenharmony_ci
708c2ecf20Sopenharmony_cistatic DEFINE_SPINLOCK(rds_info_lock);
718c2ecf20Sopenharmony_cistatic rds_info_func rds_info_funcs[RDS_INFO_LAST - RDS_INFO_FIRST + 1];
728c2ecf20Sopenharmony_ci
738c2ecf20Sopenharmony_civoid rds_info_register_func(int optname, rds_info_func func)
748c2ecf20Sopenharmony_ci{
758c2ecf20Sopenharmony_ci	int offset = optname - RDS_INFO_FIRST;
768c2ecf20Sopenharmony_ci
778c2ecf20Sopenharmony_ci	BUG_ON(optname < RDS_INFO_FIRST || optname > RDS_INFO_LAST);
788c2ecf20Sopenharmony_ci
798c2ecf20Sopenharmony_ci	spin_lock(&rds_info_lock);
808c2ecf20Sopenharmony_ci	BUG_ON(rds_info_funcs[offset]);
818c2ecf20Sopenharmony_ci	rds_info_funcs[offset] = func;
828c2ecf20Sopenharmony_ci	spin_unlock(&rds_info_lock);
838c2ecf20Sopenharmony_ci}
848c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(rds_info_register_func);
858c2ecf20Sopenharmony_ci
868c2ecf20Sopenharmony_civoid rds_info_deregister_func(int optname, rds_info_func func)
878c2ecf20Sopenharmony_ci{
888c2ecf20Sopenharmony_ci	int offset = optname - RDS_INFO_FIRST;
898c2ecf20Sopenharmony_ci
908c2ecf20Sopenharmony_ci	BUG_ON(optname < RDS_INFO_FIRST || optname > RDS_INFO_LAST);
918c2ecf20Sopenharmony_ci
928c2ecf20Sopenharmony_ci	spin_lock(&rds_info_lock);
938c2ecf20Sopenharmony_ci	BUG_ON(rds_info_funcs[offset] != func);
948c2ecf20Sopenharmony_ci	rds_info_funcs[offset] = NULL;
958c2ecf20Sopenharmony_ci	spin_unlock(&rds_info_lock);
968c2ecf20Sopenharmony_ci}
978c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(rds_info_deregister_func);
988c2ecf20Sopenharmony_ci
998c2ecf20Sopenharmony_ci/*
1008c2ecf20Sopenharmony_ci * Typically we hold an atomic kmap across multiple rds_info_copy() calls
1018c2ecf20Sopenharmony_ci * because the kmap is so expensive.  This must be called before using blocking
1028c2ecf20Sopenharmony_ci * operations while holding the mapping and as the iterator is torn down.
1038c2ecf20Sopenharmony_ci */
1048c2ecf20Sopenharmony_civoid rds_info_iter_unmap(struct rds_info_iterator *iter)
1058c2ecf20Sopenharmony_ci{
1068c2ecf20Sopenharmony_ci	if (iter->addr) {
1078c2ecf20Sopenharmony_ci		kunmap_atomic(iter->addr);
1088c2ecf20Sopenharmony_ci		iter->addr = NULL;
1098c2ecf20Sopenharmony_ci	}
1108c2ecf20Sopenharmony_ci}
1118c2ecf20Sopenharmony_ci
1128c2ecf20Sopenharmony_ci/*
1138c2ecf20Sopenharmony_ci * get_user_pages() called flush_dcache_page() on the pages for us.
1148c2ecf20Sopenharmony_ci */
1158c2ecf20Sopenharmony_civoid rds_info_copy(struct rds_info_iterator *iter, void *data,
1168c2ecf20Sopenharmony_ci		   unsigned long bytes)
1178c2ecf20Sopenharmony_ci{
1188c2ecf20Sopenharmony_ci	unsigned long this;
1198c2ecf20Sopenharmony_ci
1208c2ecf20Sopenharmony_ci	while (bytes) {
1218c2ecf20Sopenharmony_ci		if (!iter->addr)
1228c2ecf20Sopenharmony_ci			iter->addr = kmap_atomic(*iter->pages);
1238c2ecf20Sopenharmony_ci
1248c2ecf20Sopenharmony_ci		this = min(bytes, PAGE_SIZE - iter->offset);
1258c2ecf20Sopenharmony_ci
1268c2ecf20Sopenharmony_ci		rdsdebug("page %p addr %p offset %lu this %lu data %p "
1278c2ecf20Sopenharmony_ci			  "bytes %lu\n", *iter->pages, iter->addr,
1288c2ecf20Sopenharmony_ci			  iter->offset, this, data, bytes);
1298c2ecf20Sopenharmony_ci
1308c2ecf20Sopenharmony_ci		memcpy(iter->addr + iter->offset, data, this);
1318c2ecf20Sopenharmony_ci
1328c2ecf20Sopenharmony_ci		data += this;
1338c2ecf20Sopenharmony_ci		bytes -= this;
1348c2ecf20Sopenharmony_ci		iter->offset += this;
1358c2ecf20Sopenharmony_ci
1368c2ecf20Sopenharmony_ci		if (iter->offset == PAGE_SIZE) {
1378c2ecf20Sopenharmony_ci			kunmap_atomic(iter->addr);
1388c2ecf20Sopenharmony_ci			iter->addr = NULL;
1398c2ecf20Sopenharmony_ci			iter->offset = 0;
1408c2ecf20Sopenharmony_ci			iter->pages++;
1418c2ecf20Sopenharmony_ci		}
1428c2ecf20Sopenharmony_ci	}
1438c2ecf20Sopenharmony_ci}
1448c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(rds_info_copy);
1458c2ecf20Sopenharmony_ci
1468c2ecf20Sopenharmony_ci/*
1478c2ecf20Sopenharmony_ci * @optval points to the userspace buffer that the information snapshot
1488c2ecf20Sopenharmony_ci * will be copied into.
1498c2ecf20Sopenharmony_ci *
1508c2ecf20Sopenharmony_ci * @optlen on input is the size of the buffer in userspace.  @optlen
1518c2ecf20Sopenharmony_ci * on output is the size of the requested snapshot in bytes.
1528c2ecf20Sopenharmony_ci *
1538c2ecf20Sopenharmony_ci * This function returns -errno if there is a failure, particularly -ENOSPC
1548c2ecf20Sopenharmony_ci * if the given userspace buffer was not large enough to fit the snapshot.
1558c2ecf20Sopenharmony_ci * On success it returns the positive number of bytes of each array element
1568c2ecf20Sopenharmony_ci * in the snapshot.
1578c2ecf20Sopenharmony_ci */
1588c2ecf20Sopenharmony_ciint rds_info_getsockopt(struct socket *sock, int optname, char __user *optval,
1598c2ecf20Sopenharmony_ci			int __user *optlen)
1608c2ecf20Sopenharmony_ci{
1618c2ecf20Sopenharmony_ci	struct rds_info_iterator iter;
1628c2ecf20Sopenharmony_ci	struct rds_info_lengths lens;
1638c2ecf20Sopenharmony_ci	unsigned long nr_pages = 0;
1648c2ecf20Sopenharmony_ci	unsigned long start;
1658c2ecf20Sopenharmony_ci	rds_info_func func;
1668c2ecf20Sopenharmony_ci	struct page **pages = NULL;
1678c2ecf20Sopenharmony_ci	int ret;
1688c2ecf20Sopenharmony_ci	int len;
1698c2ecf20Sopenharmony_ci	int total;
1708c2ecf20Sopenharmony_ci
1718c2ecf20Sopenharmony_ci	if (get_user(len, optlen)) {
1728c2ecf20Sopenharmony_ci		ret = -EFAULT;
1738c2ecf20Sopenharmony_ci		goto out;
1748c2ecf20Sopenharmony_ci	}
1758c2ecf20Sopenharmony_ci
1768c2ecf20Sopenharmony_ci	/* check for all kinds of wrapping and the like */
1778c2ecf20Sopenharmony_ci	start = (unsigned long)optval;
1788c2ecf20Sopenharmony_ci	if (len < 0 || len > INT_MAX - PAGE_SIZE + 1 || start + len < start) {
1798c2ecf20Sopenharmony_ci		ret = -EINVAL;
1808c2ecf20Sopenharmony_ci		goto out;
1818c2ecf20Sopenharmony_ci	}
1828c2ecf20Sopenharmony_ci
1838c2ecf20Sopenharmony_ci	/* a 0 len call is just trying to probe its length */
1848c2ecf20Sopenharmony_ci	if (len == 0)
1858c2ecf20Sopenharmony_ci		goto call_func;
1868c2ecf20Sopenharmony_ci
1878c2ecf20Sopenharmony_ci	nr_pages = (PAGE_ALIGN(start + len) - (start & PAGE_MASK))
1888c2ecf20Sopenharmony_ci			>> PAGE_SHIFT;
1898c2ecf20Sopenharmony_ci
1908c2ecf20Sopenharmony_ci	pages = kmalloc_array(nr_pages, sizeof(struct page *), GFP_KERNEL);
1918c2ecf20Sopenharmony_ci	if (!pages) {
1928c2ecf20Sopenharmony_ci		ret = -ENOMEM;
1938c2ecf20Sopenharmony_ci		goto out;
1948c2ecf20Sopenharmony_ci	}
1958c2ecf20Sopenharmony_ci	ret = pin_user_pages_fast(start, nr_pages, FOLL_WRITE, pages);
1968c2ecf20Sopenharmony_ci	if (ret != nr_pages) {
1978c2ecf20Sopenharmony_ci		if (ret > 0)
1988c2ecf20Sopenharmony_ci			nr_pages = ret;
1998c2ecf20Sopenharmony_ci		else
2008c2ecf20Sopenharmony_ci			nr_pages = 0;
2018c2ecf20Sopenharmony_ci		ret = -EAGAIN; /* XXX ? */
2028c2ecf20Sopenharmony_ci		goto out;
2038c2ecf20Sopenharmony_ci	}
2048c2ecf20Sopenharmony_ci
2058c2ecf20Sopenharmony_ci	rdsdebug("len %d nr_pages %lu\n", len, nr_pages);
2068c2ecf20Sopenharmony_ci
2078c2ecf20Sopenharmony_cicall_func:
2088c2ecf20Sopenharmony_ci	func = rds_info_funcs[optname - RDS_INFO_FIRST];
2098c2ecf20Sopenharmony_ci	if (!func) {
2108c2ecf20Sopenharmony_ci		ret = -ENOPROTOOPT;
2118c2ecf20Sopenharmony_ci		goto out;
2128c2ecf20Sopenharmony_ci	}
2138c2ecf20Sopenharmony_ci
2148c2ecf20Sopenharmony_ci	iter.pages = pages;
2158c2ecf20Sopenharmony_ci	iter.addr = NULL;
2168c2ecf20Sopenharmony_ci	iter.offset = start & (PAGE_SIZE - 1);
2178c2ecf20Sopenharmony_ci
2188c2ecf20Sopenharmony_ci	func(sock, len, &iter, &lens);
2198c2ecf20Sopenharmony_ci	BUG_ON(lens.each == 0);
2208c2ecf20Sopenharmony_ci
2218c2ecf20Sopenharmony_ci	total = lens.nr * lens.each;
2228c2ecf20Sopenharmony_ci
2238c2ecf20Sopenharmony_ci	rds_info_iter_unmap(&iter);
2248c2ecf20Sopenharmony_ci
2258c2ecf20Sopenharmony_ci	if (total > len) {
2268c2ecf20Sopenharmony_ci		len = total;
2278c2ecf20Sopenharmony_ci		ret = -ENOSPC;
2288c2ecf20Sopenharmony_ci	} else {
2298c2ecf20Sopenharmony_ci		len = total;
2308c2ecf20Sopenharmony_ci		ret = lens.each;
2318c2ecf20Sopenharmony_ci	}
2328c2ecf20Sopenharmony_ci
2338c2ecf20Sopenharmony_ci	if (put_user(len, optlen))
2348c2ecf20Sopenharmony_ci		ret = -EFAULT;
2358c2ecf20Sopenharmony_ci
2368c2ecf20Sopenharmony_ciout:
2378c2ecf20Sopenharmony_ci	if (pages)
2388c2ecf20Sopenharmony_ci		unpin_user_pages(pages, nr_pages);
2398c2ecf20Sopenharmony_ci	kfree(pages);
2408c2ecf20Sopenharmony_ci
2418c2ecf20Sopenharmony_ci	return ret;
2428c2ecf20Sopenharmony_ci}
243