/*
 * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */
338c2ecf20Sopenharmony_ci#include <linux/kernel.h>
348c2ecf20Sopenharmony_ci#include <linux/slab.h>
358c2ecf20Sopenharmony_ci#include <linux/rculist.h>
368c2ecf20Sopenharmony_ci#include <linux/llist.h>
378c2ecf20Sopenharmony_ci
388c2ecf20Sopenharmony_ci#include "rds_single_path.h"
398c2ecf20Sopenharmony_ci#include "ib_mr.h"
408c2ecf20Sopenharmony_ci#include "rds.h"
418c2ecf20Sopenharmony_ci
428c2ecf20Sopenharmony_cistruct workqueue_struct *rds_ib_mr_wq;
438c2ecf20Sopenharmony_cistruct rds_ib_dereg_odp_mr {
448c2ecf20Sopenharmony_ci	struct work_struct work;
458c2ecf20Sopenharmony_ci	struct ib_mr *mr;
468c2ecf20Sopenharmony_ci};
478c2ecf20Sopenharmony_ci
488c2ecf20Sopenharmony_cistatic void rds_ib_odp_mr_worker(struct work_struct *work);
498c2ecf20Sopenharmony_ci
508c2ecf20Sopenharmony_cistatic struct rds_ib_device *rds_ib_get_device(__be32 ipaddr)
518c2ecf20Sopenharmony_ci{
528c2ecf20Sopenharmony_ci	struct rds_ib_device *rds_ibdev;
538c2ecf20Sopenharmony_ci	struct rds_ib_ipaddr *i_ipaddr;
548c2ecf20Sopenharmony_ci
558c2ecf20Sopenharmony_ci	rcu_read_lock();
568c2ecf20Sopenharmony_ci	list_for_each_entry_rcu(rds_ibdev, &rds_ib_devices, list) {
578c2ecf20Sopenharmony_ci		list_for_each_entry_rcu(i_ipaddr, &rds_ibdev->ipaddr_list, list) {
588c2ecf20Sopenharmony_ci			if (i_ipaddr->ipaddr == ipaddr) {
598c2ecf20Sopenharmony_ci				refcount_inc(&rds_ibdev->refcount);
608c2ecf20Sopenharmony_ci				rcu_read_unlock();
618c2ecf20Sopenharmony_ci				return rds_ibdev;
628c2ecf20Sopenharmony_ci			}
638c2ecf20Sopenharmony_ci		}
648c2ecf20Sopenharmony_ci	}
658c2ecf20Sopenharmony_ci	rcu_read_unlock();
668c2ecf20Sopenharmony_ci
678c2ecf20Sopenharmony_ci	return NULL;
688c2ecf20Sopenharmony_ci}
698c2ecf20Sopenharmony_ci
708c2ecf20Sopenharmony_cistatic int rds_ib_add_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
718c2ecf20Sopenharmony_ci{
728c2ecf20Sopenharmony_ci	struct rds_ib_ipaddr *i_ipaddr;
738c2ecf20Sopenharmony_ci
748c2ecf20Sopenharmony_ci	i_ipaddr = kmalloc(sizeof *i_ipaddr, GFP_KERNEL);
758c2ecf20Sopenharmony_ci	if (!i_ipaddr)
768c2ecf20Sopenharmony_ci		return -ENOMEM;
778c2ecf20Sopenharmony_ci
788c2ecf20Sopenharmony_ci	i_ipaddr->ipaddr = ipaddr;
798c2ecf20Sopenharmony_ci
808c2ecf20Sopenharmony_ci	spin_lock_irq(&rds_ibdev->spinlock);
818c2ecf20Sopenharmony_ci	list_add_tail_rcu(&i_ipaddr->list, &rds_ibdev->ipaddr_list);
828c2ecf20Sopenharmony_ci	spin_unlock_irq(&rds_ibdev->spinlock);
838c2ecf20Sopenharmony_ci
848c2ecf20Sopenharmony_ci	return 0;
858c2ecf20Sopenharmony_ci}
868c2ecf20Sopenharmony_ci
878c2ecf20Sopenharmony_cistatic void rds_ib_remove_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr)
888c2ecf20Sopenharmony_ci{
898c2ecf20Sopenharmony_ci	struct rds_ib_ipaddr *i_ipaddr;
908c2ecf20Sopenharmony_ci	struct rds_ib_ipaddr *to_free = NULL;
918c2ecf20Sopenharmony_ci
928c2ecf20Sopenharmony_ci
938c2ecf20Sopenharmony_ci	spin_lock_irq(&rds_ibdev->spinlock);
948c2ecf20Sopenharmony_ci	list_for_each_entry_rcu(i_ipaddr, &rds_ibdev->ipaddr_list, list) {
958c2ecf20Sopenharmony_ci		if (i_ipaddr->ipaddr == ipaddr) {
968c2ecf20Sopenharmony_ci			list_del_rcu(&i_ipaddr->list);
978c2ecf20Sopenharmony_ci			to_free = i_ipaddr;
988c2ecf20Sopenharmony_ci			break;
998c2ecf20Sopenharmony_ci		}
1008c2ecf20Sopenharmony_ci	}
1018c2ecf20Sopenharmony_ci	spin_unlock_irq(&rds_ibdev->spinlock);
1028c2ecf20Sopenharmony_ci
1038c2ecf20Sopenharmony_ci	if (to_free)
1048c2ecf20Sopenharmony_ci		kfree_rcu(to_free, rcu);
1058c2ecf20Sopenharmony_ci}
1068c2ecf20Sopenharmony_ci
1078c2ecf20Sopenharmony_ciint rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev,
1088c2ecf20Sopenharmony_ci			 struct in6_addr *ipaddr)
1098c2ecf20Sopenharmony_ci{
1108c2ecf20Sopenharmony_ci	struct rds_ib_device *rds_ibdev_old;
1118c2ecf20Sopenharmony_ci
1128c2ecf20Sopenharmony_ci	rds_ibdev_old = rds_ib_get_device(ipaddr->s6_addr32[3]);
1138c2ecf20Sopenharmony_ci	if (!rds_ibdev_old)
1148c2ecf20Sopenharmony_ci		return rds_ib_add_ipaddr(rds_ibdev, ipaddr->s6_addr32[3]);
1158c2ecf20Sopenharmony_ci
1168c2ecf20Sopenharmony_ci	if (rds_ibdev_old != rds_ibdev) {
1178c2ecf20Sopenharmony_ci		rds_ib_remove_ipaddr(rds_ibdev_old, ipaddr->s6_addr32[3]);
1188c2ecf20Sopenharmony_ci		rds_ib_dev_put(rds_ibdev_old);
1198c2ecf20Sopenharmony_ci		return rds_ib_add_ipaddr(rds_ibdev, ipaddr->s6_addr32[3]);
1208c2ecf20Sopenharmony_ci	}
1218c2ecf20Sopenharmony_ci	rds_ib_dev_put(rds_ibdev_old);
1228c2ecf20Sopenharmony_ci
1238c2ecf20Sopenharmony_ci	return 0;
1248c2ecf20Sopenharmony_ci}
1258c2ecf20Sopenharmony_ci
1268c2ecf20Sopenharmony_civoid rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn)
1278c2ecf20Sopenharmony_ci{
1288c2ecf20Sopenharmony_ci	struct rds_ib_connection *ic = conn->c_transport_data;
1298c2ecf20Sopenharmony_ci
1308c2ecf20Sopenharmony_ci	/* conn was previously on the nodev_conns_list */
1318c2ecf20Sopenharmony_ci	spin_lock_irq(&ib_nodev_conns_lock);
1328c2ecf20Sopenharmony_ci	BUG_ON(list_empty(&ib_nodev_conns));
1338c2ecf20Sopenharmony_ci	BUG_ON(list_empty(&ic->ib_node));
1348c2ecf20Sopenharmony_ci	list_del(&ic->ib_node);
1358c2ecf20Sopenharmony_ci
1368c2ecf20Sopenharmony_ci	spin_lock(&rds_ibdev->spinlock);
1378c2ecf20Sopenharmony_ci	list_add_tail(&ic->ib_node, &rds_ibdev->conn_list);
1388c2ecf20Sopenharmony_ci	spin_unlock(&rds_ibdev->spinlock);
1398c2ecf20Sopenharmony_ci	spin_unlock_irq(&ib_nodev_conns_lock);
1408c2ecf20Sopenharmony_ci
1418c2ecf20Sopenharmony_ci	ic->rds_ibdev = rds_ibdev;
1428c2ecf20Sopenharmony_ci	refcount_inc(&rds_ibdev->refcount);
1438c2ecf20Sopenharmony_ci}
1448c2ecf20Sopenharmony_ci
1458c2ecf20Sopenharmony_civoid rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn)
1468c2ecf20Sopenharmony_ci{
1478c2ecf20Sopenharmony_ci	struct rds_ib_connection *ic = conn->c_transport_data;
1488c2ecf20Sopenharmony_ci
1498c2ecf20Sopenharmony_ci	/* place conn on nodev_conns_list */
1508c2ecf20Sopenharmony_ci	spin_lock(&ib_nodev_conns_lock);
1518c2ecf20Sopenharmony_ci
1528c2ecf20Sopenharmony_ci	spin_lock_irq(&rds_ibdev->spinlock);
1538c2ecf20Sopenharmony_ci	BUG_ON(list_empty(&ic->ib_node));
1548c2ecf20Sopenharmony_ci	list_del(&ic->ib_node);
1558c2ecf20Sopenharmony_ci	spin_unlock_irq(&rds_ibdev->spinlock);
1568c2ecf20Sopenharmony_ci
1578c2ecf20Sopenharmony_ci	list_add_tail(&ic->ib_node, &ib_nodev_conns);
1588c2ecf20Sopenharmony_ci
1598c2ecf20Sopenharmony_ci	spin_unlock(&ib_nodev_conns_lock);
1608c2ecf20Sopenharmony_ci
1618c2ecf20Sopenharmony_ci	ic->rds_ibdev = NULL;
1628c2ecf20Sopenharmony_ci	rds_ib_dev_put(rds_ibdev);
1638c2ecf20Sopenharmony_ci}
1648c2ecf20Sopenharmony_ci
1658c2ecf20Sopenharmony_civoid rds_ib_destroy_nodev_conns(void)
1668c2ecf20Sopenharmony_ci{
1678c2ecf20Sopenharmony_ci	struct rds_ib_connection *ic, *_ic;
1688c2ecf20Sopenharmony_ci	LIST_HEAD(tmp_list);
1698c2ecf20Sopenharmony_ci
1708c2ecf20Sopenharmony_ci	/* avoid calling conn_destroy with irqs off */
1718c2ecf20Sopenharmony_ci	spin_lock_irq(&ib_nodev_conns_lock);
1728c2ecf20Sopenharmony_ci	list_splice(&ib_nodev_conns, &tmp_list);
1738c2ecf20Sopenharmony_ci	spin_unlock_irq(&ib_nodev_conns_lock);
1748c2ecf20Sopenharmony_ci
1758c2ecf20Sopenharmony_ci	list_for_each_entry_safe(ic, _ic, &tmp_list, ib_node)
1768c2ecf20Sopenharmony_ci		rds_conn_destroy(ic->conn);
1778c2ecf20Sopenharmony_ci}
1788c2ecf20Sopenharmony_ci
1798c2ecf20Sopenharmony_civoid rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo)
1808c2ecf20Sopenharmony_ci{
1818c2ecf20Sopenharmony_ci	struct rds_ib_mr_pool *pool_1m = rds_ibdev->mr_1m_pool;
1828c2ecf20Sopenharmony_ci
1838c2ecf20Sopenharmony_ci	iinfo->rdma_mr_max = pool_1m->max_items;
1848c2ecf20Sopenharmony_ci	iinfo->rdma_mr_size = pool_1m->max_pages;
1858c2ecf20Sopenharmony_ci}
1868c2ecf20Sopenharmony_ci
1878c2ecf20Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6)
1888c2ecf20Sopenharmony_civoid rds6_ib_get_mr_info(struct rds_ib_device *rds_ibdev,
1898c2ecf20Sopenharmony_ci			 struct rds6_info_rdma_connection *iinfo6)
1908c2ecf20Sopenharmony_ci{
1918c2ecf20Sopenharmony_ci	struct rds_ib_mr_pool *pool_1m = rds_ibdev->mr_1m_pool;
1928c2ecf20Sopenharmony_ci
1938c2ecf20Sopenharmony_ci	iinfo6->rdma_mr_max = pool_1m->max_items;
1948c2ecf20Sopenharmony_ci	iinfo6->rdma_mr_size = pool_1m->max_pages;
1958c2ecf20Sopenharmony_ci}
1968c2ecf20Sopenharmony_ci#endif
1978c2ecf20Sopenharmony_ci
1988c2ecf20Sopenharmony_cistruct rds_ib_mr *rds_ib_reuse_mr(struct rds_ib_mr_pool *pool)
1998c2ecf20Sopenharmony_ci{
2008c2ecf20Sopenharmony_ci	struct rds_ib_mr *ibmr = NULL;
2018c2ecf20Sopenharmony_ci	struct llist_node *ret;
2028c2ecf20Sopenharmony_ci	unsigned long flags;
2038c2ecf20Sopenharmony_ci
2048c2ecf20Sopenharmony_ci	spin_lock_irqsave(&pool->clean_lock, flags);
2058c2ecf20Sopenharmony_ci	ret = llist_del_first(&pool->clean_list);
2068c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&pool->clean_lock, flags);
2078c2ecf20Sopenharmony_ci	if (ret) {
2088c2ecf20Sopenharmony_ci		ibmr = llist_entry(ret, struct rds_ib_mr, llnode);
2098c2ecf20Sopenharmony_ci		if (pool->pool_type == RDS_IB_MR_8K_POOL)
2108c2ecf20Sopenharmony_ci			rds_ib_stats_inc(s_ib_rdma_mr_8k_reused);
2118c2ecf20Sopenharmony_ci		else
2128c2ecf20Sopenharmony_ci			rds_ib_stats_inc(s_ib_rdma_mr_1m_reused);
2138c2ecf20Sopenharmony_ci	}
2148c2ecf20Sopenharmony_ci
2158c2ecf20Sopenharmony_ci	return ibmr;
2168c2ecf20Sopenharmony_ci}
2178c2ecf20Sopenharmony_ci
2188c2ecf20Sopenharmony_civoid rds_ib_sync_mr(void *trans_private, int direction)
2198c2ecf20Sopenharmony_ci{
2208c2ecf20Sopenharmony_ci	struct rds_ib_mr *ibmr = trans_private;
2218c2ecf20Sopenharmony_ci	struct rds_ib_device *rds_ibdev = ibmr->device;
2228c2ecf20Sopenharmony_ci
2238c2ecf20Sopenharmony_ci	if (ibmr->odp)
2248c2ecf20Sopenharmony_ci		return;
2258c2ecf20Sopenharmony_ci
2268c2ecf20Sopenharmony_ci	switch (direction) {
2278c2ecf20Sopenharmony_ci	case DMA_FROM_DEVICE:
2288c2ecf20Sopenharmony_ci		ib_dma_sync_sg_for_cpu(rds_ibdev->dev, ibmr->sg,
2298c2ecf20Sopenharmony_ci			ibmr->sg_dma_len, DMA_BIDIRECTIONAL);
2308c2ecf20Sopenharmony_ci		break;
2318c2ecf20Sopenharmony_ci	case DMA_TO_DEVICE:
2328c2ecf20Sopenharmony_ci		ib_dma_sync_sg_for_device(rds_ibdev->dev, ibmr->sg,
2338c2ecf20Sopenharmony_ci			ibmr->sg_dma_len, DMA_BIDIRECTIONAL);
2348c2ecf20Sopenharmony_ci		break;
2358c2ecf20Sopenharmony_ci	}
2368c2ecf20Sopenharmony_ci}
2378c2ecf20Sopenharmony_ci
2388c2ecf20Sopenharmony_civoid __rds_ib_teardown_mr(struct rds_ib_mr *ibmr)
2398c2ecf20Sopenharmony_ci{
2408c2ecf20Sopenharmony_ci	struct rds_ib_device *rds_ibdev = ibmr->device;
2418c2ecf20Sopenharmony_ci
2428c2ecf20Sopenharmony_ci	if (ibmr->sg_dma_len) {
2438c2ecf20Sopenharmony_ci		ib_dma_unmap_sg(rds_ibdev->dev,
2448c2ecf20Sopenharmony_ci				ibmr->sg, ibmr->sg_len,
2458c2ecf20Sopenharmony_ci				DMA_BIDIRECTIONAL);
2468c2ecf20Sopenharmony_ci		ibmr->sg_dma_len = 0;
2478c2ecf20Sopenharmony_ci	}
2488c2ecf20Sopenharmony_ci
2498c2ecf20Sopenharmony_ci	/* Release the s/g list */
2508c2ecf20Sopenharmony_ci	if (ibmr->sg_len) {
2518c2ecf20Sopenharmony_ci		unsigned int i;
2528c2ecf20Sopenharmony_ci
2538c2ecf20Sopenharmony_ci		for (i = 0; i < ibmr->sg_len; ++i) {
2548c2ecf20Sopenharmony_ci			struct page *page = sg_page(&ibmr->sg[i]);
2558c2ecf20Sopenharmony_ci
2568c2ecf20Sopenharmony_ci			/* FIXME we need a way to tell a r/w MR
2578c2ecf20Sopenharmony_ci			 * from a r/o MR */
2588c2ecf20Sopenharmony_ci			WARN_ON(!page->mapping && irqs_disabled());
2598c2ecf20Sopenharmony_ci			set_page_dirty(page);
2608c2ecf20Sopenharmony_ci			put_page(page);
2618c2ecf20Sopenharmony_ci		}
2628c2ecf20Sopenharmony_ci		kfree(ibmr->sg);
2638c2ecf20Sopenharmony_ci
2648c2ecf20Sopenharmony_ci		ibmr->sg = NULL;
2658c2ecf20Sopenharmony_ci		ibmr->sg_len = 0;
2668c2ecf20Sopenharmony_ci	}
2678c2ecf20Sopenharmony_ci}
2688c2ecf20Sopenharmony_ci
2698c2ecf20Sopenharmony_civoid rds_ib_teardown_mr(struct rds_ib_mr *ibmr)
2708c2ecf20Sopenharmony_ci{
2718c2ecf20Sopenharmony_ci	unsigned int pinned = ibmr->sg_len;
2728c2ecf20Sopenharmony_ci
2738c2ecf20Sopenharmony_ci	__rds_ib_teardown_mr(ibmr);
2748c2ecf20Sopenharmony_ci	if (pinned) {
2758c2ecf20Sopenharmony_ci		struct rds_ib_mr_pool *pool = ibmr->pool;
2768c2ecf20Sopenharmony_ci
2778c2ecf20Sopenharmony_ci		atomic_sub(pinned, &pool->free_pinned);
2788c2ecf20Sopenharmony_ci	}
2798c2ecf20Sopenharmony_ci}
2808c2ecf20Sopenharmony_ci
2818c2ecf20Sopenharmony_cistatic inline unsigned int rds_ib_flush_goal(struct rds_ib_mr_pool *pool, int free_all)
2828c2ecf20Sopenharmony_ci{
2838c2ecf20Sopenharmony_ci	unsigned int item_count;
2848c2ecf20Sopenharmony_ci
2858c2ecf20Sopenharmony_ci	item_count = atomic_read(&pool->item_count);
2868c2ecf20Sopenharmony_ci	if (free_all)
2878c2ecf20Sopenharmony_ci		return item_count;
2888c2ecf20Sopenharmony_ci
2898c2ecf20Sopenharmony_ci	return 0;
2908c2ecf20Sopenharmony_ci}
2918c2ecf20Sopenharmony_ci
2928c2ecf20Sopenharmony_ci/*
2938c2ecf20Sopenharmony_ci * given an llist of mrs, put them all into the list_head for more processing
2948c2ecf20Sopenharmony_ci */
2958c2ecf20Sopenharmony_cistatic unsigned int llist_append_to_list(struct llist_head *llist,
2968c2ecf20Sopenharmony_ci					 struct list_head *list)
2978c2ecf20Sopenharmony_ci{
2988c2ecf20Sopenharmony_ci	struct rds_ib_mr *ibmr;
2998c2ecf20Sopenharmony_ci	struct llist_node *node;
3008c2ecf20Sopenharmony_ci	struct llist_node *next;
3018c2ecf20Sopenharmony_ci	unsigned int count = 0;
3028c2ecf20Sopenharmony_ci
3038c2ecf20Sopenharmony_ci	node = llist_del_all(llist);
3048c2ecf20Sopenharmony_ci	while (node) {
3058c2ecf20Sopenharmony_ci		next = node->next;
3068c2ecf20Sopenharmony_ci		ibmr = llist_entry(node, struct rds_ib_mr, llnode);
3078c2ecf20Sopenharmony_ci		list_add_tail(&ibmr->unmap_list, list);
3088c2ecf20Sopenharmony_ci		node = next;
3098c2ecf20Sopenharmony_ci		count++;
3108c2ecf20Sopenharmony_ci	}
3118c2ecf20Sopenharmony_ci	return count;
3128c2ecf20Sopenharmony_ci}
3138c2ecf20Sopenharmony_ci
3148c2ecf20Sopenharmony_ci/*
3158c2ecf20Sopenharmony_ci * this takes a list head of mrs and turns it into linked llist nodes
3168c2ecf20Sopenharmony_ci * of clusters.  Each cluster has linked llist nodes of
3178c2ecf20Sopenharmony_ci * MR_CLUSTER_SIZE mrs that are ready for reuse.
3188c2ecf20Sopenharmony_ci */
3198c2ecf20Sopenharmony_cistatic void list_to_llist_nodes(struct list_head *list,
3208c2ecf20Sopenharmony_ci				struct llist_node **nodes_head,
3218c2ecf20Sopenharmony_ci				struct llist_node **nodes_tail)
3228c2ecf20Sopenharmony_ci{
3238c2ecf20Sopenharmony_ci	struct rds_ib_mr *ibmr;
3248c2ecf20Sopenharmony_ci	struct llist_node *cur = NULL;
3258c2ecf20Sopenharmony_ci	struct llist_node **next = nodes_head;
3268c2ecf20Sopenharmony_ci
3278c2ecf20Sopenharmony_ci	list_for_each_entry(ibmr, list, unmap_list) {
3288c2ecf20Sopenharmony_ci		cur = &ibmr->llnode;
3298c2ecf20Sopenharmony_ci		*next = cur;
3308c2ecf20Sopenharmony_ci		next = &cur->next;
3318c2ecf20Sopenharmony_ci	}
3328c2ecf20Sopenharmony_ci	*next = NULL;
3338c2ecf20Sopenharmony_ci	*nodes_tail = cur;
3348c2ecf20Sopenharmony_ci}
3358c2ecf20Sopenharmony_ci
3368c2ecf20Sopenharmony_ci/*
3378c2ecf20Sopenharmony_ci * Flush our pool of MRs.
3388c2ecf20Sopenharmony_ci * At a minimum, all currently unused MRs are unmapped.
3398c2ecf20Sopenharmony_ci * If the number of MRs allocated exceeds the limit, we also try
3408c2ecf20Sopenharmony_ci * to free as many MRs as needed to get back to this limit.
3418c2ecf20Sopenharmony_ci */
3428c2ecf20Sopenharmony_ciint rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool,
3438c2ecf20Sopenharmony_ci			 int free_all, struct rds_ib_mr **ibmr_ret)
3448c2ecf20Sopenharmony_ci{
3458c2ecf20Sopenharmony_ci	struct rds_ib_mr *ibmr;
3468c2ecf20Sopenharmony_ci	struct llist_node *clean_nodes;
3478c2ecf20Sopenharmony_ci	struct llist_node *clean_tail;
3488c2ecf20Sopenharmony_ci	LIST_HEAD(unmap_list);
3498c2ecf20Sopenharmony_ci	unsigned long unpinned = 0;
3508c2ecf20Sopenharmony_ci	unsigned int nfreed = 0, dirty_to_clean = 0, free_goal;
3518c2ecf20Sopenharmony_ci
3528c2ecf20Sopenharmony_ci	if (pool->pool_type == RDS_IB_MR_8K_POOL)
3538c2ecf20Sopenharmony_ci		rds_ib_stats_inc(s_ib_rdma_mr_8k_pool_flush);
3548c2ecf20Sopenharmony_ci	else
3558c2ecf20Sopenharmony_ci		rds_ib_stats_inc(s_ib_rdma_mr_1m_pool_flush);
3568c2ecf20Sopenharmony_ci
3578c2ecf20Sopenharmony_ci	if (ibmr_ret) {
3588c2ecf20Sopenharmony_ci		DEFINE_WAIT(wait);
3598c2ecf20Sopenharmony_ci		while (!mutex_trylock(&pool->flush_lock)) {
3608c2ecf20Sopenharmony_ci			ibmr = rds_ib_reuse_mr(pool);
3618c2ecf20Sopenharmony_ci			if (ibmr) {
3628c2ecf20Sopenharmony_ci				*ibmr_ret = ibmr;
3638c2ecf20Sopenharmony_ci				finish_wait(&pool->flush_wait, &wait);
3648c2ecf20Sopenharmony_ci				goto out_nolock;
3658c2ecf20Sopenharmony_ci			}
3668c2ecf20Sopenharmony_ci
3678c2ecf20Sopenharmony_ci			prepare_to_wait(&pool->flush_wait, &wait,
3688c2ecf20Sopenharmony_ci					TASK_UNINTERRUPTIBLE);
3698c2ecf20Sopenharmony_ci			if (llist_empty(&pool->clean_list))
3708c2ecf20Sopenharmony_ci				schedule();
3718c2ecf20Sopenharmony_ci
3728c2ecf20Sopenharmony_ci			ibmr = rds_ib_reuse_mr(pool);
3738c2ecf20Sopenharmony_ci			if (ibmr) {
3748c2ecf20Sopenharmony_ci				*ibmr_ret = ibmr;
3758c2ecf20Sopenharmony_ci				finish_wait(&pool->flush_wait, &wait);
3768c2ecf20Sopenharmony_ci				goto out_nolock;
3778c2ecf20Sopenharmony_ci			}
3788c2ecf20Sopenharmony_ci		}
3798c2ecf20Sopenharmony_ci		finish_wait(&pool->flush_wait, &wait);
3808c2ecf20Sopenharmony_ci	} else
3818c2ecf20Sopenharmony_ci		mutex_lock(&pool->flush_lock);
3828c2ecf20Sopenharmony_ci
3838c2ecf20Sopenharmony_ci	if (ibmr_ret) {
3848c2ecf20Sopenharmony_ci		ibmr = rds_ib_reuse_mr(pool);
3858c2ecf20Sopenharmony_ci		if (ibmr) {
3868c2ecf20Sopenharmony_ci			*ibmr_ret = ibmr;
3878c2ecf20Sopenharmony_ci			goto out;
3888c2ecf20Sopenharmony_ci		}
3898c2ecf20Sopenharmony_ci	}
3908c2ecf20Sopenharmony_ci
3918c2ecf20Sopenharmony_ci	/* Get the list of all MRs to be dropped. Ordering matters -
3928c2ecf20Sopenharmony_ci	 * we want to put drop_list ahead of free_list.
3938c2ecf20Sopenharmony_ci	 */
3948c2ecf20Sopenharmony_ci	dirty_to_clean = llist_append_to_list(&pool->drop_list, &unmap_list);
3958c2ecf20Sopenharmony_ci	dirty_to_clean += llist_append_to_list(&pool->free_list, &unmap_list);
3968c2ecf20Sopenharmony_ci	if (free_all) {
3978c2ecf20Sopenharmony_ci		unsigned long flags;
3988c2ecf20Sopenharmony_ci
3998c2ecf20Sopenharmony_ci		spin_lock_irqsave(&pool->clean_lock, flags);
4008c2ecf20Sopenharmony_ci		llist_append_to_list(&pool->clean_list, &unmap_list);
4018c2ecf20Sopenharmony_ci		spin_unlock_irqrestore(&pool->clean_lock, flags);
4028c2ecf20Sopenharmony_ci	}
4038c2ecf20Sopenharmony_ci
4048c2ecf20Sopenharmony_ci	free_goal = rds_ib_flush_goal(pool, free_all);
4058c2ecf20Sopenharmony_ci
4068c2ecf20Sopenharmony_ci	if (list_empty(&unmap_list))
4078c2ecf20Sopenharmony_ci		goto out;
4088c2ecf20Sopenharmony_ci
4098c2ecf20Sopenharmony_ci	rds_ib_unreg_frmr(&unmap_list, &nfreed, &unpinned, free_goal);
4108c2ecf20Sopenharmony_ci
4118c2ecf20Sopenharmony_ci	if (!list_empty(&unmap_list)) {
4128c2ecf20Sopenharmony_ci		unsigned long flags;
4138c2ecf20Sopenharmony_ci
4148c2ecf20Sopenharmony_ci		list_to_llist_nodes(&unmap_list, &clean_nodes, &clean_tail);
4158c2ecf20Sopenharmony_ci		if (ibmr_ret) {
4168c2ecf20Sopenharmony_ci			*ibmr_ret = llist_entry(clean_nodes, struct rds_ib_mr, llnode);
4178c2ecf20Sopenharmony_ci			clean_nodes = clean_nodes->next;
4188c2ecf20Sopenharmony_ci		}
4198c2ecf20Sopenharmony_ci		/* more than one entry in llist nodes */
4208c2ecf20Sopenharmony_ci		if (clean_nodes) {
4218c2ecf20Sopenharmony_ci			spin_lock_irqsave(&pool->clean_lock, flags);
4228c2ecf20Sopenharmony_ci			llist_add_batch(clean_nodes, clean_tail,
4238c2ecf20Sopenharmony_ci					&pool->clean_list);
4248c2ecf20Sopenharmony_ci			spin_unlock_irqrestore(&pool->clean_lock, flags);
4258c2ecf20Sopenharmony_ci		}
4268c2ecf20Sopenharmony_ci	}
4278c2ecf20Sopenharmony_ci
4288c2ecf20Sopenharmony_ci	atomic_sub(unpinned, &pool->free_pinned);
4298c2ecf20Sopenharmony_ci	atomic_sub(dirty_to_clean, &pool->dirty_count);
4308c2ecf20Sopenharmony_ci	atomic_sub(nfreed, &pool->item_count);
4318c2ecf20Sopenharmony_ci
4328c2ecf20Sopenharmony_ciout:
4338c2ecf20Sopenharmony_ci	mutex_unlock(&pool->flush_lock);
4348c2ecf20Sopenharmony_ci	if (waitqueue_active(&pool->flush_wait))
4358c2ecf20Sopenharmony_ci		wake_up(&pool->flush_wait);
4368c2ecf20Sopenharmony_ciout_nolock:
4378c2ecf20Sopenharmony_ci	return 0;
4388c2ecf20Sopenharmony_ci}
4398c2ecf20Sopenharmony_ci
4408c2ecf20Sopenharmony_cistruct rds_ib_mr *rds_ib_try_reuse_ibmr(struct rds_ib_mr_pool *pool)
4418c2ecf20Sopenharmony_ci{
4428c2ecf20Sopenharmony_ci	struct rds_ib_mr *ibmr = NULL;
4438c2ecf20Sopenharmony_ci	int iter = 0;
4448c2ecf20Sopenharmony_ci
4458c2ecf20Sopenharmony_ci	while (1) {
4468c2ecf20Sopenharmony_ci		ibmr = rds_ib_reuse_mr(pool);
4478c2ecf20Sopenharmony_ci		if (ibmr)
4488c2ecf20Sopenharmony_ci			return ibmr;
4498c2ecf20Sopenharmony_ci
4508c2ecf20Sopenharmony_ci		if (atomic_inc_return(&pool->item_count) <= pool->max_items)
4518c2ecf20Sopenharmony_ci			break;
4528c2ecf20Sopenharmony_ci
4538c2ecf20Sopenharmony_ci		atomic_dec(&pool->item_count);
4548c2ecf20Sopenharmony_ci
4558c2ecf20Sopenharmony_ci		if (++iter > 2) {
4568c2ecf20Sopenharmony_ci			if (pool->pool_type == RDS_IB_MR_8K_POOL)
4578c2ecf20Sopenharmony_ci				rds_ib_stats_inc(s_ib_rdma_mr_8k_pool_depleted);
4588c2ecf20Sopenharmony_ci			else
4598c2ecf20Sopenharmony_ci				rds_ib_stats_inc(s_ib_rdma_mr_1m_pool_depleted);
4608c2ecf20Sopenharmony_ci			break;
4618c2ecf20Sopenharmony_ci		}
4628c2ecf20Sopenharmony_ci
4638c2ecf20Sopenharmony_ci		/* We do have some empty MRs. Flush them out. */
4648c2ecf20Sopenharmony_ci		if (pool->pool_type == RDS_IB_MR_8K_POOL)
4658c2ecf20Sopenharmony_ci			rds_ib_stats_inc(s_ib_rdma_mr_8k_pool_wait);
4668c2ecf20Sopenharmony_ci		else
4678c2ecf20Sopenharmony_ci			rds_ib_stats_inc(s_ib_rdma_mr_1m_pool_wait);
4688c2ecf20Sopenharmony_ci
4698c2ecf20Sopenharmony_ci		rds_ib_flush_mr_pool(pool, 0, &ibmr);
4708c2ecf20Sopenharmony_ci		if (ibmr)
4718c2ecf20Sopenharmony_ci			return ibmr;
4728c2ecf20Sopenharmony_ci	}
4738c2ecf20Sopenharmony_ci
4748c2ecf20Sopenharmony_ci	return NULL;
4758c2ecf20Sopenharmony_ci}
4768c2ecf20Sopenharmony_ci
4778c2ecf20Sopenharmony_cistatic void rds_ib_mr_pool_flush_worker(struct work_struct *work)
4788c2ecf20Sopenharmony_ci{
4798c2ecf20Sopenharmony_ci	struct rds_ib_mr_pool *pool = container_of(work, struct rds_ib_mr_pool, flush_worker.work);
4808c2ecf20Sopenharmony_ci
4818c2ecf20Sopenharmony_ci	rds_ib_flush_mr_pool(pool, 0, NULL);
4828c2ecf20Sopenharmony_ci}
4838c2ecf20Sopenharmony_ci
4848c2ecf20Sopenharmony_civoid rds_ib_free_mr(void *trans_private, int invalidate)
4858c2ecf20Sopenharmony_ci{
4868c2ecf20Sopenharmony_ci	struct rds_ib_mr *ibmr = trans_private;
4878c2ecf20Sopenharmony_ci	struct rds_ib_mr_pool *pool = ibmr->pool;
4888c2ecf20Sopenharmony_ci	struct rds_ib_device *rds_ibdev = ibmr->device;
4898c2ecf20Sopenharmony_ci
4908c2ecf20Sopenharmony_ci	rdsdebug("RDS/IB: free_mr nents %u\n", ibmr->sg_len);
4918c2ecf20Sopenharmony_ci
4928c2ecf20Sopenharmony_ci	if (ibmr->odp) {
4938c2ecf20Sopenharmony_ci		/* A MR created and marked as use_once. We use delayed work,
4948c2ecf20Sopenharmony_ci		 * because there is a change that we are in interrupt and can't
4958c2ecf20Sopenharmony_ci		 * call to ib_dereg_mr() directly.
4968c2ecf20Sopenharmony_ci		 */
4978c2ecf20Sopenharmony_ci		INIT_DELAYED_WORK(&ibmr->work, rds_ib_odp_mr_worker);
4988c2ecf20Sopenharmony_ci		queue_delayed_work(rds_ib_mr_wq, &ibmr->work, 0);
4998c2ecf20Sopenharmony_ci		return;
5008c2ecf20Sopenharmony_ci	}
5018c2ecf20Sopenharmony_ci
5028c2ecf20Sopenharmony_ci	/* Return it to the pool's free list */
5038c2ecf20Sopenharmony_ci	rds_ib_free_frmr_list(ibmr);
5048c2ecf20Sopenharmony_ci
5058c2ecf20Sopenharmony_ci	atomic_add(ibmr->sg_len, &pool->free_pinned);
5068c2ecf20Sopenharmony_ci	atomic_inc(&pool->dirty_count);
5078c2ecf20Sopenharmony_ci
5088c2ecf20Sopenharmony_ci	/* If we've pinned too many pages, request a flush */
5098c2ecf20Sopenharmony_ci	if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned ||
5108c2ecf20Sopenharmony_ci	    atomic_read(&pool->dirty_count) >= pool->max_items / 5)
5118c2ecf20Sopenharmony_ci		queue_delayed_work(rds_ib_mr_wq, &pool->flush_worker, 10);
5128c2ecf20Sopenharmony_ci
5138c2ecf20Sopenharmony_ci	if (invalidate) {
5148c2ecf20Sopenharmony_ci		if (likely(!in_interrupt())) {
5158c2ecf20Sopenharmony_ci			rds_ib_flush_mr_pool(pool, 0, NULL);
5168c2ecf20Sopenharmony_ci		} else {
5178c2ecf20Sopenharmony_ci			/* We get here if the user created a MR marked
5188c2ecf20Sopenharmony_ci			 * as use_once and invalidate at the same time.
5198c2ecf20Sopenharmony_ci			 */
5208c2ecf20Sopenharmony_ci			queue_delayed_work(rds_ib_mr_wq,
5218c2ecf20Sopenharmony_ci					   &pool->flush_worker, 10);
5228c2ecf20Sopenharmony_ci		}
5238c2ecf20Sopenharmony_ci	}
5248c2ecf20Sopenharmony_ci
5258c2ecf20Sopenharmony_ci	rds_ib_dev_put(rds_ibdev);
5268c2ecf20Sopenharmony_ci}
5278c2ecf20Sopenharmony_ci
5288c2ecf20Sopenharmony_civoid rds_ib_flush_mrs(void)
5298c2ecf20Sopenharmony_ci{
5308c2ecf20Sopenharmony_ci	struct rds_ib_device *rds_ibdev;
5318c2ecf20Sopenharmony_ci
5328c2ecf20Sopenharmony_ci	down_read(&rds_ib_devices_lock);
5338c2ecf20Sopenharmony_ci	list_for_each_entry(rds_ibdev, &rds_ib_devices, list) {
5348c2ecf20Sopenharmony_ci		if (rds_ibdev->mr_8k_pool)
5358c2ecf20Sopenharmony_ci			rds_ib_flush_mr_pool(rds_ibdev->mr_8k_pool, 0, NULL);
5368c2ecf20Sopenharmony_ci
5378c2ecf20Sopenharmony_ci		if (rds_ibdev->mr_1m_pool)
5388c2ecf20Sopenharmony_ci			rds_ib_flush_mr_pool(rds_ibdev->mr_1m_pool, 0, NULL);
5398c2ecf20Sopenharmony_ci	}
5408c2ecf20Sopenharmony_ci	up_read(&rds_ib_devices_lock);
5418c2ecf20Sopenharmony_ci}
5428c2ecf20Sopenharmony_ci
5438c2ecf20Sopenharmony_ciu32 rds_ib_get_lkey(void *trans_private)
5448c2ecf20Sopenharmony_ci{
5458c2ecf20Sopenharmony_ci	struct rds_ib_mr *ibmr = trans_private;
5468c2ecf20Sopenharmony_ci
5478c2ecf20Sopenharmony_ci	return ibmr->u.mr->lkey;
5488c2ecf20Sopenharmony_ci}
5498c2ecf20Sopenharmony_ci
5508c2ecf20Sopenharmony_civoid *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
5518c2ecf20Sopenharmony_ci		    struct rds_sock *rs, u32 *key_ret,
5528c2ecf20Sopenharmony_ci		    struct rds_connection *conn,
5538c2ecf20Sopenharmony_ci		    u64 start, u64 length, int need_odp)
5548c2ecf20Sopenharmony_ci{
5558c2ecf20Sopenharmony_ci	struct rds_ib_device *rds_ibdev;
5568c2ecf20Sopenharmony_ci	struct rds_ib_mr *ibmr = NULL;
5578c2ecf20Sopenharmony_ci	struct rds_ib_connection *ic = NULL;
5588c2ecf20Sopenharmony_ci	int ret;
5598c2ecf20Sopenharmony_ci
5608c2ecf20Sopenharmony_ci	rds_ibdev = rds_ib_get_device(rs->rs_bound_addr.s6_addr32[3]);
5618c2ecf20Sopenharmony_ci	if (!rds_ibdev) {
5628c2ecf20Sopenharmony_ci		ret = -ENODEV;
5638c2ecf20Sopenharmony_ci		goto out;
5648c2ecf20Sopenharmony_ci	}
5658c2ecf20Sopenharmony_ci
5668c2ecf20Sopenharmony_ci	if (need_odp == ODP_ZEROBASED || need_odp == ODP_VIRTUAL) {
5678c2ecf20Sopenharmony_ci		u64 virt_addr = need_odp == ODP_ZEROBASED ? 0 : start;
5688c2ecf20Sopenharmony_ci		int access_flags =
5698c2ecf20Sopenharmony_ci			(IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ |
5708c2ecf20Sopenharmony_ci			 IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_ATOMIC |
5718c2ecf20Sopenharmony_ci			 IB_ACCESS_ON_DEMAND);
5728c2ecf20Sopenharmony_ci		struct ib_sge sge = {};
5738c2ecf20Sopenharmony_ci		struct ib_mr *ib_mr;
5748c2ecf20Sopenharmony_ci
5758c2ecf20Sopenharmony_ci		if (!rds_ibdev->odp_capable) {
5768c2ecf20Sopenharmony_ci			ret = -EOPNOTSUPP;
5778c2ecf20Sopenharmony_ci			goto out;
5788c2ecf20Sopenharmony_ci		}
5798c2ecf20Sopenharmony_ci
5808c2ecf20Sopenharmony_ci		ib_mr = ib_reg_user_mr(rds_ibdev->pd, start, length, virt_addr,
5818c2ecf20Sopenharmony_ci				       access_flags);
5828c2ecf20Sopenharmony_ci
5838c2ecf20Sopenharmony_ci		if (IS_ERR(ib_mr)) {
5848c2ecf20Sopenharmony_ci			rdsdebug("rds_ib_get_user_mr returned %d\n",
5858c2ecf20Sopenharmony_ci				 IS_ERR(ib_mr));
5868c2ecf20Sopenharmony_ci			ret = PTR_ERR(ib_mr);
5878c2ecf20Sopenharmony_ci			goto out;
5888c2ecf20Sopenharmony_ci		}
5898c2ecf20Sopenharmony_ci		if (key_ret)
5908c2ecf20Sopenharmony_ci			*key_ret = ib_mr->rkey;
5918c2ecf20Sopenharmony_ci
5928c2ecf20Sopenharmony_ci		ibmr = kzalloc(sizeof(*ibmr), GFP_KERNEL);
5938c2ecf20Sopenharmony_ci		if (!ibmr) {
5948c2ecf20Sopenharmony_ci			ib_dereg_mr(ib_mr);
5958c2ecf20Sopenharmony_ci			ret = -ENOMEM;
5968c2ecf20Sopenharmony_ci			goto out;
5978c2ecf20Sopenharmony_ci		}
5988c2ecf20Sopenharmony_ci		ibmr->u.mr = ib_mr;
5998c2ecf20Sopenharmony_ci		ibmr->odp = 1;
6008c2ecf20Sopenharmony_ci
6018c2ecf20Sopenharmony_ci		sge.addr = virt_addr;
6028c2ecf20Sopenharmony_ci		sge.length = length;
6038c2ecf20Sopenharmony_ci		sge.lkey = ib_mr->lkey;
6048c2ecf20Sopenharmony_ci
6058c2ecf20Sopenharmony_ci		ib_advise_mr(rds_ibdev->pd,
6068c2ecf20Sopenharmony_ci			     IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE,
6078c2ecf20Sopenharmony_ci			     IB_UVERBS_ADVISE_MR_FLAG_FLUSH, &sge, 1);
6088c2ecf20Sopenharmony_ci		return ibmr;
6098c2ecf20Sopenharmony_ci	}
6108c2ecf20Sopenharmony_ci
6118c2ecf20Sopenharmony_ci	if (conn)
6128c2ecf20Sopenharmony_ci		ic = conn->c_transport_data;
6138c2ecf20Sopenharmony_ci
6148c2ecf20Sopenharmony_ci	if (!rds_ibdev->mr_8k_pool || !rds_ibdev->mr_1m_pool) {
6158c2ecf20Sopenharmony_ci		ret = -ENODEV;
6168c2ecf20Sopenharmony_ci		goto out;
6178c2ecf20Sopenharmony_ci	}
6188c2ecf20Sopenharmony_ci
6198c2ecf20Sopenharmony_ci	ibmr = rds_ib_reg_frmr(rds_ibdev, ic, sg, nents, key_ret);
6208c2ecf20Sopenharmony_ci	if (IS_ERR(ibmr)) {
6218c2ecf20Sopenharmony_ci		ret = PTR_ERR(ibmr);
6228c2ecf20Sopenharmony_ci		pr_warn("RDS/IB: rds_ib_get_mr failed (errno=%d)\n", ret);
6238c2ecf20Sopenharmony_ci	} else {
6248c2ecf20Sopenharmony_ci		return ibmr;
6258c2ecf20Sopenharmony_ci	}
6268c2ecf20Sopenharmony_ci
6278c2ecf20Sopenharmony_ci out:
6288c2ecf20Sopenharmony_ci	if (rds_ibdev)
6298c2ecf20Sopenharmony_ci		rds_ib_dev_put(rds_ibdev);
6308c2ecf20Sopenharmony_ci
6318c2ecf20Sopenharmony_ci	return ERR_PTR(ret);
6328c2ecf20Sopenharmony_ci}
6338c2ecf20Sopenharmony_ci
6348c2ecf20Sopenharmony_civoid rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *pool)
6358c2ecf20Sopenharmony_ci{
6368c2ecf20Sopenharmony_ci	cancel_delayed_work_sync(&pool->flush_worker);
6378c2ecf20Sopenharmony_ci	rds_ib_flush_mr_pool(pool, 1, NULL);
6388c2ecf20Sopenharmony_ci	WARN_ON(atomic_read(&pool->item_count));
6398c2ecf20Sopenharmony_ci	WARN_ON(atomic_read(&pool->free_pinned));
6408c2ecf20Sopenharmony_ci	kfree(pool);
6418c2ecf20Sopenharmony_ci}
6428c2ecf20Sopenharmony_ci
6438c2ecf20Sopenharmony_cistruct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev,
6448c2ecf20Sopenharmony_ci					     int pool_type)
6458c2ecf20Sopenharmony_ci{
6468c2ecf20Sopenharmony_ci	struct rds_ib_mr_pool *pool;
6478c2ecf20Sopenharmony_ci
6488c2ecf20Sopenharmony_ci	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
6498c2ecf20Sopenharmony_ci	if (!pool)
6508c2ecf20Sopenharmony_ci		return ERR_PTR(-ENOMEM);
6518c2ecf20Sopenharmony_ci
6528c2ecf20Sopenharmony_ci	pool->pool_type = pool_type;
6538c2ecf20Sopenharmony_ci	init_llist_head(&pool->free_list);
6548c2ecf20Sopenharmony_ci	init_llist_head(&pool->drop_list);
6558c2ecf20Sopenharmony_ci	init_llist_head(&pool->clean_list);
6568c2ecf20Sopenharmony_ci	spin_lock_init(&pool->clean_lock);
6578c2ecf20Sopenharmony_ci	mutex_init(&pool->flush_lock);
6588c2ecf20Sopenharmony_ci	init_waitqueue_head(&pool->flush_wait);
6598c2ecf20Sopenharmony_ci	INIT_DELAYED_WORK(&pool->flush_worker, rds_ib_mr_pool_flush_worker);
6608c2ecf20Sopenharmony_ci
6618c2ecf20Sopenharmony_ci	if (pool_type == RDS_IB_MR_1M_POOL) {
6628c2ecf20Sopenharmony_ci		/* +1 allows for unaligned MRs */
6638c2ecf20Sopenharmony_ci		pool->max_pages = RDS_MR_1M_MSG_SIZE + 1;
6648c2ecf20Sopenharmony_ci		pool->max_items = rds_ibdev->max_1m_mrs;
6658c2ecf20Sopenharmony_ci	} else {
6668c2ecf20Sopenharmony_ci		/* pool_type == RDS_IB_MR_8K_POOL */
6678c2ecf20Sopenharmony_ci		pool->max_pages = RDS_MR_8K_MSG_SIZE + 1;
6688c2ecf20Sopenharmony_ci		pool->max_items = rds_ibdev->max_8k_mrs;
6698c2ecf20Sopenharmony_ci	}
6708c2ecf20Sopenharmony_ci
6718c2ecf20Sopenharmony_ci	pool->max_free_pinned = pool->max_items * pool->max_pages / 4;
6728c2ecf20Sopenharmony_ci	pool->max_items_soft = rds_ibdev->max_mrs * 3 / 4;
6738c2ecf20Sopenharmony_ci
6748c2ecf20Sopenharmony_ci	return pool;
6758c2ecf20Sopenharmony_ci}
6768c2ecf20Sopenharmony_ci
6778c2ecf20Sopenharmony_ciint rds_ib_mr_init(void)
6788c2ecf20Sopenharmony_ci{
6798c2ecf20Sopenharmony_ci	rds_ib_mr_wq = alloc_workqueue("rds_mr_flushd", WQ_MEM_RECLAIM, 0);
6808c2ecf20Sopenharmony_ci	if (!rds_ib_mr_wq)
6818c2ecf20Sopenharmony_ci		return -ENOMEM;
6828c2ecf20Sopenharmony_ci	return 0;
6838c2ecf20Sopenharmony_ci}
6848c2ecf20Sopenharmony_ci
6858c2ecf20Sopenharmony_ci/* By the time this is called all the IB devices should have been torn down and
6868c2ecf20Sopenharmony_ci * had their pools freed.  As each pool is freed its work struct is waited on,
6878c2ecf20Sopenharmony_ci * so the pool flushing work queue should be idle by the time we get here.
6888c2ecf20Sopenharmony_ci */
6898c2ecf20Sopenharmony_civoid rds_ib_mr_exit(void)
6908c2ecf20Sopenharmony_ci{
6918c2ecf20Sopenharmony_ci	destroy_workqueue(rds_ib_mr_wq);
6928c2ecf20Sopenharmony_ci}
6938c2ecf20Sopenharmony_ci
6948c2ecf20Sopenharmony_cistatic void rds_ib_odp_mr_worker(struct work_struct  *work)
6958c2ecf20Sopenharmony_ci{
6968c2ecf20Sopenharmony_ci	struct rds_ib_mr *ibmr;
6978c2ecf20Sopenharmony_ci
6988c2ecf20Sopenharmony_ci	ibmr = container_of(work, struct rds_ib_mr, work.work);
6998c2ecf20Sopenharmony_ci	ib_dereg_mr(ibmr->u.mr);
7008c2ecf20Sopenharmony_ci	kfree(ibmr);
7018c2ecf20Sopenharmony_ci}
702