18c2ecf20Sopenharmony_ci/* 28c2ecf20Sopenharmony_ci * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved. 38c2ecf20Sopenharmony_ci * 48c2ecf20Sopenharmony_ci * This software is available to you under a choice of one of two 58c2ecf20Sopenharmony_ci * licenses. You may choose to be licensed under the terms of the GNU 68c2ecf20Sopenharmony_ci * General Public License (GPL) Version 2, available from the file 78c2ecf20Sopenharmony_ci * COPYING in the main directory of this source tree, or the 88c2ecf20Sopenharmony_ci * OpenIB.org BSD license below: 98c2ecf20Sopenharmony_ci * 108c2ecf20Sopenharmony_ci * Redistribution and use in source and binary forms, with or 118c2ecf20Sopenharmony_ci * without modification, are permitted provided that the following 128c2ecf20Sopenharmony_ci * conditions are met: 138c2ecf20Sopenharmony_ci * 148c2ecf20Sopenharmony_ci * - Redistributions of source code must retain the above 158c2ecf20Sopenharmony_ci * copyright notice, this list of conditions and the following 168c2ecf20Sopenharmony_ci * disclaimer. 178c2ecf20Sopenharmony_ci * 188c2ecf20Sopenharmony_ci * - Redistributions in binary form must reproduce the above 198c2ecf20Sopenharmony_ci * copyright notice, this list of conditions and the following 208c2ecf20Sopenharmony_ci * disclaimer in the documentation and/or other materials 218c2ecf20Sopenharmony_ci * provided with the distribution. 228c2ecf20Sopenharmony_ci * 238c2ecf20Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 248c2ecf20Sopenharmony_ci * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 258c2ecf20Sopenharmony_ci * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 268c2ecf20Sopenharmony_ci * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 278c2ecf20Sopenharmony_ci * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 288c2ecf20Sopenharmony_ci * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 298c2ecf20Sopenharmony_ci * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 308c2ecf20Sopenharmony_ci * SOFTWARE. 318c2ecf20Sopenharmony_ci * 328c2ecf20Sopenharmony_ci */ 338c2ecf20Sopenharmony_ci#include <linux/kernel.h> 348c2ecf20Sopenharmony_ci#include <linux/slab.h> 358c2ecf20Sopenharmony_ci#include <linux/rculist.h> 368c2ecf20Sopenharmony_ci#include <linux/llist.h> 378c2ecf20Sopenharmony_ci 388c2ecf20Sopenharmony_ci#include "rds_single_path.h" 398c2ecf20Sopenharmony_ci#include "ib_mr.h" 408c2ecf20Sopenharmony_ci#include "rds.h" 418c2ecf20Sopenharmony_ci 428c2ecf20Sopenharmony_cistruct workqueue_struct *rds_ib_mr_wq; 438c2ecf20Sopenharmony_cistruct rds_ib_dereg_odp_mr { 448c2ecf20Sopenharmony_ci struct work_struct work; 458c2ecf20Sopenharmony_ci struct ib_mr *mr; 468c2ecf20Sopenharmony_ci}; 478c2ecf20Sopenharmony_ci 488c2ecf20Sopenharmony_cistatic void rds_ib_odp_mr_worker(struct work_struct *work); 498c2ecf20Sopenharmony_ci 508c2ecf20Sopenharmony_cistatic struct rds_ib_device *rds_ib_get_device(__be32 ipaddr) 518c2ecf20Sopenharmony_ci{ 528c2ecf20Sopenharmony_ci struct rds_ib_device *rds_ibdev; 538c2ecf20Sopenharmony_ci struct rds_ib_ipaddr *i_ipaddr; 548c2ecf20Sopenharmony_ci 558c2ecf20Sopenharmony_ci rcu_read_lock(); 568c2ecf20Sopenharmony_ci list_for_each_entry_rcu(rds_ibdev, &rds_ib_devices, list) { 578c2ecf20Sopenharmony_ci list_for_each_entry_rcu(i_ipaddr, &rds_ibdev->ipaddr_list, list) { 588c2ecf20Sopenharmony_ci if (i_ipaddr->ipaddr == ipaddr) { 598c2ecf20Sopenharmony_ci refcount_inc(&rds_ibdev->refcount); 608c2ecf20Sopenharmony_ci rcu_read_unlock(); 618c2ecf20Sopenharmony_ci return rds_ibdev; 628c2ecf20Sopenharmony_ci } 638c2ecf20Sopenharmony_ci } 648c2ecf20Sopenharmony_ci } 658c2ecf20Sopenharmony_ci rcu_read_unlock(); 668c2ecf20Sopenharmony_ci 678c2ecf20Sopenharmony_ci return NULL; 688c2ecf20Sopenharmony_ci} 698c2ecf20Sopenharmony_ci 708c2ecf20Sopenharmony_cistatic int rds_ib_add_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr) 718c2ecf20Sopenharmony_ci{ 728c2ecf20Sopenharmony_ci struct rds_ib_ipaddr *i_ipaddr; 738c2ecf20Sopenharmony_ci 748c2ecf20Sopenharmony_ci i_ipaddr = kmalloc(sizeof *i_ipaddr, GFP_KERNEL); 758c2ecf20Sopenharmony_ci if (!i_ipaddr) 768c2ecf20Sopenharmony_ci return -ENOMEM; 778c2ecf20Sopenharmony_ci 788c2ecf20Sopenharmony_ci i_ipaddr->ipaddr = ipaddr; 798c2ecf20Sopenharmony_ci 808c2ecf20Sopenharmony_ci spin_lock_irq(&rds_ibdev->spinlock); 818c2ecf20Sopenharmony_ci list_add_tail_rcu(&i_ipaddr->list, &rds_ibdev->ipaddr_list); 828c2ecf20Sopenharmony_ci spin_unlock_irq(&rds_ibdev->spinlock); 838c2ecf20Sopenharmony_ci 848c2ecf20Sopenharmony_ci return 0; 858c2ecf20Sopenharmony_ci} 868c2ecf20Sopenharmony_ci 878c2ecf20Sopenharmony_cistatic void rds_ib_remove_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr) 888c2ecf20Sopenharmony_ci{ 898c2ecf20Sopenharmony_ci struct rds_ib_ipaddr *i_ipaddr; 908c2ecf20Sopenharmony_ci struct rds_ib_ipaddr *to_free = NULL; 918c2ecf20Sopenharmony_ci 928c2ecf20Sopenharmony_ci 938c2ecf20Sopenharmony_ci spin_lock_irq(&rds_ibdev->spinlock); 948c2ecf20Sopenharmony_ci list_for_each_entry_rcu(i_ipaddr, &rds_ibdev->ipaddr_list, list) { 958c2ecf20Sopenharmony_ci if (i_ipaddr->ipaddr == ipaddr) { 968c2ecf20Sopenharmony_ci list_del_rcu(&i_ipaddr->list); 978c2ecf20Sopenharmony_ci to_free = i_ipaddr; 988c2ecf20Sopenharmony_ci break; 998c2ecf20Sopenharmony_ci } 1008c2ecf20Sopenharmony_ci } 1018c2ecf20Sopenharmony_ci spin_unlock_irq(&rds_ibdev->spinlock); 1028c2ecf20Sopenharmony_ci 1038c2ecf20Sopenharmony_ci if (to_free) 1048c2ecf20Sopenharmony_ci kfree_rcu(to_free, rcu); 1058c2ecf20Sopenharmony_ci} 1068c2ecf20Sopenharmony_ci 1078c2ecf20Sopenharmony_ciint rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, 1088c2ecf20Sopenharmony_ci struct in6_addr *ipaddr) 1098c2ecf20Sopenharmony_ci{ 1108c2ecf20Sopenharmony_ci struct rds_ib_device *rds_ibdev_old; 1118c2ecf20Sopenharmony_ci 1128c2ecf20Sopenharmony_ci rds_ibdev_old = rds_ib_get_device(ipaddr->s6_addr32[3]); 1138c2ecf20Sopenharmony_ci if (!rds_ibdev_old) 1148c2ecf20Sopenharmony_ci return rds_ib_add_ipaddr(rds_ibdev, ipaddr->s6_addr32[3]); 1158c2ecf20Sopenharmony_ci 1168c2ecf20Sopenharmony_ci if (rds_ibdev_old != rds_ibdev) { 1178c2ecf20Sopenharmony_ci rds_ib_remove_ipaddr(rds_ibdev_old, ipaddr->s6_addr32[3]); 1188c2ecf20Sopenharmony_ci rds_ib_dev_put(rds_ibdev_old); 1198c2ecf20Sopenharmony_ci return rds_ib_add_ipaddr(rds_ibdev, ipaddr->s6_addr32[3]); 1208c2ecf20Sopenharmony_ci } 1218c2ecf20Sopenharmony_ci rds_ib_dev_put(rds_ibdev_old); 1228c2ecf20Sopenharmony_ci 1238c2ecf20Sopenharmony_ci return 0; 1248c2ecf20Sopenharmony_ci} 1258c2ecf20Sopenharmony_ci 1268c2ecf20Sopenharmony_civoid rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn) 1278c2ecf20Sopenharmony_ci{ 1288c2ecf20Sopenharmony_ci struct rds_ib_connection *ic = conn->c_transport_data; 1298c2ecf20Sopenharmony_ci 1308c2ecf20Sopenharmony_ci /* conn was previously on the nodev_conns_list */ 1318c2ecf20Sopenharmony_ci spin_lock_irq(&ib_nodev_conns_lock); 1328c2ecf20Sopenharmony_ci BUG_ON(list_empty(&ib_nodev_conns)); 1338c2ecf20Sopenharmony_ci BUG_ON(list_empty(&ic->ib_node)); 1348c2ecf20Sopenharmony_ci list_del(&ic->ib_node); 1358c2ecf20Sopenharmony_ci 1368c2ecf20Sopenharmony_ci spin_lock(&rds_ibdev->spinlock); 1378c2ecf20Sopenharmony_ci list_add_tail(&ic->ib_node, &rds_ibdev->conn_list); 1388c2ecf20Sopenharmony_ci spin_unlock(&rds_ibdev->spinlock); 1398c2ecf20Sopenharmony_ci spin_unlock_irq(&ib_nodev_conns_lock); 1408c2ecf20Sopenharmony_ci 1418c2ecf20Sopenharmony_ci ic->rds_ibdev = rds_ibdev; 1428c2ecf20Sopenharmony_ci refcount_inc(&rds_ibdev->refcount); 1438c2ecf20Sopenharmony_ci} 1448c2ecf20Sopenharmony_ci 1458c2ecf20Sopenharmony_civoid rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn) 1468c2ecf20Sopenharmony_ci{ 1478c2ecf20Sopenharmony_ci struct rds_ib_connection *ic = conn->c_transport_data; 1488c2ecf20Sopenharmony_ci 1498c2ecf20Sopenharmony_ci /* place conn on nodev_conns_list */ 1508c2ecf20Sopenharmony_ci spin_lock(&ib_nodev_conns_lock); 1518c2ecf20Sopenharmony_ci 1528c2ecf20Sopenharmony_ci spin_lock_irq(&rds_ibdev->spinlock); 1538c2ecf20Sopenharmony_ci BUG_ON(list_empty(&ic->ib_node)); 1548c2ecf20Sopenharmony_ci list_del(&ic->ib_node); 1558c2ecf20Sopenharmony_ci spin_unlock_irq(&rds_ibdev->spinlock); 1568c2ecf20Sopenharmony_ci 1578c2ecf20Sopenharmony_ci list_add_tail(&ic->ib_node, &ib_nodev_conns); 1588c2ecf20Sopenharmony_ci 1598c2ecf20Sopenharmony_ci spin_unlock(&ib_nodev_conns_lock); 1608c2ecf20Sopenharmony_ci 1618c2ecf20Sopenharmony_ci ic->rds_ibdev = NULL; 1628c2ecf20Sopenharmony_ci rds_ib_dev_put(rds_ibdev); 1638c2ecf20Sopenharmony_ci} 1648c2ecf20Sopenharmony_ci 1658c2ecf20Sopenharmony_civoid rds_ib_destroy_nodev_conns(void) 1668c2ecf20Sopenharmony_ci{ 1678c2ecf20Sopenharmony_ci struct rds_ib_connection *ic, *_ic; 1688c2ecf20Sopenharmony_ci LIST_HEAD(tmp_list); 1698c2ecf20Sopenharmony_ci 1708c2ecf20Sopenharmony_ci /* avoid calling conn_destroy with irqs off */ 1718c2ecf20Sopenharmony_ci spin_lock_irq(&ib_nodev_conns_lock); 1728c2ecf20Sopenharmony_ci list_splice(&ib_nodev_conns, &tmp_list); 1738c2ecf20Sopenharmony_ci spin_unlock_irq(&ib_nodev_conns_lock); 1748c2ecf20Sopenharmony_ci 1758c2ecf20Sopenharmony_ci list_for_each_entry_safe(ic, _ic, &tmp_list, ib_node) 1768c2ecf20Sopenharmony_ci rds_conn_destroy(ic->conn); 1778c2ecf20Sopenharmony_ci} 1788c2ecf20Sopenharmony_ci 1798c2ecf20Sopenharmony_civoid rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo) 1808c2ecf20Sopenharmony_ci{ 1818c2ecf20Sopenharmony_ci struct rds_ib_mr_pool *pool_1m = rds_ibdev->mr_1m_pool; 1828c2ecf20Sopenharmony_ci 1838c2ecf20Sopenharmony_ci iinfo->rdma_mr_max = pool_1m->max_items; 1848c2ecf20Sopenharmony_ci iinfo->rdma_mr_size = pool_1m->max_pages; 1858c2ecf20Sopenharmony_ci} 1868c2ecf20Sopenharmony_ci 1878c2ecf20Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6) 1888c2ecf20Sopenharmony_civoid rds6_ib_get_mr_info(struct rds_ib_device *rds_ibdev, 1898c2ecf20Sopenharmony_ci struct rds6_info_rdma_connection *iinfo6) 1908c2ecf20Sopenharmony_ci{ 1918c2ecf20Sopenharmony_ci struct rds_ib_mr_pool *pool_1m = rds_ibdev->mr_1m_pool; 1928c2ecf20Sopenharmony_ci 1938c2ecf20Sopenharmony_ci iinfo6->rdma_mr_max = pool_1m->max_items; 1948c2ecf20Sopenharmony_ci iinfo6->rdma_mr_size = pool_1m->max_pages; 1958c2ecf20Sopenharmony_ci} 1968c2ecf20Sopenharmony_ci#endif 1978c2ecf20Sopenharmony_ci 1988c2ecf20Sopenharmony_cistruct rds_ib_mr *rds_ib_reuse_mr(struct rds_ib_mr_pool *pool) 1998c2ecf20Sopenharmony_ci{ 2008c2ecf20Sopenharmony_ci struct rds_ib_mr *ibmr = NULL; 2018c2ecf20Sopenharmony_ci struct llist_node *ret; 2028c2ecf20Sopenharmony_ci unsigned long flags; 2038c2ecf20Sopenharmony_ci 2048c2ecf20Sopenharmony_ci spin_lock_irqsave(&pool->clean_lock, flags); 2058c2ecf20Sopenharmony_ci ret = llist_del_first(&pool->clean_list); 2068c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&pool->clean_lock, flags); 2078c2ecf20Sopenharmony_ci if (ret) { 2088c2ecf20Sopenharmony_ci ibmr = llist_entry(ret, struct rds_ib_mr, llnode); 2098c2ecf20Sopenharmony_ci if (pool->pool_type == RDS_IB_MR_8K_POOL) 2108c2ecf20Sopenharmony_ci rds_ib_stats_inc(s_ib_rdma_mr_8k_reused); 2118c2ecf20Sopenharmony_ci else 2128c2ecf20Sopenharmony_ci rds_ib_stats_inc(s_ib_rdma_mr_1m_reused); 2138c2ecf20Sopenharmony_ci } 2148c2ecf20Sopenharmony_ci 2158c2ecf20Sopenharmony_ci return ibmr; 2168c2ecf20Sopenharmony_ci} 2178c2ecf20Sopenharmony_ci 2188c2ecf20Sopenharmony_civoid rds_ib_sync_mr(void *trans_private, int direction) 2198c2ecf20Sopenharmony_ci{ 2208c2ecf20Sopenharmony_ci struct rds_ib_mr *ibmr = trans_private; 2218c2ecf20Sopenharmony_ci struct rds_ib_device *rds_ibdev = ibmr->device; 2228c2ecf20Sopenharmony_ci 2238c2ecf20Sopenharmony_ci if (ibmr->odp) 2248c2ecf20Sopenharmony_ci return; 2258c2ecf20Sopenharmony_ci 2268c2ecf20Sopenharmony_ci switch (direction) { 2278c2ecf20Sopenharmony_ci case DMA_FROM_DEVICE: 2288c2ecf20Sopenharmony_ci ib_dma_sync_sg_for_cpu(rds_ibdev->dev, ibmr->sg, 2298c2ecf20Sopenharmony_ci ibmr->sg_dma_len, DMA_BIDIRECTIONAL); 2308c2ecf20Sopenharmony_ci break; 2318c2ecf20Sopenharmony_ci case DMA_TO_DEVICE: 2328c2ecf20Sopenharmony_ci ib_dma_sync_sg_for_device(rds_ibdev->dev, ibmr->sg, 2338c2ecf20Sopenharmony_ci ibmr->sg_dma_len, DMA_BIDIRECTIONAL); 2348c2ecf20Sopenharmony_ci break; 2358c2ecf20Sopenharmony_ci } 2368c2ecf20Sopenharmony_ci} 2378c2ecf20Sopenharmony_ci 2388c2ecf20Sopenharmony_civoid __rds_ib_teardown_mr(struct rds_ib_mr *ibmr) 2398c2ecf20Sopenharmony_ci{ 2408c2ecf20Sopenharmony_ci struct rds_ib_device *rds_ibdev = ibmr->device; 2418c2ecf20Sopenharmony_ci 2428c2ecf20Sopenharmony_ci if (ibmr->sg_dma_len) { 2438c2ecf20Sopenharmony_ci ib_dma_unmap_sg(rds_ibdev->dev, 2448c2ecf20Sopenharmony_ci ibmr->sg, ibmr->sg_len, 2458c2ecf20Sopenharmony_ci DMA_BIDIRECTIONAL); 2468c2ecf20Sopenharmony_ci ibmr->sg_dma_len = 0; 2478c2ecf20Sopenharmony_ci } 2488c2ecf20Sopenharmony_ci 2498c2ecf20Sopenharmony_ci /* Release the s/g list */ 2508c2ecf20Sopenharmony_ci if (ibmr->sg_len) { 2518c2ecf20Sopenharmony_ci unsigned int i; 2528c2ecf20Sopenharmony_ci 2538c2ecf20Sopenharmony_ci for (i = 0; i < ibmr->sg_len; ++i) { 2548c2ecf20Sopenharmony_ci struct page *page = sg_page(&ibmr->sg[i]); 2558c2ecf20Sopenharmony_ci 2568c2ecf20Sopenharmony_ci /* FIXME we need a way to tell a r/w MR 2578c2ecf20Sopenharmony_ci * from a r/o MR */ 2588c2ecf20Sopenharmony_ci WARN_ON(!page->mapping && irqs_disabled()); 2598c2ecf20Sopenharmony_ci set_page_dirty(page); 2608c2ecf20Sopenharmony_ci put_page(page); 2618c2ecf20Sopenharmony_ci } 2628c2ecf20Sopenharmony_ci kfree(ibmr->sg); 2638c2ecf20Sopenharmony_ci 2648c2ecf20Sopenharmony_ci ibmr->sg = NULL; 2658c2ecf20Sopenharmony_ci ibmr->sg_len = 0; 2668c2ecf20Sopenharmony_ci } 2678c2ecf20Sopenharmony_ci} 2688c2ecf20Sopenharmony_ci 2698c2ecf20Sopenharmony_civoid rds_ib_teardown_mr(struct rds_ib_mr *ibmr) 2708c2ecf20Sopenharmony_ci{ 2718c2ecf20Sopenharmony_ci unsigned int pinned = ibmr->sg_len; 2728c2ecf20Sopenharmony_ci 2738c2ecf20Sopenharmony_ci __rds_ib_teardown_mr(ibmr); 2748c2ecf20Sopenharmony_ci if (pinned) { 2758c2ecf20Sopenharmony_ci struct rds_ib_mr_pool *pool = ibmr->pool; 2768c2ecf20Sopenharmony_ci 2778c2ecf20Sopenharmony_ci atomic_sub(pinned, &pool->free_pinned); 2788c2ecf20Sopenharmony_ci } 2798c2ecf20Sopenharmony_ci} 2808c2ecf20Sopenharmony_ci 2818c2ecf20Sopenharmony_cistatic inline unsigned int rds_ib_flush_goal(struct rds_ib_mr_pool *pool, int free_all) 2828c2ecf20Sopenharmony_ci{ 2838c2ecf20Sopenharmony_ci unsigned int item_count; 2848c2ecf20Sopenharmony_ci 2858c2ecf20Sopenharmony_ci item_count = atomic_read(&pool->item_count); 2868c2ecf20Sopenharmony_ci if (free_all) 2878c2ecf20Sopenharmony_ci return item_count; 2888c2ecf20Sopenharmony_ci 2898c2ecf20Sopenharmony_ci return 0; 2908c2ecf20Sopenharmony_ci} 2918c2ecf20Sopenharmony_ci 2928c2ecf20Sopenharmony_ci/* 2938c2ecf20Sopenharmony_ci * given an llist of mrs, put them all into the list_head for more processing 2948c2ecf20Sopenharmony_ci */ 2958c2ecf20Sopenharmony_cistatic unsigned int llist_append_to_list(struct llist_head *llist, 2968c2ecf20Sopenharmony_ci struct list_head *list) 2978c2ecf20Sopenharmony_ci{ 2988c2ecf20Sopenharmony_ci struct rds_ib_mr *ibmr; 2998c2ecf20Sopenharmony_ci struct llist_node *node; 3008c2ecf20Sopenharmony_ci struct llist_node *next; 3018c2ecf20Sopenharmony_ci unsigned int count = 0; 3028c2ecf20Sopenharmony_ci 3038c2ecf20Sopenharmony_ci node = llist_del_all(llist); 3048c2ecf20Sopenharmony_ci while (node) { 3058c2ecf20Sopenharmony_ci next = node->next; 3068c2ecf20Sopenharmony_ci ibmr = llist_entry(node, struct rds_ib_mr, llnode); 3078c2ecf20Sopenharmony_ci list_add_tail(&ibmr->unmap_list, list); 3088c2ecf20Sopenharmony_ci node = next; 3098c2ecf20Sopenharmony_ci count++; 3108c2ecf20Sopenharmony_ci } 3118c2ecf20Sopenharmony_ci return count; 3128c2ecf20Sopenharmony_ci} 3138c2ecf20Sopenharmony_ci 3148c2ecf20Sopenharmony_ci/* 3158c2ecf20Sopenharmony_ci * this takes a list head of mrs and turns it into linked llist nodes 3168c2ecf20Sopenharmony_ci * of clusters. Each cluster has linked llist nodes of 3178c2ecf20Sopenharmony_ci * MR_CLUSTER_SIZE mrs that are ready for reuse. 3188c2ecf20Sopenharmony_ci */ 3198c2ecf20Sopenharmony_cistatic void list_to_llist_nodes(struct list_head *list, 3208c2ecf20Sopenharmony_ci struct llist_node **nodes_head, 3218c2ecf20Sopenharmony_ci struct llist_node **nodes_tail) 3228c2ecf20Sopenharmony_ci{ 3238c2ecf20Sopenharmony_ci struct rds_ib_mr *ibmr; 3248c2ecf20Sopenharmony_ci struct llist_node *cur = NULL; 3258c2ecf20Sopenharmony_ci struct llist_node **next = nodes_head; 3268c2ecf20Sopenharmony_ci 3278c2ecf20Sopenharmony_ci list_for_each_entry(ibmr, list, unmap_list) { 3288c2ecf20Sopenharmony_ci cur = &ibmr->llnode; 3298c2ecf20Sopenharmony_ci *next = cur; 3308c2ecf20Sopenharmony_ci next = &cur->next; 3318c2ecf20Sopenharmony_ci } 3328c2ecf20Sopenharmony_ci *next = NULL; 3338c2ecf20Sopenharmony_ci *nodes_tail = cur; 3348c2ecf20Sopenharmony_ci} 3358c2ecf20Sopenharmony_ci 3368c2ecf20Sopenharmony_ci/* 3378c2ecf20Sopenharmony_ci * Flush our pool of MRs. 3388c2ecf20Sopenharmony_ci * At a minimum, all currently unused MRs are unmapped. 3398c2ecf20Sopenharmony_ci * If the number of MRs allocated exceeds the limit, we also try 3408c2ecf20Sopenharmony_ci * to free as many MRs as needed to get back to this limit. 3418c2ecf20Sopenharmony_ci */ 3428c2ecf20Sopenharmony_ciint rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, 3438c2ecf20Sopenharmony_ci int free_all, struct rds_ib_mr **ibmr_ret) 3448c2ecf20Sopenharmony_ci{ 3458c2ecf20Sopenharmony_ci struct rds_ib_mr *ibmr; 3468c2ecf20Sopenharmony_ci struct llist_node *clean_nodes; 3478c2ecf20Sopenharmony_ci struct llist_node *clean_tail; 3488c2ecf20Sopenharmony_ci LIST_HEAD(unmap_list); 3498c2ecf20Sopenharmony_ci unsigned long unpinned = 0; 3508c2ecf20Sopenharmony_ci unsigned int nfreed = 0, dirty_to_clean = 0, free_goal; 3518c2ecf20Sopenharmony_ci 3528c2ecf20Sopenharmony_ci if (pool->pool_type == RDS_IB_MR_8K_POOL) 3538c2ecf20Sopenharmony_ci rds_ib_stats_inc(s_ib_rdma_mr_8k_pool_flush); 3548c2ecf20Sopenharmony_ci else 3558c2ecf20Sopenharmony_ci rds_ib_stats_inc(s_ib_rdma_mr_1m_pool_flush); 3568c2ecf20Sopenharmony_ci 3578c2ecf20Sopenharmony_ci if (ibmr_ret) { 3588c2ecf20Sopenharmony_ci DEFINE_WAIT(wait); 3598c2ecf20Sopenharmony_ci while (!mutex_trylock(&pool->flush_lock)) { 3608c2ecf20Sopenharmony_ci ibmr = rds_ib_reuse_mr(pool); 3618c2ecf20Sopenharmony_ci if (ibmr) { 3628c2ecf20Sopenharmony_ci *ibmr_ret = ibmr; 3638c2ecf20Sopenharmony_ci finish_wait(&pool->flush_wait, &wait); 3648c2ecf20Sopenharmony_ci goto out_nolock; 3658c2ecf20Sopenharmony_ci } 3668c2ecf20Sopenharmony_ci 3678c2ecf20Sopenharmony_ci prepare_to_wait(&pool->flush_wait, &wait, 3688c2ecf20Sopenharmony_ci TASK_UNINTERRUPTIBLE); 3698c2ecf20Sopenharmony_ci if (llist_empty(&pool->clean_list)) 3708c2ecf20Sopenharmony_ci schedule(); 3718c2ecf20Sopenharmony_ci 3728c2ecf20Sopenharmony_ci ibmr = rds_ib_reuse_mr(pool); 3738c2ecf20Sopenharmony_ci if (ibmr) { 3748c2ecf20Sopenharmony_ci *ibmr_ret = ibmr; 3758c2ecf20Sopenharmony_ci finish_wait(&pool->flush_wait, &wait); 3768c2ecf20Sopenharmony_ci goto out_nolock; 3778c2ecf20Sopenharmony_ci } 3788c2ecf20Sopenharmony_ci } 3798c2ecf20Sopenharmony_ci finish_wait(&pool->flush_wait, &wait); 3808c2ecf20Sopenharmony_ci } else 3818c2ecf20Sopenharmony_ci mutex_lock(&pool->flush_lock); 3828c2ecf20Sopenharmony_ci 3838c2ecf20Sopenharmony_ci if (ibmr_ret) { 3848c2ecf20Sopenharmony_ci ibmr = rds_ib_reuse_mr(pool); 3858c2ecf20Sopenharmony_ci if (ibmr) { 3868c2ecf20Sopenharmony_ci *ibmr_ret = ibmr; 3878c2ecf20Sopenharmony_ci goto out; 3888c2ecf20Sopenharmony_ci } 3898c2ecf20Sopenharmony_ci } 3908c2ecf20Sopenharmony_ci 3918c2ecf20Sopenharmony_ci /* Get the list of all MRs to be dropped. Ordering matters - 3928c2ecf20Sopenharmony_ci * we want to put drop_list ahead of free_list. 3938c2ecf20Sopenharmony_ci */ 3948c2ecf20Sopenharmony_ci dirty_to_clean = llist_append_to_list(&pool->drop_list, &unmap_list); 3958c2ecf20Sopenharmony_ci dirty_to_clean += llist_append_to_list(&pool->free_list, &unmap_list); 3968c2ecf20Sopenharmony_ci if (free_all) { 3978c2ecf20Sopenharmony_ci unsigned long flags; 3988c2ecf20Sopenharmony_ci 3998c2ecf20Sopenharmony_ci spin_lock_irqsave(&pool->clean_lock, flags); 4008c2ecf20Sopenharmony_ci llist_append_to_list(&pool->clean_list, &unmap_list); 4018c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&pool->clean_lock, flags); 4028c2ecf20Sopenharmony_ci } 4038c2ecf20Sopenharmony_ci 4048c2ecf20Sopenharmony_ci free_goal = rds_ib_flush_goal(pool, free_all); 4058c2ecf20Sopenharmony_ci 4068c2ecf20Sopenharmony_ci if (list_empty(&unmap_list)) 4078c2ecf20Sopenharmony_ci goto out; 4088c2ecf20Sopenharmony_ci 4098c2ecf20Sopenharmony_ci rds_ib_unreg_frmr(&unmap_list, &nfreed, &unpinned, free_goal); 4108c2ecf20Sopenharmony_ci 4118c2ecf20Sopenharmony_ci if (!list_empty(&unmap_list)) { 4128c2ecf20Sopenharmony_ci unsigned long flags; 4138c2ecf20Sopenharmony_ci 4148c2ecf20Sopenharmony_ci list_to_llist_nodes(&unmap_list, &clean_nodes, &clean_tail); 4158c2ecf20Sopenharmony_ci if (ibmr_ret) { 4168c2ecf20Sopenharmony_ci *ibmr_ret = llist_entry(clean_nodes, struct rds_ib_mr, llnode); 4178c2ecf20Sopenharmony_ci clean_nodes = clean_nodes->next; 4188c2ecf20Sopenharmony_ci } 4198c2ecf20Sopenharmony_ci /* more than one entry in llist nodes */ 4208c2ecf20Sopenharmony_ci if (clean_nodes) { 4218c2ecf20Sopenharmony_ci spin_lock_irqsave(&pool->clean_lock, flags); 4228c2ecf20Sopenharmony_ci llist_add_batch(clean_nodes, clean_tail, 4238c2ecf20Sopenharmony_ci &pool->clean_list); 4248c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&pool->clean_lock, flags); 4258c2ecf20Sopenharmony_ci } 4268c2ecf20Sopenharmony_ci } 4278c2ecf20Sopenharmony_ci 4288c2ecf20Sopenharmony_ci atomic_sub(unpinned, &pool->free_pinned); 4298c2ecf20Sopenharmony_ci atomic_sub(dirty_to_clean, &pool->dirty_count); 4308c2ecf20Sopenharmony_ci atomic_sub(nfreed, &pool->item_count); 4318c2ecf20Sopenharmony_ci 4328c2ecf20Sopenharmony_ciout: 4338c2ecf20Sopenharmony_ci mutex_unlock(&pool->flush_lock); 4348c2ecf20Sopenharmony_ci if (waitqueue_active(&pool->flush_wait)) 4358c2ecf20Sopenharmony_ci wake_up(&pool->flush_wait); 4368c2ecf20Sopenharmony_ciout_nolock: 4378c2ecf20Sopenharmony_ci return 0; 4388c2ecf20Sopenharmony_ci} 4398c2ecf20Sopenharmony_ci 4408c2ecf20Sopenharmony_cistruct rds_ib_mr *rds_ib_try_reuse_ibmr(struct rds_ib_mr_pool *pool) 4418c2ecf20Sopenharmony_ci{ 4428c2ecf20Sopenharmony_ci struct rds_ib_mr *ibmr = NULL; 4438c2ecf20Sopenharmony_ci int iter = 0; 4448c2ecf20Sopenharmony_ci 4458c2ecf20Sopenharmony_ci while (1) { 4468c2ecf20Sopenharmony_ci ibmr = rds_ib_reuse_mr(pool); 4478c2ecf20Sopenharmony_ci if (ibmr) 4488c2ecf20Sopenharmony_ci return ibmr; 4498c2ecf20Sopenharmony_ci 4508c2ecf20Sopenharmony_ci if (atomic_inc_return(&pool->item_count) <= pool->max_items) 4518c2ecf20Sopenharmony_ci break; 4528c2ecf20Sopenharmony_ci 4538c2ecf20Sopenharmony_ci atomic_dec(&pool->item_count); 4548c2ecf20Sopenharmony_ci 4558c2ecf20Sopenharmony_ci if (++iter > 2) { 4568c2ecf20Sopenharmony_ci if (pool->pool_type == RDS_IB_MR_8K_POOL) 4578c2ecf20Sopenharmony_ci rds_ib_stats_inc(s_ib_rdma_mr_8k_pool_depleted); 4588c2ecf20Sopenharmony_ci else 4598c2ecf20Sopenharmony_ci rds_ib_stats_inc(s_ib_rdma_mr_1m_pool_depleted); 4608c2ecf20Sopenharmony_ci break; 4618c2ecf20Sopenharmony_ci } 4628c2ecf20Sopenharmony_ci 4638c2ecf20Sopenharmony_ci /* We do have some empty MRs. Flush them out. */ 4648c2ecf20Sopenharmony_ci if (pool->pool_type == RDS_IB_MR_8K_POOL) 4658c2ecf20Sopenharmony_ci rds_ib_stats_inc(s_ib_rdma_mr_8k_pool_wait); 4668c2ecf20Sopenharmony_ci else 4678c2ecf20Sopenharmony_ci rds_ib_stats_inc(s_ib_rdma_mr_1m_pool_wait); 4688c2ecf20Sopenharmony_ci 4698c2ecf20Sopenharmony_ci rds_ib_flush_mr_pool(pool, 0, &ibmr); 4708c2ecf20Sopenharmony_ci if (ibmr) 4718c2ecf20Sopenharmony_ci return ibmr; 4728c2ecf20Sopenharmony_ci } 4738c2ecf20Sopenharmony_ci 4748c2ecf20Sopenharmony_ci return NULL; 4758c2ecf20Sopenharmony_ci} 4768c2ecf20Sopenharmony_ci 4778c2ecf20Sopenharmony_cistatic void rds_ib_mr_pool_flush_worker(struct work_struct *work) 4788c2ecf20Sopenharmony_ci{ 4798c2ecf20Sopenharmony_ci struct rds_ib_mr_pool *pool = container_of(work, struct rds_ib_mr_pool, flush_worker.work); 4808c2ecf20Sopenharmony_ci 4818c2ecf20Sopenharmony_ci rds_ib_flush_mr_pool(pool, 0, NULL); 4828c2ecf20Sopenharmony_ci} 4838c2ecf20Sopenharmony_ci 4848c2ecf20Sopenharmony_civoid rds_ib_free_mr(void *trans_private, int invalidate) 4858c2ecf20Sopenharmony_ci{ 4868c2ecf20Sopenharmony_ci struct rds_ib_mr *ibmr = trans_private; 4878c2ecf20Sopenharmony_ci struct rds_ib_mr_pool *pool = ibmr->pool; 4888c2ecf20Sopenharmony_ci struct rds_ib_device *rds_ibdev = ibmr->device; 4898c2ecf20Sopenharmony_ci 4908c2ecf20Sopenharmony_ci rdsdebug("RDS/IB: free_mr nents %u\n", ibmr->sg_len); 4918c2ecf20Sopenharmony_ci 4928c2ecf20Sopenharmony_ci if (ibmr->odp) { 4938c2ecf20Sopenharmony_ci /* A MR created and marked as use_once. We use delayed work, 4948c2ecf20Sopenharmony_ci * because there is a change that we are in interrupt and can't 4958c2ecf20Sopenharmony_ci * call to ib_dereg_mr() directly. 4968c2ecf20Sopenharmony_ci */ 4978c2ecf20Sopenharmony_ci INIT_DELAYED_WORK(&ibmr->work, rds_ib_odp_mr_worker); 4988c2ecf20Sopenharmony_ci queue_delayed_work(rds_ib_mr_wq, &ibmr->work, 0); 4998c2ecf20Sopenharmony_ci return; 5008c2ecf20Sopenharmony_ci } 5018c2ecf20Sopenharmony_ci 5028c2ecf20Sopenharmony_ci /* Return it to the pool's free list */ 5038c2ecf20Sopenharmony_ci rds_ib_free_frmr_list(ibmr); 5048c2ecf20Sopenharmony_ci 5058c2ecf20Sopenharmony_ci atomic_add(ibmr->sg_len, &pool->free_pinned); 5068c2ecf20Sopenharmony_ci atomic_inc(&pool->dirty_count); 5078c2ecf20Sopenharmony_ci 5088c2ecf20Sopenharmony_ci /* If we've pinned too many pages, request a flush */ 5098c2ecf20Sopenharmony_ci if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned || 5108c2ecf20Sopenharmony_ci atomic_read(&pool->dirty_count) >= pool->max_items / 5) 5118c2ecf20Sopenharmony_ci queue_delayed_work(rds_ib_mr_wq, &pool->flush_worker, 10); 5128c2ecf20Sopenharmony_ci 5138c2ecf20Sopenharmony_ci if (invalidate) { 5148c2ecf20Sopenharmony_ci if (likely(!in_interrupt())) { 5158c2ecf20Sopenharmony_ci rds_ib_flush_mr_pool(pool, 0, NULL); 5168c2ecf20Sopenharmony_ci } else { 5178c2ecf20Sopenharmony_ci /* We get here if the user created a MR marked 5188c2ecf20Sopenharmony_ci * as use_once and invalidate at the same time. 5198c2ecf20Sopenharmony_ci */ 5208c2ecf20Sopenharmony_ci queue_delayed_work(rds_ib_mr_wq, 5218c2ecf20Sopenharmony_ci &pool->flush_worker, 10); 5228c2ecf20Sopenharmony_ci } 5238c2ecf20Sopenharmony_ci } 5248c2ecf20Sopenharmony_ci 5258c2ecf20Sopenharmony_ci rds_ib_dev_put(rds_ibdev); 5268c2ecf20Sopenharmony_ci} 5278c2ecf20Sopenharmony_ci 5288c2ecf20Sopenharmony_civoid rds_ib_flush_mrs(void) 5298c2ecf20Sopenharmony_ci{ 5308c2ecf20Sopenharmony_ci struct rds_ib_device *rds_ibdev; 5318c2ecf20Sopenharmony_ci 5328c2ecf20Sopenharmony_ci down_read(&rds_ib_devices_lock); 5338c2ecf20Sopenharmony_ci list_for_each_entry(rds_ibdev, &rds_ib_devices, list) { 5348c2ecf20Sopenharmony_ci if (rds_ibdev->mr_8k_pool) 5358c2ecf20Sopenharmony_ci rds_ib_flush_mr_pool(rds_ibdev->mr_8k_pool, 0, NULL); 5368c2ecf20Sopenharmony_ci 5378c2ecf20Sopenharmony_ci if (rds_ibdev->mr_1m_pool) 5388c2ecf20Sopenharmony_ci rds_ib_flush_mr_pool(rds_ibdev->mr_1m_pool, 0, NULL); 5398c2ecf20Sopenharmony_ci } 5408c2ecf20Sopenharmony_ci up_read(&rds_ib_devices_lock); 5418c2ecf20Sopenharmony_ci} 5428c2ecf20Sopenharmony_ci 5438c2ecf20Sopenharmony_ciu32 rds_ib_get_lkey(void *trans_private) 5448c2ecf20Sopenharmony_ci{ 5458c2ecf20Sopenharmony_ci struct rds_ib_mr *ibmr = trans_private; 5468c2ecf20Sopenharmony_ci 5478c2ecf20Sopenharmony_ci return ibmr->u.mr->lkey; 5488c2ecf20Sopenharmony_ci} 5498c2ecf20Sopenharmony_ci 5508c2ecf20Sopenharmony_civoid *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents, 5518c2ecf20Sopenharmony_ci struct rds_sock *rs, u32 *key_ret, 5528c2ecf20Sopenharmony_ci struct rds_connection *conn, 5538c2ecf20Sopenharmony_ci u64 start, u64 length, int need_odp) 5548c2ecf20Sopenharmony_ci{ 5558c2ecf20Sopenharmony_ci struct rds_ib_device *rds_ibdev; 5568c2ecf20Sopenharmony_ci struct rds_ib_mr *ibmr = NULL; 5578c2ecf20Sopenharmony_ci struct rds_ib_connection *ic = NULL; 5588c2ecf20Sopenharmony_ci int ret; 5598c2ecf20Sopenharmony_ci 5608c2ecf20Sopenharmony_ci rds_ibdev = rds_ib_get_device(rs->rs_bound_addr.s6_addr32[3]); 5618c2ecf20Sopenharmony_ci if (!rds_ibdev) { 5628c2ecf20Sopenharmony_ci ret = -ENODEV; 5638c2ecf20Sopenharmony_ci goto out; 5648c2ecf20Sopenharmony_ci } 5658c2ecf20Sopenharmony_ci 5668c2ecf20Sopenharmony_ci if (need_odp == ODP_ZEROBASED || need_odp == ODP_VIRTUAL) { 5678c2ecf20Sopenharmony_ci u64 virt_addr = need_odp == ODP_ZEROBASED ? 0 : start; 5688c2ecf20Sopenharmony_ci int access_flags = 5698c2ecf20Sopenharmony_ci (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ | 5708c2ecf20Sopenharmony_ci IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_ATOMIC | 5718c2ecf20Sopenharmony_ci IB_ACCESS_ON_DEMAND); 5728c2ecf20Sopenharmony_ci struct ib_sge sge = {}; 5738c2ecf20Sopenharmony_ci struct ib_mr *ib_mr; 5748c2ecf20Sopenharmony_ci 5758c2ecf20Sopenharmony_ci if (!rds_ibdev->odp_capable) { 5768c2ecf20Sopenharmony_ci ret = -EOPNOTSUPP; 5778c2ecf20Sopenharmony_ci goto out; 5788c2ecf20Sopenharmony_ci } 5798c2ecf20Sopenharmony_ci 5808c2ecf20Sopenharmony_ci ib_mr = ib_reg_user_mr(rds_ibdev->pd, start, length, virt_addr, 5818c2ecf20Sopenharmony_ci access_flags); 5828c2ecf20Sopenharmony_ci 5838c2ecf20Sopenharmony_ci if (IS_ERR(ib_mr)) { 5848c2ecf20Sopenharmony_ci rdsdebug("rds_ib_get_user_mr returned %d\n", 5858c2ecf20Sopenharmony_ci IS_ERR(ib_mr)); 5868c2ecf20Sopenharmony_ci ret = PTR_ERR(ib_mr); 5878c2ecf20Sopenharmony_ci goto out; 5888c2ecf20Sopenharmony_ci } 5898c2ecf20Sopenharmony_ci if (key_ret) 5908c2ecf20Sopenharmony_ci *key_ret = ib_mr->rkey; 5918c2ecf20Sopenharmony_ci 5928c2ecf20Sopenharmony_ci ibmr = kzalloc(sizeof(*ibmr), GFP_KERNEL); 5938c2ecf20Sopenharmony_ci if (!ibmr) { 5948c2ecf20Sopenharmony_ci ib_dereg_mr(ib_mr); 5958c2ecf20Sopenharmony_ci ret = -ENOMEM; 5968c2ecf20Sopenharmony_ci goto out; 5978c2ecf20Sopenharmony_ci } 5988c2ecf20Sopenharmony_ci ibmr->u.mr = ib_mr; 5998c2ecf20Sopenharmony_ci ibmr->odp = 1; 6008c2ecf20Sopenharmony_ci 6018c2ecf20Sopenharmony_ci sge.addr = virt_addr; 6028c2ecf20Sopenharmony_ci sge.length = length; 6038c2ecf20Sopenharmony_ci sge.lkey = ib_mr->lkey; 6048c2ecf20Sopenharmony_ci 6058c2ecf20Sopenharmony_ci ib_advise_mr(rds_ibdev->pd, 6068c2ecf20Sopenharmony_ci IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE, 6078c2ecf20Sopenharmony_ci IB_UVERBS_ADVISE_MR_FLAG_FLUSH, &sge, 1); 6088c2ecf20Sopenharmony_ci return ibmr; 6098c2ecf20Sopenharmony_ci } 6108c2ecf20Sopenharmony_ci 6118c2ecf20Sopenharmony_ci if (conn) 6128c2ecf20Sopenharmony_ci ic = conn->c_transport_data; 6138c2ecf20Sopenharmony_ci 6148c2ecf20Sopenharmony_ci if (!rds_ibdev->mr_8k_pool || !rds_ibdev->mr_1m_pool) { 6158c2ecf20Sopenharmony_ci ret = -ENODEV; 6168c2ecf20Sopenharmony_ci goto out; 6178c2ecf20Sopenharmony_ci } 6188c2ecf20Sopenharmony_ci 6198c2ecf20Sopenharmony_ci ibmr = rds_ib_reg_frmr(rds_ibdev, ic, sg, nents, key_ret); 6208c2ecf20Sopenharmony_ci if (IS_ERR(ibmr)) { 6218c2ecf20Sopenharmony_ci ret = PTR_ERR(ibmr); 6228c2ecf20Sopenharmony_ci pr_warn("RDS/IB: rds_ib_get_mr failed (errno=%d)\n", ret); 6238c2ecf20Sopenharmony_ci } else { 6248c2ecf20Sopenharmony_ci return ibmr; 6258c2ecf20Sopenharmony_ci } 6268c2ecf20Sopenharmony_ci 6278c2ecf20Sopenharmony_ci out: 6288c2ecf20Sopenharmony_ci if (rds_ibdev) 6298c2ecf20Sopenharmony_ci rds_ib_dev_put(rds_ibdev); 6308c2ecf20Sopenharmony_ci 6318c2ecf20Sopenharmony_ci return ERR_PTR(ret); 6328c2ecf20Sopenharmony_ci} 6338c2ecf20Sopenharmony_ci 6348c2ecf20Sopenharmony_civoid rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *pool) 6358c2ecf20Sopenharmony_ci{ 6368c2ecf20Sopenharmony_ci cancel_delayed_work_sync(&pool->flush_worker); 6378c2ecf20Sopenharmony_ci rds_ib_flush_mr_pool(pool, 1, NULL); 6388c2ecf20Sopenharmony_ci WARN_ON(atomic_read(&pool->item_count)); 6398c2ecf20Sopenharmony_ci WARN_ON(atomic_read(&pool->free_pinned)); 6408c2ecf20Sopenharmony_ci kfree(pool); 6418c2ecf20Sopenharmony_ci} 6428c2ecf20Sopenharmony_ci 6438c2ecf20Sopenharmony_cistruct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev, 6448c2ecf20Sopenharmony_ci int pool_type) 6458c2ecf20Sopenharmony_ci{ 6468c2ecf20Sopenharmony_ci struct rds_ib_mr_pool *pool; 6478c2ecf20Sopenharmony_ci 6488c2ecf20Sopenharmony_ci pool = kzalloc(sizeof(*pool), GFP_KERNEL); 6498c2ecf20Sopenharmony_ci if (!pool) 6508c2ecf20Sopenharmony_ci return ERR_PTR(-ENOMEM); 6518c2ecf20Sopenharmony_ci 6528c2ecf20Sopenharmony_ci pool->pool_type = pool_type; 6538c2ecf20Sopenharmony_ci init_llist_head(&pool->free_list); 6548c2ecf20Sopenharmony_ci init_llist_head(&pool->drop_list); 6558c2ecf20Sopenharmony_ci init_llist_head(&pool->clean_list); 6568c2ecf20Sopenharmony_ci spin_lock_init(&pool->clean_lock); 6578c2ecf20Sopenharmony_ci mutex_init(&pool->flush_lock); 6588c2ecf20Sopenharmony_ci init_waitqueue_head(&pool->flush_wait); 6598c2ecf20Sopenharmony_ci INIT_DELAYED_WORK(&pool->flush_worker, rds_ib_mr_pool_flush_worker); 6608c2ecf20Sopenharmony_ci 6618c2ecf20Sopenharmony_ci if (pool_type == RDS_IB_MR_1M_POOL) { 6628c2ecf20Sopenharmony_ci /* +1 allows for unaligned MRs */ 6638c2ecf20Sopenharmony_ci pool->max_pages = RDS_MR_1M_MSG_SIZE + 1; 6648c2ecf20Sopenharmony_ci pool->max_items = rds_ibdev->max_1m_mrs; 6658c2ecf20Sopenharmony_ci } else { 6668c2ecf20Sopenharmony_ci /* pool_type == RDS_IB_MR_8K_POOL */ 6678c2ecf20Sopenharmony_ci pool->max_pages = RDS_MR_8K_MSG_SIZE + 1; 6688c2ecf20Sopenharmony_ci pool->max_items = rds_ibdev->max_8k_mrs; 6698c2ecf20Sopenharmony_ci } 6708c2ecf20Sopenharmony_ci 6718c2ecf20Sopenharmony_ci pool->max_free_pinned = pool->max_items * pool->max_pages / 4; 6728c2ecf20Sopenharmony_ci pool->max_items_soft = rds_ibdev->max_mrs * 3 / 4; 6738c2ecf20Sopenharmony_ci 6748c2ecf20Sopenharmony_ci return pool; 6758c2ecf20Sopenharmony_ci} 6768c2ecf20Sopenharmony_ci 6778c2ecf20Sopenharmony_ciint rds_ib_mr_init(void) 6788c2ecf20Sopenharmony_ci{ 6798c2ecf20Sopenharmony_ci rds_ib_mr_wq = alloc_workqueue("rds_mr_flushd", WQ_MEM_RECLAIM, 0); 6808c2ecf20Sopenharmony_ci if (!rds_ib_mr_wq) 6818c2ecf20Sopenharmony_ci return -ENOMEM; 6828c2ecf20Sopenharmony_ci return 0; 6838c2ecf20Sopenharmony_ci} 6848c2ecf20Sopenharmony_ci 6858c2ecf20Sopenharmony_ci/* By the time this is called all the IB devices should have been torn down and 6868c2ecf20Sopenharmony_ci * had their pools freed. As each pool is freed its work struct is waited on, 6878c2ecf20Sopenharmony_ci * so the pool flushing work queue should be idle by the time we get here. 6888c2ecf20Sopenharmony_ci */ 6898c2ecf20Sopenharmony_civoid rds_ib_mr_exit(void) 6908c2ecf20Sopenharmony_ci{ 6918c2ecf20Sopenharmony_ci destroy_workqueue(rds_ib_mr_wq); 6928c2ecf20Sopenharmony_ci} 6938c2ecf20Sopenharmony_ci 6948c2ecf20Sopenharmony_cistatic void rds_ib_odp_mr_worker(struct work_struct *work) 6958c2ecf20Sopenharmony_ci{ 6968c2ecf20Sopenharmony_ci struct rds_ib_mr *ibmr; 6978c2ecf20Sopenharmony_ci 6988c2ecf20Sopenharmony_ci ibmr = container_of(work, struct rds_ib_mr, work.work); 6998c2ecf20Sopenharmony_ci ib_dereg_mr(ibmr->u.mr); 7008c2ecf20Sopenharmony_ci kfree(ibmr); 7018c2ecf20Sopenharmony_ci} 702