162306a36Sopenharmony_ci/* 262306a36Sopenharmony_ci * Copyright (c) 2006, 2018 Oracle and/or its affiliates. All rights reserved. 362306a36Sopenharmony_ci * 462306a36Sopenharmony_ci * This software is available to you under a choice of one of two 562306a36Sopenharmony_ci * licenses. You may choose to be licensed under the terms of the GNU 662306a36Sopenharmony_ci * General Public License (GPL) Version 2, available from the file 762306a36Sopenharmony_ci * COPYING in the main directory of this source tree, or the 862306a36Sopenharmony_ci * OpenIB.org BSD license below: 962306a36Sopenharmony_ci * 1062306a36Sopenharmony_ci * Redistribution and use in source and binary forms, with or 1162306a36Sopenharmony_ci * without modification, are permitted provided that the following 1262306a36Sopenharmony_ci * conditions are met: 1362306a36Sopenharmony_ci * 1462306a36Sopenharmony_ci * - Redistributions of source code must retain the above 1562306a36Sopenharmony_ci * copyright notice, this list of conditions and the following 1662306a36Sopenharmony_ci * disclaimer. 1762306a36Sopenharmony_ci * 1862306a36Sopenharmony_ci * - Redistributions in binary form must reproduce the above 1962306a36Sopenharmony_ci * copyright notice, this list of conditions and the following 2062306a36Sopenharmony_ci * disclaimer in the documentation and/or other materials 2162306a36Sopenharmony_ci * provided with the distribution. 2262306a36Sopenharmony_ci * 2362306a36Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 2462306a36Sopenharmony_ci * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 2562306a36Sopenharmony_ci * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 2662306a36Sopenharmony_ci * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 2762306a36Sopenharmony_ci * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 2862306a36Sopenharmony_ci * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 2962306a36Sopenharmony_ci * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 3062306a36Sopenharmony_ci * SOFTWARE. 3162306a36Sopenharmony_ci * 3262306a36Sopenharmony_ci */ 3362306a36Sopenharmony_ci#include <linux/kernel.h> 3462306a36Sopenharmony_ci#include <linux/slab.h> 3562306a36Sopenharmony_ci#include <linux/rculist.h> 3662306a36Sopenharmony_ci#include <linux/llist.h> 3762306a36Sopenharmony_ci 3862306a36Sopenharmony_ci#include "rds_single_path.h" 3962306a36Sopenharmony_ci#include "ib_mr.h" 4062306a36Sopenharmony_ci#include "rds.h" 4162306a36Sopenharmony_ci 4262306a36Sopenharmony_cistruct workqueue_struct *rds_ib_mr_wq; 4362306a36Sopenharmony_cistruct rds_ib_dereg_odp_mr { 4462306a36Sopenharmony_ci struct work_struct work; 4562306a36Sopenharmony_ci struct ib_mr *mr; 4662306a36Sopenharmony_ci}; 4762306a36Sopenharmony_ci 4862306a36Sopenharmony_cistatic void rds_ib_odp_mr_worker(struct work_struct *work); 4962306a36Sopenharmony_ci 5062306a36Sopenharmony_cistatic struct rds_ib_device *rds_ib_get_device(__be32 ipaddr) 5162306a36Sopenharmony_ci{ 5262306a36Sopenharmony_ci struct rds_ib_device *rds_ibdev; 5362306a36Sopenharmony_ci struct rds_ib_ipaddr *i_ipaddr; 5462306a36Sopenharmony_ci 5562306a36Sopenharmony_ci rcu_read_lock(); 5662306a36Sopenharmony_ci list_for_each_entry_rcu(rds_ibdev, &rds_ib_devices, list) { 5762306a36Sopenharmony_ci list_for_each_entry_rcu(i_ipaddr, &rds_ibdev->ipaddr_list, list) { 5862306a36Sopenharmony_ci if (i_ipaddr->ipaddr == ipaddr) { 5962306a36Sopenharmony_ci refcount_inc(&rds_ibdev->refcount); 6062306a36Sopenharmony_ci rcu_read_unlock(); 6162306a36Sopenharmony_ci return rds_ibdev; 6262306a36Sopenharmony_ci } 6362306a36Sopenharmony_ci } 6462306a36Sopenharmony_ci } 6562306a36Sopenharmony_ci rcu_read_unlock(); 6662306a36Sopenharmony_ci 6762306a36Sopenharmony_ci return NULL; 6862306a36Sopenharmony_ci} 6962306a36Sopenharmony_ci 7062306a36Sopenharmony_cistatic int rds_ib_add_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr) 7162306a36Sopenharmony_ci{ 7262306a36Sopenharmony_ci struct rds_ib_ipaddr *i_ipaddr; 7362306a36Sopenharmony_ci 7462306a36Sopenharmony_ci i_ipaddr = kmalloc(sizeof *i_ipaddr, GFP_KERNEL); 7562306a36Sopenharmony_ci if (!i_ipaddr) 7662306a36Sopenharmony_ci return -ENOMEM; 7762306a36Sopenharmony_ci 7862306a36Sopenharmony_ci i_ipaddr->ipaddr = ipaddr; 7962306a36Sopenharmony_ci 8062306a36Sopenharmony_ci spin_lock_irq(&rds_ibdev->spinlock); 8162306a36Sopenharmony_ci list_add_tail_rcu(&i_ipaddr->list, &rds_ibdev->ipaddr_list); 8262306a36Sopenharmony_ci spin_unlock_irq(&rds_ibdev->spinlock); 8362306a36Sopenharmony_ci 8462306a36Sopenharmony_ci return 0; 8562306a36Sopenharmony_ci} 8662306a36Sopenharmony_ci 8762306a36Sopenharmony_cistatic void rds_ib_remove_ipaddr(struct rds_ib_device *rds_ibdev, __be32 ipaddr) 8862306a36Sopenharmony_ci{ 8962306a36Sopenharmony_ci struct rds_ib_ipaddr *i_ipaddr; 9062306a36Sopenharmony_ci struct rds_ib_ipaddr *to_free = NULL; 9162306a36Sopenharmony_ci 9262306a36Sopenharmony_ci 9362306a36Sopenharmony_ci spin_lock_irq(&rds_ibdev->spinlock); 9462306a36Sopenharmony_ci list_for_each_entry_rcu(i_ipaddr, &rds_ibdev->ipaddr_list, list) { 9562306a36Sopenharmony_ci if (i_ipaddr->ipaddr == ipaddr) { 9662306a36Sopenharmony_ci list_del_rcu(&i_ipaddr->list); 9762306a36Sopenharmony_ci to_free = i_ipaddr; 9862306a36Sopenharmony_ci break; 9962306a36Sopenharmony_ci } 10062306a36Sopenharmony_ci } 10162306a36Sopenharmony_ci spin_unlock_irq(&rds_ibdev->spinlock); 10262306a36Sopenharmony_ci 10362306a36Sopenharmony_ci if (to_free) 10462306a36Sopenharmony_ci kfree_rcu(to_free, rcu); 10562306a36Sopenharmony_ci} 10662306a36Sopenharmony_ci 10762306a36Sopenharmony_ciint rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, 10862306a36Sopenharmony_ci struct in6_addr *ipaddr) 10962306a36Sopenharmony_ci{ 11062306a36Sopenharmony_ci struct rds_ib_device *rds_ibdev_old; 11162306a36Sopenharmony_ci 11262306a36Sopenharmony_ci rds_ibdev_old = rds_ib_get_device(ipaddr->s6_addr32[3]); 11362306a36Sopenharmony_ci if (!rds_ibdev_old) 11462306a36Sopenharmony_ci return rds_ib_add_ipaddr(rds_ibdev, ipaddr->s6_addr32[3]); 11562306a36Sopenharmony_ci 11662306a36Sopenharmony_ci if (rds_ibdev_old != rds_ibdev) { 11762306a36Sopenharmony_ci rds_ib_remove_ipaddr(rds_ibdev_old, ipaddr->s6_addr32[3]); 11862306a36Sopenharmony_ci rds_ib_dev_put(rds_ibdev_old); 11962306a36Sopenharmony_ci return rds_ib_add_ipaddr(rds_ibdev, ipaddr->s6_addr32[3]); 12062306a36Sopenharmony_ci } 12162306a36Sopenharmony_ci rds_ib_dev_put(rds_ibdev_old); 12262306a36Sopenharmony_ci 12362306a36Sopenharmony_ci return 0; 12462306a36Sopenharmony_ci} 12562306a36Sopenharmony_ci 12662306a36Sopenharmony_civoid rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn) 12762306a36Sopenharmony_ci{ 12862306a36Sopenharmony_ci struct rds_ib_connection *ic = conn->c_transport_data; 12962306a36Sopenharmony_ci 13062306a36Sopenharmony_ci /* conn was previously on the nodev_conns_list */ 13162306a36Sopenharmony_ci spin_lock_irq(&ib_nodev_conns_lock); 13262306a36Sopenharmony_ci BUG_ON(list_empty(&ib_nodev_conns)); 13362306a36Sopenharmony_ci BUG_ON(list_empty(&ic->ib_node)); 13462306a36Sopenharmony_ci list_del(&ic->ib_node); 13562306a36Sopenharmony_ci 13662306a36Sopenharmony_ci spin_lock(&rds_ibdev->spinlock); 13762306a36Sopenharmony_ci list_add_tail(&ic->ib_node, &rds_ibdev->conn_list); 13862306a36Sopenharmony_ci spin_unlock(&rds_ibdev->spinlock); 13962306a36Sopenharmony_ci spin_unlock_irq(&ib_nodev_conns_lock); 14062306a36Sopenharmony_ci 14162306a36Sopenharmony_ci ic->rds_ibdev = rds_ibdev; 14262306a36Sopenharmony_ci refcount_inc(&rds_ibdev->refcount); 14362306a36Sopenharmony_ci} 14462306a36Sopenharmony_ci 14562306a36Sopenharmony_civoid rds_ib_remove_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn) 14662306a36Sopenharmony_ci{ 14762306a36Sopenharmony_ci struct rds_ib_connection *ic = conn->c_transport_data; 14862306a36Sopenharmony_ci 14962306a36Sopenharmony_ci /* place conn on nodev_conns_list */ 15062306a36Sopenharmony_ci spin_lock(&ib_nodev_conns_lock); 15162306a36Sopenharmony_ci 15262306a36Sopenharmony_ci spin_lock_irq(&rds_ibdev->spinlock); 15362306a36Sopenharmony_ci BUG_ON(list_empty(&ic->ib_node)); 15462306a36Sopenharmony_ci list_del(&ic->ib_node); 15562306a36Sopenharmony_ci spin_unlock_irq(&rds_ibdev->spinlock); 15662306a36Sopenharmony_ci 15762306a36Sopenharmony_ci list_add_tail(&ic->ib_node, &ib_nodev_conns); 15862306a36Sopenharmony_ci 15962306a36Sopenharmony_ci spin_unlock(&ib_nodev_conns_lock); 16062306a36Sopenharmony_ci 16162306a36Sopenharmony_ci ic->rds_ibdev = NULL; 16262306a36Sopenharmony_ci rds_ib_dev_put(rds_ibdev); 16362306a36Sopenharmony_ci} 16462306a36Sopenharmony_ci 16562306a36Sopenharmony_civoid rds_ib_destroy_nodev_conns(void) 16662306a36Sopenharmony_ci{ 16762306a36Sopenharmony_ci struct rds_ib_connection *ic, *_ic; 16862306a36Sopenharmony_ci LIST_HEAD(tmp_list); 16962306a36Sopenharmony_ci 17062306a36Sopenharmony_ci /* avoid calling conn_destroy with irqs off */ 17162306a36Sopenharmony_ci spin_lock_irq(&ib_nodev_conns_lock); 17262306a36Sopenharmony_ci list_splice(&ib_nodev_conns, &tmp_list); 17362306a36Sopenharmony_ci spin_unlock_irq(&ib_nodev_conns_lock); 17462306a36Sopenharmony_ci 17562306a36Sopenharmony_ci list_for_each_entry_safe(ic, _ic, &tmp_list, ib_node) 17662306a36Sopenharmony_ci rds_conn_destroy(ic->conn); 17762306a36Sopenharmony_ci} 17862306a36Sopenharmony_ci 17962306a36Sopenharmony_civoid rds_ib_get_mr_info(struct rds_ib_device *rds_ibdev, struct rds_info_rdma_connection *iinfo) 18062306a36Sopenharmony_ci{ 18162306a36Sopenharmony_ci struct rds_ib_mr_pool *pool_1m = rds_ibdev->mr_1m_pool; 18262306a36Sopenharmony_ci 18362306a36Sopenharmony_ci iinfo->rdma_mr_max = pool_1m->max_items; 18462306a36Sopenharmony_ci iinfo->rdma_mr_size = pool_1m->max_pages; 18562306a36Sopenharmony_ci} 18662306a36Sopenharmony_ci 18762306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6) 18862306a36Sopenharmony_civoid rds6_ib_get_mr_info(struct rds_ib_device *rds_ibdev, 18962306a36Sopenharmony_ci struct rds6_info_rdma_connection *iinfo6) 19062306a36Sopenharmony_ci{ 19162306a36Sopenharmony_ci struct rds_ib_mr_pool *pool_1m = rds_ibdev->mr_1m_pool; 19262306a36Sopenharmony_ci 19362306a36Sopenharmony_ci iinfo6->rdma_mr_max = pool_1m->max_items; 19462306a36Sopenharmony_ci iinfo6->rdma_mr_size = pool_1m->max_pages; 19562306a36Sopenharmony_ci} 19662306a36Sopenharmony_ci#endif 19762306a36Sopenharmony_ci 19862306a36Sopenharmony_cistruct rds_ib_mr *rds_ib_reuse_mr(struct rds_ib_mr_pool *pool) 19962306a36Sopenharmony_ci{ 20062306a36Sopenharmony_ci struct rds_ib_mr *ibmr = NULL; 20162306a36Sopenharmony_ci struct llist_node *ret; 20262306a36Sopenharmony_ci unsigned long flags; 20362306a36Sopenharmony_ci 20462306a36Sopenharmony_ci spin_lock_irqsave(&pool->clean_lock, flags); 20562306a36Sopenharmony_ci ret = llist_del_first(&pool->clean_list); 20662306a36Sopenharmony_ci spin_unlock_irqrestore(&pool->clean_lock, flags); 20762306a36Sopenharmony_ci if (ret) { 20862306a36Sopenharmony_ci ibmr = llist_entry(ret, struct rds_ib_mr, llnode); 20962306a36Sopenharmony_ci if (pool->pool_type == RDS_IB_MR_8K_POOL) 21062306a36Sopenharmony_ci rds_ib_stats_inc(s_ib_rdma_mr_8k_reused); 21162306a36Sopenharmony_ci else 21262306a36Sopenharmony_ci rds_ib_stats_inc(s_ib_rdma_mr_1m_reused); 21362306a36Sopenharmony_ci } 21462306a36Sopenharmony_ci 21562306a36Sopenharmony_ci return ibmr; 21662306a36Sopenharmony_ci} 21762306a36Sopenharmony_ci 21862306a36Sopenharmony_civoid rds_ib_sync_mr(void *trans_private, int direction) 21962306a36Sopenharmony_ci{ 22062306a36Sopenharmony_ci struct rds_ib_mr *ibmr = trans_private; 22162306a36Sopenharmony_ci struct rds_ib_device *rds_ibdev = ibmr->device; 22262306a36Sopenharmony_ci 22362306a36Sopenharmony_ci if (ibmr->odp) 22462306a36Sopenharmony_ci return; 22562306a36Sopenharmony_ci 22662306a36Sopenharmony_ci switch (direction) { 22762306a36Sopenharmony_ci case DMA_FROM_DEVICE: 22862306a36Sopenharmony_ci ib_dma_sync_sg_for_cpu(rds_ibdev->dev, ibmr->sg, 22962306a36Sopenharmony_ci ibmr->sg_dma_len, DMA_BIDIRECTIONAL); 23062306a36Sopenharmony_ci break; 23162306a36Sopenharmony_ci case DMA_TO_DEVICE: 23262306a36Sopenharmony_ci ib_dma_sync_sg_for_device(rds_ibdev->dev, ibmr->sg, 23362306a36Sopenharmony_ci ibmr->sg_dma_len, DMA_BIDIRECTIONAL); 23462306a36Sopenharmony_ci break; 23562306a36Sopenharmony_ci } 23662306a36Sopenharmony_ci} 23762306a36Sopenharmony_ci 23862306a36Sopenharmony_civoid __rds_ib_teardown_mr(struct rds_ib_mr *ibmr) 23962306a36Sopenharmony_ci{ 24062306a36Sopenharmony_ci struct rds_ib_device *rds_ibdev = ibmr->device; 24162306a36Sopenharmony_ci 24262306a36Sopenharmony_ci if (ibmr->sg_dma_len) { 24362306a36Sopenharmony_ci ib_dma_unmap_sg(rds_ibdev->dev, 24462306a36Sopenharmony_ci ibmr->sg, ibmr->sg_len, 24562306a36Sopenharmony_ci DMA_BIDIRECTIONAL); 24662306a36Sopenharmony_ci ibmr->sg_dma_len = 0; 24762306a36Sopenharmony_ci } 24862306a36Sopenharmony_ci 24962306a36Sopenharmony_ci /* Release the s/g list */ 25062306a36Sopenharmony_ci if (ibmr->sg_len) { 25162306a36Sopenharmony_ci unsigned int i; 25262306a36Sopenharmony_ci 25362306a36Sopenharmony_ci for (i = 0; i < ibmr->sg_len; ++i) { 25462306a36Sopenharmony_ci struct page *page = sg_page(&ibmr->sg[i]); 25562306a36Sopenharmony_ci 25662306a36Sopenharmony_ci /* FIXME we need a way to tell a r/w MR 25762306a36Sopenharmony_ci * from a r/o MR */ 25862306a36Sopenharmony_ci WARN_ON(!page->mapping && irqs_disabled()); 25962306a36Sopenharmony_ci set_page_dirty(page); 26062306a36Sopenharmony_ci put_page(page); 26162306a36Sopenharmony_ci } 26262306a36Sopenharmony_ci kfree(ibmr->sg); 26362306a36Sopenharmony_ci 26462306a36Sopenharmony_ci ibmr->sg = NULL; 26562306a36Sopenharmony_ci ibmr->sg_len = 0; 26662306a36Sopenharmony_ci } 26762306a36Sopenharmony_ci} 26862306a36Sopenharmony_ci 26962306a36Sopenharmony_civoid rds_ib_teardown_mr(struct rds_ib_mr *ibmr) 27062306a36Sopenharmony_ci{ 27162306a36Sopenharmony_ci unsigned int pinned = ibmr->sg_len; 27262306a36Sopenharmony_ci 27362306a36Sopenharmony_ci __rds_ib_teardown_mr(ibmr); 27462306a36Sopenharmony_ci if (pinned) { 27562306a36Sopenharmony_ci struct rds_ib_mr_pool *pool = ibmr->pool; 27662306a36Sopenharmony_ci 27762306a36Sopenharmony_ci atomic_sub(pinned, &pool->free_pinned); 27862306a36Sopenharmony_ci } 27962306a36Sopenharmony_ci} 28062306a36Sopenharmony_ci 28162306a36Sopenharmony_cistatic inline unsigned int rds_ib_flush_goal(struct rds_ib_mr_pool *pool, int free_all) 28262306a36Sopenharmony_ci{ 28362306a36Sopenharmony_ci unsigned int item_count; 28462306a36Sopenharmony_ci 28562306a36Sopenharmony_ci item_count = atomic_read(&pool->item_count); 28662306a36Sopenharmony_ci if (free_all) 28762306a36Sopenharmony_ci return item_count; 28862306a36Sopenharmony_ci 28962306a36Sopenharmony_ci return 0; 29062306a36Sopenharmony_ci} 29162306a36Sopenharmony_ci 29262306a36Sopenharmony_ci/* 29362306a36Sopenharmony_ci * given an llist of mrs, put them all into the list_head for more processing 29462306a36Sopenharmony_ci */ 29562306a36Sopenharmony_cistatic unsigned int llist_append_to_list(struct llist_head *llist, 29662306a36Sopenharmony_ci struct list_head *list) 29762306a36Sopenharmony_ci{ 29862306a36Sopenharmony_ci struct rds_ib_mr *ibmr; 29962306a36Sopenharmony_ci struct llist_node *node; 30062306a36Sopenharmony_ci struct llist_node *next; 30162306a36Sopenharmony_ci unsigned int count = 0; 30262306a36Sopenharmony_ci 30362306a36Sopenharmony_ci node = llist_del_all(llist); 30462306a36Sopenharmony_ci while (node) { 30562306a36Sopenharmony_ci next = node->next; 30662306a36Sopenharmony_ci ibmr = llist_entry(node, struct rds_ib_mr, llnode); 30762306a36Sopenharmony_ci list_add_tail(&ibmr->unmap_list, list); 30862306a36Sopenharmony_ci node = next; 30962306a36Sopenharmony_ci count++; 31062306a36Sopenharmony_ci } 31162306a36Sopenharmony_ci return count; 31262306a36Sopenharmony_ci} 31362306a36Sopenharmony_ci 31462306a36Sopenharmony_ci/* 31562306a36Sopenharmony_ci * this takes a list head of mrs and turns it into linked llist nodes 31662306a36Sopenharmony_ci * of clusters. Each cluster has linked llist nodes of 31762306a36Sopenharmony_ci * MR_CLUSTER_SIZE mrs that are ready for reuse. 31862306a36Sopenharmony_ci */ 31962306a36Sopenharmony_cistatic void list_to_llist_nodes(struct list_head *list, 32062306a36Sopenharmony_ci struct llist_node **nodes_head, 32162306a36Sopenharmony_ci struct llist_node **nodes_tail) 32262306a36Sopenharmony_ci{ 32362306a36Sopenharmony_ci struct rds_ib_mr *ibmr; 32462306a36Sopenharmony_ci struct llist_node *cur = NULL; 32562306a36Sopenharmony_ci struct llist_node **next = nodes_head; 32662306a36Sopenharmony_ci 32762306a36Sopenharmony_ci list_for_each_entry(ibmr, list, unmap_list) { 32862306a36Sopenharmony_ci cur = &ibmr->llnode; 32962306a36Sopenharmony_ci *next = cur; 33062306a36Sopenharmony_ci next = &cur->next; 33162306a36Sopenharmony_ci } 33262306a36Sopenharmony_ci *next = NULL; 33362306a36Sopenharmony_ci *nodes_tail = cur; 33462306a36Sopenharmony_ci} 33562306a36Sopenharmony_ci 33662306a36Sopenharmony_ci/* 33762306a36Sopenharmony_ci * Flush our pool of MRs. 33862306a36Sopenharmony_ci * At a minimum, all currently unused MRs are unmapped. 33962306a36Sopenharmony_ci * If the number of MRs allocated exceeds the limit, we also try 34062306a36Sopenharmony_ci * to free as many MRs as needed to get back to this limit. 34162306a36Sopenharmony_ci */ 34262306a36Sopenharmony_ciint rds_ib_flush_mr_pool(struct rds_ib_mr_pool *pool, 34362306a36Sopenharmony_ci int free_all, struct rds_ib_mr **ibmr_ret) 34462306a36Sopenharmony_ci{ 34562306a36Sopenharmony_ci struct rds_ib_mr *ibmr; 34662306a36Sopenharmony_ci struct llist_node *clean_nodes; 34762306a36Sopenharmony_ci struct llist_node *clean_tail; 34862306a36Sopenharmony_ci LIST_HEAD(unmap_list); 34962306a36Sopenharmony_ci unsigned long unpinned = 0; 35062306a36Sopenharmony_ci unsigned int nfreed = 0, dirty_to_clean = 0, free_goal; 35162306a36Sopenharmony_ci 35262306a36Sopenharmony_ci if (pool->pool_type == RDS_IB_MR_8K_POOL) 35362306a36Sopenharmony_ci rds_ib_stats_inc(s_ib_rdma_mr_8k_pool_flush); 35462306a36Sopenharmony_ci else 35562306a36Sopenharmony_ci rds_ib_stats_inc(s_ib_rdma_mr_1m_pool_flush); 35662306a36Sopenharmony_ci 35762306a36Sopenharmony_ci if (ibmr_ret) { 35862306a36Sopenharmony_ci DEFINE_WAIT(wait); 35962306a36Sopenharmony_ci while (!mutex_trylock(&pool->flush_lock)) { 36062306a36Sopenharmony_ci ibmr = rds_ib_reuse_mr(pool); 36162306a36Sopenharmony_ci if (ibmr) { 36262306a36Sopenharmony_ci *ibmr_ret = ibmr; 36362306a36Sopenharmony_ci finish_wait(&pool->flush_wait, &wait); 36462306a36Sopenharmony_ci goto out_nolock; 36562306a36Sopenharmony_ci } 36662306a36Sopenharmony_ci 36762306a36Sopenharmony_ci prepare_to_wait(&pool->flush_wait, &wait, 36862306a36Sopenharmony_ci TASK_UNINTERRUPTIBLE); 36962306a36Sopenharmony_ci if (llist_empty(&pool->clean_list)) 37062306a36Sopenharmony_ci schedule(); 37162306a36Sopenharmony_ci 37262306a36Sopenharmony_ci ibmr = rds_ib_reuse_mr(pool); 37362306a36Sopenharmony_ci if (ibmr) { 37462306a36Sopenharmony_ci *ibmr_ret = ibmr; 37562306a36Sopenharmony_ci finish_wait(&pool->flush_wait, &wait); 37662306a36Sopenharmony_ci goto out_nolock; 37762306a36Sopenharmony_ci } 37862306a36Sopenharmony_ci } 37962306a36Sopenharmony_ci finish_wait(&pool->flush_wait, &wait); 38062306a36Sopenharmony_ci } else 38162306a36Sopenharmony_ci mutex_lock(&pool->flush_lock); 38262306a36Sopenharmony_ci 38362306a36Sopenharmony_ci if (ibmr_ret) { 38462306a36Sopenharmony_ci ibmr = rds_ib_reuse_mr(pool); 38562306a36Sopenharmony_ci if (ibmr) { 38662306a36Sopenharmony_ci *ibmr_ret = ibmr; 38762306a36Sopenharmony_ci goto out; 38862306a36Sopenharmony_ci } 38962306a36Sopenharmony_ci } 39062306a36Sopenharmony_ci 39162306a36Sopenharmony_ci /* Get the list of all MRs to be dropped. Ordering matters - 39262306a36Sopenharmony_ci * we want to put drop_list ahead of free_list. 39362306a36Sopenharmony_ci */ 39462306a36Sopenharmony_ci dirty_to_clean = llist_append_to_list(&pool->drop_list, &unmap_list); 39562306a36Sopenharmony_ci dirty_to_clean += llist_append_to_list(&pool->free_list, &unmap_list); 39662306a36Sopenharmony_ci if (free_all) { 39762306a36Sopenharmony_ci unsigned long flags; 39862306a36Sopenharmony_ci 39962306a36Sopenharmony_ci spin_lock_irqsave(&pool->clean_lock, flags); 40062306a36Sopenharmony_ci llist_append_to_list(&pool->clean_list, &unmap_list); 40162306a36Sopenharmony_ci spin_unlock_irqrestore(&pool->clean_lock, flags); 40262306a36Sopenharmony_ci } 40362306a36Sopenharmony_ci 40462306a36Sopenharmony_ci free_goal = rds_ib_flush_goal(pool, free_all); 40562306a36Sopenharmony_ci 40662306a36Sopenharmony_ci if (list_empty(&unmap_list)) 40762306a36Sopenharmony_ci goto out; 40862306a36Sopenharmony_ci 40962306a36Sopenharmony_ci rds_ib_unreg_frmr(&unmap_list, &nfreed, &unpinned, free_goal); 41062306a36Sopenharmony_ci 41162306a36Sopenharmony_ci if (!list_empty(&unmap_list)) { 41262306a36Sopenharmony_ci unsigned long flags; 41362306a36Sopenharmony_ci 41462306a36Sopenharmony_ci list_to_llist_nodes(&unmap_list, &clean_nodes, &clean_tail); 41562306a36Sopenharmony_ci if (ibmr_ret) { 41662306a36Sopenharmony_ci *ibmr_ret = llist_entry(clean_nodes, struct rds_ib_mr, llnode); 41762306a36Sopenharmony_ci clean_nodes = clean_nodes->next; 41862306a36Sopenharmony_ci } 41962306a36Sopenharmony_ci /* more than one entry in llist nodes */ 42062306a36Sopenharmony_ci if (clean_nodes) { 42162306a36Sopenharmony_ci spin_lock_irqsave(&pool->clean_lock, flags); 42262306a36Sopenharmony_ci llist_add_batch(clean_nodes, clean_tail, 42362306a36Sopenharmony_ci &pool->clean_list); 42462306a36Sopenharmony_ci spin_unlock_irqrestore(&pool->clean_lock, flags); 42562306a36Sopenharmony_ci } 42662306a36Sopenharmony_ci } 42762306a36Sopenharmony_ci 42862306a36Sopenharmony_ci atomic_sub(unpinned, &pool->free_pinned); 42962306a36Sopenharmony_ci atomic_sub(dirty_to_clean, &pool->dirty_count); 43062306a36Sopenharmony_ci atomic_sub(nfreed, &pool->item_count); 43162306a36Sopenharmony_ci 43262306a36Sopenharmony_ciout: 43362306a36Sopenharmony_ci mutex_unlock(&pool->flush_lock); 43462306a36Sopenharmony_ci if (waitqueue_active(&pool->flush_wait)) 43562306a36Sopenharmony_ci wake_up(&pool->flush_wait); 43662306a36Sopenharmony_ciout_nolock: 43762306a36Sopenharmony_ci return 0; 43862306a36Sopenharmony_ci} 43962306a36Sopenharmony_ci 44062306a36Sopenharmony_cistruct rds_ib_mr *rds_ib_try_reuse_ibmr(struct rds_ib_mr_pool *pool) 44162306a36Sopenharmony_ci{ 44262306a36Sopenharmony_ci struct rds_ib_mr *ibmr = NULL; 44362306a36Sopenharmony_ci int iter = 0; 44462306a36Sopenharmony_ci 44562306a36Sopenharmony_ci while (1) { 44662306a36Sopenharmony_ci ibmr = rds_ib_reuse_mr(pool); 44762306a36Sopenharmony_ci if (ibmr) 44862306a36Sopenharmony_ci return ibmr; 44962306a36Sopenharmony_ci 45062306a36Sopenharmony_ci if (atomic_inc_return(&pool->item_count) <= pool->max_items) 45162306a36Sopenharmony_ci break; 45262306a36Sopenharmony_ci 45362306a36Sopenharmony_ci atomic_dec(&pool->item_count); 45462306a36Sopenharmony_ci 45562306a36Sopenharmony_ci if (++iter > 2) { 45662306a36Sopenharmony_ci if (pool->pool_type == RDS_IB_MR_8K_POOL) 45762306a36Sopenharmony_ci rds_ib_stats_inc(s_ib_rdma_mr_8k_pool_depleted); 45862306a36Sopenharmony_ci else 45962306a36Sopenharmony_ci rds_ib_stats_inc(s_ib_rdma_mr_1m_pool_depleted); 46062306a36Sopenharmony_ci break; 46162306a36Sopenharmony_ci } 46262306a36Sopenharmony_ci 46362306a36Sopenharmony_ci /* We do have some empty MRs. Flush them out. */ 46462306a36Sopenharmony_ci if (pool->pool_type == RDS_IB_MR_8K_POOL) 46562306a36Sopenharmony_ci rds_ib_stats_inc(s_ib_rdma_mr_8k_pool_wait); 46662306a36Sopenharmony_ci else 46762306a36Sopenharmony_ci rds_ib_stats_inc(s_ib_rdma_mr_1m_pool_wait); 46862306a36Sopenharmony_ci 46962306a36Sopenharmony_ci rds_ib_flush_mr_pool(pool, 0, &ibmr); 47062306a36Sopenharmony_ci if (ibmr) 47162306a36Sopenharmony_ci return ibmr; 47262306a36Sopenharmony_ci } 47362306a36Sopenharmony_ci 47462306a36Sopenharmony_ci return NULL; 47562306a36Sopenharmony_ci} 47662306a36Sopenharmony_ci 47762306a36Sopenharmony_cistatic void rds_ib_mr_pool_flush_worker(struct work_struct *work) 47862306a36Sopenharmony_ci{ 47962306a36Sopenharmony_ci struct rds_ib_mr_pool *pool = container_of(work, struct rds_ib_mr_pool, flush_worker.work); 48062306a36Sopenharmony_ci 48162306a36Sopenharmony_ci rds_ib_flush_mr_pool(pool, 0, NULL); 48262306a36Sopenharmony_ci} 48362306a36Sopenharmony_ci 48462306a36Sopenharmony_civoid rds_ib_free_mr(void *trans_private, int invalidate) 48562306a36Sopenharmony_ci{ 48662306a36Sopenharmony_ci struct rds_ib_mr *ibmr = trans_private; 48762306a36Sopenharmony_ci struct rds_ib_mr_pool *pool = ibmr->pool; 48862306a36Sopenharmony_ci struct rds_ib_device *rds_ibdev = ibmr->device; 48962306a36Sopenharmony_ci 49062306a36Sopenharmony_ci rdsdebug("RDS/IB: free_mr nents %u\n", ibmr->sg_len); 49162306a36Sopenharmony_ci 49262306a36Sopenharmony_ci if (ibmr->odp) { 49362306a36Sopenharmony_ci /* A MR created and marked as use_once. We use delayed work, 49462306a36Sopenharmony_ci * because there is a change that we are in interrupt and can't 49562306a36Sopenharmony_ci * call to ib_dereg_mr() directly. 49662306a36Sopenharmony_ci */ 49762306a36Sopenharmony_ci INIT_DELAYED_WORK(&ibmr->work, rds_ib_odp_mr_worker); 49862306a36Sopenharmony_ci queue_delayed_work(rds_ib_mr_wq, &ibmr->work, 0); 49962306a36Sopenharmony_ci return; 50062306a36Sopenharmony_ci } 50162306a36Sopenharmony_ci 50262306a36Sopenharmony_ci /* Return it to the pool's free list */ 50362306a36Sopenharmony_ci rds_ib_free_frmr_list(ibmr); 50462306a36Sopenharmony_ci 50562306a36Sopenharmony_ci atomic_add(ibmr->sg_len, &pool->free_pinned); 50662306a36Sopenharmony_ci atomic_inc(&pool->dirty_count); 50762306a36Sopenharmony_ci 50862306a36Sopenharmony_ci /* If we've pinned too many pages, request a flush */ 50962306a36Sopenharmony_ci if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned || 51062306a36Sopenharmony_ci atomic_read(&pool->dirty_count) >= pool->max_items / 5) 51162306a36Sopenharmony_ci queue_delayed_work(rds_ib_mr_wq, &pool->flush_worker, 10); 51262306a36Sopenharmony_ci 51362306a36Sopenharmony_ci if (invalidate) { 51462306a36Sopenharmony_ci if (likely(!in_interrupt())) { 51562306a36Sopenharmony_ci rds_ib_flush_mr_pool(pool, 0, NULL); 51662306a36Sopenharmony_ci } else { 51762306a36Sopenharmony_ci /* We get here if the user created a MR marked 51862306a36Sopenharmony_ci * as use_once and invalidate at the same time. 51962306a36Sopenharmony_ci */ 52062306a36Sopenharmony_ci queue_delayed_work(rds_ib_mr_wq, 52162306a36Sopenharmony_ci &pool->flush_worker, 10); 52262306a36Sopenharmony_ci } 52362306a36Sopenharmony_ci } 52462306a36Sopenharmony_ci 52562306a36Sopenharmony_ci rds_ib_dev_put(rds_ibdev); 52662306a36Sopenharmony_ci} 52762306a36Sopenharmony_ci 52862306a36Sopenharmony_civoid rds_ib_flush_mrs(void) 52962306a36Sopenharmony_ci{ 53062306a36Sopenharmony_ci struct rds_ib_device *rds_ibdev; 53162306a36Sopenharmony_ci 53262306a36Sopenharmony_ci down_read(&rds_ib_devices_lock); 53362306a36Sopenharmony_ci list_for_each_entry(rds_ibdev, &rds_ib_devices, list) { 53462306a36Sopenharmony_ci if (rds_ibdev->mr_8k_pool) 53562306a36Sopenharmony_ci rds_ib_flush_mr_pool(rds_ibdev->mr_8k_pool, 0, NULL); 53662306a36Sopenharmony_ci 53762306a36Sopenharmony_ci if (rds_ibdev->mr_1m_pool) 53862306a36Sopenharmony_ci rds_ib_flush_mr_pool(rds_ibdev->mr_1m_pool, 0, NULL); 53962306a36Sopenharmony_ci } 54062306a36Sopenharmony_ci up_read(&rds_ib_devices_lock); 54162306a36Sopenharmony_ci} 54262306a36Sopenharmony_ci 54362306a36Sopenharmony_ciu32 rds_ib_get_lkey(void *trans_private) 54462306a36Sopenharmony_ci{ 54562306a36Sopenharmony_ci struct rds_ib_mr *ibmr = trans_private; 54662306a36Sopenharmony_ci 54762306a36Sopenharmony_ci return ibmr->u.mr->lkey; 54862306a36Sopenharmony_ci} 54962306a36Sopenharmony_ci 55062306a36Sopenharmony_civoid *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents, 55162306a36Sopenharmony_ci struct rds_sock *rs, u32 *key_ret, 55262306a36Sopenharmony_ci struct rds_connection *conn, 55362306a36Sopenharmony_ci u64 start, u64 length, int need_odp) 55462306a36Sopenharmony_ci{ 55562306a36Sopenharmony_ci struct rds_ib_device *rds_ibdev; 55662306a36Sopenharmony_ci struct rds_ib_mr *ibmr = NULL; 55762306a36Sopenharmony_ci struct rds_ib_connection *ic = NULL; 55862306a36Sopenharmony_ci int ret; 55962306a36Sopenharmony_ci 56062306a36Sopenharmony_ci rds_ibdev = rds_ib_get_device(rs->rs_bound_addr.s6_addr32[3]); 56162306a36Sopenharmony_ci if (!rds_ibdev) { 56262306a36Sopenharmony_ci ret = -ENODEV; 56362306a36Sopenharmony_ci goto out; 56462306a36Sopenharmony_ci } 56562306a36Sopenharmony_ci 56662306a36Sopenharmony_ci if (need_odp == ODP_ZEROBASED || need_odp == ODP_VIRTUAL) { 56762306a36Sopenharmony_ci u64 virt_addr = need_odp == ODP_ZEROBASED ? 0 : start; 56862306a36Sopenharmony_ci int access_flags = 56962306a36Sopenharmony_ci (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ | 57062306a36Sopenharmony_ci IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_ATOMIC | 57162306a36Sopenharmony_ci IB_ACCESS_ON_DEMAND); 57262306a36Sopenharmony_ci struct ib_sge sge = {}; 57362306a36Sopenharmony_ci struct ib_mr *ib_mr; 57462306a36Sopenharmony_ci 57562306a36Sopenharmony_ci if (!rds_ibdev->odp_capable) { 57662306a36Sopenharmony_ci ret = -EOPNOTSUPP; 57762306a36Sopenharmony_ci goto out; 57862306a36Sopenharmony_ci } 57962306a36Sopenharmony_ci 58062306a36Sopenharmony_ci ib_mr = ib_reg_user_mr(rds_ibdev->pd, start, length, virt_addr, 58162306a36Sopenharmony_ci access_flags); 58262306a36Sopenharmony_ci 58362306a36Sopenharmony_ci if (IS_ERR(ib_mr)) { 58462306a36Sopenharmony_ci rdsdebug("rds_ib_get_user_mr returned %d\n", 58562306a36Sopenharmony_ci IS_ERR(ib_mr)); 58662306a36Sopenharmony_ci ret = PTR_ERR(ib_mr); 58762306a36Sopenharmony_ci goto out; 58862306a36Sopenharmony_ci } 58962306a36Sopenharmony_ci if (key_ret) 59062306a36Sopenharmony_ci *key_ret = ib_mr->rkey; 59162306a36Sopenharmony_ci 59262306a36Sopenharmony_ci ibmr = kzalloc(sizeof(*ibmr), GFP_KERNEL); 59362306a36Sopenharmony_ci if (!ibmr) { 59462306a36Sopenharmony_ci ib_dereg_mr(ib_mr); 59562306a36Sopenharmony_ci ret = -ENOMEM; 59662306a36Sopenharmony_ci goto out; 59762306a36Sopenharmony_ci } 59862306a36Sopenharmony_ci ibmr->u.mr = ib_mr; 59962306a36Sopenharmony_ci ibmr->odp = 1; 60062306a36Sopenharmony_ci 60162306a36Sopenharmony_ci sge.addr = virt_addr; 60262306a36Sopenharmony_ci sge.length = length; 60362306a36Sopenharmony_ci sge.lkey = ib_mr->lkey; 60462306a36Sopenharmony_ci 60562306a36Sopenharmony_ci ib_advise_mr(rds_ibdev->pd, 60662306a36Sopenharmony_ci IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE, 60762306a36Sopenharmony_ci IB_UVERBS_ADVISE_MR_FLAG_FLUSH, &sge, 1); 60862306a36Sopenharmony_ci return ibmr; 60962306a36Sopenharmony_ci } 61062306a36Sopenharmony_ci 61162306a36Sopenharmony_ci if (conn) 61262306a36Sopenharmony_ci ic = conn->c_transport_data; 61362306a36Sopenharmony_ci 61462306a36Sopenharmony_ci if (!rds_ibdev->mr_8k_pool || !rds_ibdev->mr_1m_pool) { 61562306a36Sopenharmony_ci ret = -ENODEV; 61662306a36Sopenharmony_ci goto out; 61762306a36Sopenharmony_ci } 61862306a36Sopenharmony_ci 61962306a36Sopenharmony_ci ibmr = rds_ib_reg_frmr(rds_ibdev, ic, sg, nents, key_ret); 62062306a36Sopenharmony_ci if (IS_ERR(ibmr)) { 62162306a36Sopenharmony_ci ret = PTR_ERR(ibmr); 62262306a36Sopenharmony_ci pr_warn("RDS/IB: rds_ib_get_mr failed (errno=%d)\n", ret); 62362306a36Sopenharmony_ci } else { 62462306a36Sopenharmony_ci return ibmr; 62562306a36Sopenharmony_ci } 62662306a36Sopenharmony_ci 62762306a36Sopenharmony_ci out: 62862306a36Sopenharmony_ci if (rds_ibdev) 62962306a36Sopenharmony_ci rds_ib_dev_put(rds_ibdev); 63062306a36Sopenharmony_ci 63162306a36Sopenharmony_ci return ERR_PTR(ret); 63262306a36Sopenharmony_ci} 63362306a36Sopenharmony_ci 63462306a36Sopenharmony_civoid rds_ib_destroy_mr_pool(struct rds_ib_mr_pool *pool) 63562306a36Sopenharmony_ci{ 63662306a36Sopenharmony_ci cancel_delayed_work_sync(&pool->flush_worker); 63762306a36Sopenharmony_ci rds_ib_flush_mr_pool(pool, 1, NULL); 63862306a36Sopenharmony_ci WARN_ON(atomic_read(&pool->item_count)); 63962306a36Sopenharmony_ci WARN_ON(atomic_read(&pool->free_pinned)); 64062306a36Sopenharmony_ci kfree(pool); 64162306a36Sopenharmony_ci} 64262306a36Sopenharmony_ci 64362306a36Sopenharmony_cistruct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev, 64462306a36Sopenharmony_ci int pool_type) 64562306a36Sopenharmony_ci{ 64662306a36Sopenharmony_ci struct rds_ib_mr_pool *pool; 64762306a36Sopenharmony_ci 64862306a36Sopenharmony_ci pool = kzalloc(sizeof(*pool), GFP_KERNEL); 64962306a36Sopenharmony_ci if (!pool) 65062306a36Sopenharmony_ci return ERR_PTR(-ENOMEM); 65162306a36Sopenharmony_ci 65262306a36Sopenharmony_ci pool->pool_type = pool_type; 65362306a36Sopenharmony_ci init_llist_head(&pool->free_list); 65462306a36Sopenharmony_ci init_llist_head(&pool->drop_list); 65562306a36Sopenharmony_ci init_llist_head(&pool->clean_list); 65662306a36Sopenharmony_ci spin_lock_init(&pool->clean_lock); 65762306a36Sopenharmony_ci mutex_init(&pool->flush_lock); 65862306a36Sopenharmony_ci init_waitqueue_head(&pool->flush_wait); 65962306a36Sopenharmony_ci INIT_DELAYED_WORK(&pool->flush_worker, rds_ib_mr_pool_flush_worker); 66062306a36Sopenharmony_ci 66162306a36Sopenharmony_ci if (pool_type == RDS_IB_MR_1M_POOL) { 66262306a36Sopenharmony_ci /* +1 allows for unaligned MRs */ 66362306a36Sopenharmony_ci pool->max_pages = RDS_MR_1M_MSG_SIZE + 1; 66462306a36Sopenharmony_ci pool->max_items = rds_ibdev->max_1m_mrs; 66562306a36Sopenharmony_ci } else { 66662306a36Sopenharmony_ci /* pool_type == RDS_IB_MR_8K_POOL */ 66762306a36Sopenharmony_ci pool->max_pages = RDS_MR_8K_MSG_SIZE + 1; 66862306a36Sopenharmony_ci pool->max_items = rds_ibdev->max_8k_mrs; 66962306a36Sopenharmony_ci } 67062306a36Sopenharmony_ci 67162306a36Sopenharmony_ci pool->max_free_pinned = pool->max_items * pool->max_pages / 4; 67262306a36Sopenharmony_ci pool->max_items_soft = rds_ibdev->max_mrs * 3 / 4; 67362306a36Sopenharmony_ci 67462306a36Sopenharmony_ci return pool; 67562306a36Sopenharmony_ci} 67662306a36Sopenharmony_ci 67762306a36Sopenharmony_ciint rds_ib_mr_init(void) 67862306a36Sopenharmony_ci{ 67962306a36Sopenharmony_ci rds_ib_mr_wq = alloc_workqueue("rds_mr_flushd", WQ_MEM_RECLAIM, 0); 68062306a36Sopenharmony_ci if (!rds_ib_mr_wq) 68162306a36Sopenharmony_ci return -ENOMEM; 68262306a36Sopenharmony_ci return 0; 68362306a36Sopenharmony_ci} 68462306a36Sopenharmony_ci 68562306a36Sopenharmony_ci/* By the time this is called all the IB devices should have been torn down and 68662306a36Sopenharmony_ci * had their pools freed. As each pool is freed its work struct is waited on, 68762306a36Sopenharmony_ci * so the pool flushing work queue should be idle by the time we get here. 68862306a36Sopenharmony_ci */ 68962306a36Sopenharmony_civoid rds_ib_mr_exit(void) 69062306a36Sopenharmony_ci{ 69162306a36Sopenharmony_ci destroy_workqueue(rds_ib_mr_wq); 69262306a36Sopenharmony_ci} 69362306a36Sopenharmony_ci 69462306a36Sopenharmony_cistatic void rds_ib_odp_mr_worker(struct work_struct *work) 69562306a36Sopenharmony_ci{ 69662306a36Sopenharmony_ci struct rds_ib_mr *ibmr; 69762306a36Sopenharmony_ci 69862306a36Sopenharmony_ci ibmr = container_of(work, struct rds_ib_mr, work.work); 69962306a36Sopenharmony_ci ib_dereg_mr(ibmr->u.mr); 70062306a36Sopenharmony_ci kfree(ibmr); 70162306a36Sopenharmony_ci} 702