162306a36Sopenharmony_ci/* 262306a36Sopenharmony_ci * Copyright (c) 2006, 2019 Oracle and/or its affiliates. All rights reserved. 362306a36Sopenharmony_ci * 462306a36Sopenharmony_ci * This software is available to you under a choice of one of two 562306a36Sopenharmony_ci * licenses. You may choose to be licensed under the terms of the GNU 662306a36Sopenharmony_ci * General Public License (GPL) Version 2, available from the file 762306a36Sopenharmony_ci * COPYING in the main directory of this source tree, or the 862306a36Sopenharmony_ci * OpenIB.org BSD license below: 962306a36Sopenharmony_ci * 1062306a36Sopenharmony_ci * Redistribution and use in source and binary forms, with or 1162306a36Sopenharmony_ci * without modification, are permitted provided that the following 1262306a36Sopenharmony_ci * conditions are met: 1362306a36Sopenharmony_ci * 1462306a36Sopenharmony_ci * - Redistributions of source code must retain the above 1562306a36Sopenharmony_ci * copyright notice, this list of conditions and the following 1662306a36Sopenharmony_ci * disclaimer. 1762306a36Sopenharmony_ci * 1862306a36Sopenharmony_ci * - Redistributions in binary form must reproduce the above 1962306a36Sopenharmony_ci * copyright notice, this list of conditions and the following 2062306a36Sopenharmony_ci * disclaimer in the documentation and/or other materials 2162306a36Sopenharmony_ci * provided with the distribution. 2262306a36Sopenharmony_ci * 2362306a36Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 2462306a36Sopenharmony_ci * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 2562306a36Sopenharmony_ci * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 2662306a36Sopenharmony_ci * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 2762306a36Sopenharmony_ci * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 2862306a36Sopenharmony_ci * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 2962306a36Sopenharmony_ci * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 3062306a36Sopenharmony_ci * SOFTWARE. 3162306a36Sopenharmony_ci * 3262306a36Sopenharmony_ci */ 3362306a36Sopenharmony_ci#include <linux/kernel.h> 3462306a36Sopenharmony_ci#include <linux/sched/clock.h> 3562306a36Sopenharmony_ci#include <linux/slab.h> 3662306a36Sopenharmony_ci#include <linux/pci.h> 3762306a36Sopenharmony_ci#include <linux/dma-mapping.h> 3862306a36Sopenharmony_ci#include <rdma/rdma_cm.h> 3962306a36Sopenharmony_ci 4062306a36Sopenharmony_ci#include "rds_single_path.h" 4162306a36Sopenharmony_ci#include "rds.h" 4262306a36Sopenharmony_ci#include "ib.h" 4362306a36Sopenharmony_ci 4462306a36Sopenharmony_cistatic struct kmem_cache *rds_ib_incoming_slab; 4562306a36Sopenharmony_cistatic struct kmem_cache *rds_ib_frag_slab; 4662306a36Sopenharmony_cistatic atomic_t rds_ib_allocation = ATOMIC_INIT(0); 4762306a36Sopenharmony_ci 4862306a36Sopenharmony_civoid rds_ib_recv_init_ring(struct rds_ib_connection *ic) 4962306a36Sopenharmony_ci{ 5062306a36Sopenharmony_ci struct rds_ib_recv_work *recv; 5162306a36Sopenharmony_ci u32 i; 5262306a36Sopenharmony_ci 5362306a36Sopenharmony_ci for (i = 0, recv = ic->i_recvs; i < ic->i_recv_ring.w_nr; i++, recv++) { 5462306a36Sopenharmony_ci struct ib_sge *sge; 5562306a36Sopenharmony_ci 5662306a36Sopenharmony_ci recv->r_ibinc = NULL; 5762306a36Sopenharmony_ci recv->r_frag = NULL; 5862306a36Sopenharmony_ci 5962306a36Sopenharmony_ci recv->r_wr.next = NULL; 6062306a36Sopenharmony_ci recv->r_wr.wr_id = i; 6162306a36Sopenharmony_ci recv->r_wr.sg_list = recv->r_sge; 6262306a36Sopenharmony_ci recv->r_wr.num_sge = RDS_IB_RECV_SGE; 6362306a36Sopenharmony_ci 6462306a36Sopenharmony_ci sge = &recv->r_sge[0]; 6562306a36Sopenharmony_ci sge->addr = ic->i_recv_hdrs_dma[i]; 6662306a36Sopenharmony_ci sge->length = sizeof(struct rds_header); 6762306a36Sopenharmony_ci sge->lkey = ic->i_pd->local_dma_lkey; 6862306a36Sopenharmony_ci 6962306a36Sopenharmony_ci sge = &recv->r_sge[1]; 7062306a36Sopenharmony_ci sge->addr = 0; 7162306a36Sopenharmony_ci sge->length = RDS_FRAG_SIZE; 7262306a36Sopenharmony_ci sge->lkey = ic->i_pd->local_dma_lkey; 7362306a36Sopenharmony_ci } 7462306a36Sopenharmony_ci} 7562306a36Sopenharmony_ci 7662306a36Sopenharmony_ci/* 7762306a36Sopenharmony_ci * The entire 'from' list, including the from element itself, is put on 7862306a36Sopenharmony_ci * to the tail of the 'to' list. 7962306a36Sopenharmony_ci */ 8062306a36Sopenharmony_cistatic void list_splice_entire_tail(struct list_head *from, 8162306a36Sopenharmony_ci struct list_head *to) 8262306a36Sopenharmony_ci{ 8362306a36Sopenharmony_ci struct list_head *from_last = from->prev; 8462306a36Sopenharmony_ci 8562306a36Sopenharmony_ci list_splice_tail(from_last, to); 8662306a36Sopenharmony_ci list_add_tail(from_last, to); 8762306a36Sopenharmony_ci} 8862306a36Sopenharmony_ci 8962306a36Sopenharmony_cistatic void rds_ib_cache_xfer_to_ready(struct rds_ib_refill_cache *cache) 9062306a36Sopenharmony_ci{ 9162306a36Sopenharmony_ci struct list_head *tmp; 9262306a36Sopenharmony_ci 9362306a36Sopenharmony_ci tmp = xchg(&cache->xfer, NULL); 9462306a36Sopenharmony_ci if (tmp) { 9562306a36Sopenharmony_ci if (cache->ready) 9662306a36Sopenharmony_ci list_splice_entire_tail(tmp, cache->ready); 9762306a36Sopenharmony_ci else 9862306a36Sopenharmony_ci cache->ready = tmp; 9962306a36Sopenharmony_ci } 10062306a36Sopenharmony_ci} 10162306a36Sopenharmony_ci 10262306a36Sopenharmony_cistatic int rds_ib_recv_alloc_cache(struct rds_ib_refill_cache *cache, gfp_t gfp) 10362306a36Sopenharmony_ci{ 10462306a36Sopenharmony_ci struct rds_ib_cache_head *head; 10562306a36Sopenharmony_ci int cpu; 10662306a36Sopenharmony_ci 10762306a36Sopenharmony_ci cache->percpu = alloc_percpu_gfp(struct rds_ib_cache_head, gfp); 10862306a36Sopenharmony_ci if (!cache->percpu) 10962306a36Sopenharmony_ci return -ENOMEM; 11062306a36Sopenharmony_ci 11162306a36Sopenharmony_ci for_each_possible_cpu(cpu) { 11262306a36Sopenharmony_ci head = per_cpu_ptr(cache->percpu, cpu); 11362306a36Sopenharmony_ci head->first = NULL; 11462306a36Sopenharmony_ci head->count = 0; 11562306a36Sopenharmony_ci } 11662306a36Sopenharmony_ci cache->xfer = NULL; 11762306a36Sopenharmony_ci cache->ready = NULL; 11862306a36Sopenharmony_ci 11962306a36Sopenharmony_ci return 0; 12062306a36Sopenharmony_ci} 12162306a36Sopenharmony_ci 12262306a36Sopenharmony_ciint rds_ib_recv_alloc_caches(struct rds_ib_connection *ic, gfp_t gfp) 12362306a36Sopenharmony_ci{ 12462306a36Sopenharmony_ci int ret; 12562306a36Sopenharmony_ci 12662306a36Sopenharmony_ci ret = rds_ib_recv_alloc_cache(&ic->i_cache_incs, gfp); 12762306a36Sopenharmony_ci if (!ret) { 12862306a36Sopenharmony_ci ret = rds_ib_recv_alloc_cache(&ic->i_cache_frags, gfp); 12962306a36Sopenharmony_ci if (ret) 13062306a36Sopenharmony_ci free_percpu(ic->i_cache_incs.percpu); 13162306a36Sopenharmony_ci } 13262306a36Sopenharmony_ci 13362306a36Sopenharmony_ci return ret; 13462306a36Sopenharmony_ci} 13562306a36Sopenharmony_ci 13662306a36Sopenharmony_cistatic void rds_ib_cache_splice_all_lists(struct rds_ib_refill_cache *cache, 13762306a36Sopenharmony_ci struct list_head *caller_list) 13862306a36Sopenharmony_ci{ 13962306a36Sopenharmony_ci struct rds_ib_cache_head *head; 14062306a36Sopenharmony_ci int cpu; 14162306a36Sopenharmony_ci 14262306a36Sopenharmony_ci for_each_possible_cpu(cpu) { 14362306a36Sopenharmony_ci head = per_cpu_ptr(cache->percpu, cpu); 14462306a36Sopenharmony_ci if (head->first) { 14562306a36Sopenharmony_ci list_splice_entire_tail(head->first, caller_list); 14662306a36Sopenharmony_ci head->first = NULL; 14762306a36Sopenharmony_ci } 14862306a36Sopenharmony_ci } 14962306a36Sopenharmony_ci 15062306a36Sopenharmony_ci if (cache->ready) { 15162306a36Sopenharmony_ci list_splice_entire_tail(cache->ready, caller_list); 15262306a36Sopenharmony_ci cache->ready = NULL; 15362306a36Sopenharmony_ci } 15462306a36Sopenharmony_ci} 15562306a36Sopenharmony_ci 15662306a36Sopenharmony_civoid rds_ib_recv_free_caches(struct rds_ib_connection *ic) 15762306a36Sopenharmony_ci{ 15862306a36Sopenharmony_ci struct rds_ib_incoming *inc; 15962306a36Sopenharmony_ci struct rds_ib_incoming *inc_tmp; 16062306a36Sopenharmony_ci struct rds_page_frag *frag; 16162306a36Sopenharmony_ci struct rds_page_frag *frag_tmp; 16262306a36Sopenharmony_ci LIST_HEAD(list); 16362306a36Sopenharmony_ci 16462306a36Sopenharmony_ci rds_ib_cache_xfer_to_ready(&ic->i_cache_incs); 16562306a36Sopenharmony_ci rds_ib_cache_splice_all_lists(&ic->i_cache_incs, &list); 16662306a36Sopenharmony_ci free_percpu(ic->i_cache_incs.percpu); 16762306a36Sopenharmony_ci 16862306a36Sopenharmony_ci list_for_each_entry_safe(inc, inc_tmp, &list, ii_cache_entry) { 16962306a36Sopenharmony_ci list_del(&inc->ii_cache_entry); 17062306a36Sopenharmony_ci WARN_ON(!list_empty(&inc->ii_frags)); 17162306a36Sopenharmony_ci kmem_cache_free(rds_ib_incoming_slab, inc); 17262306a36Sopenharmony_ci atomic_dec(&rds_ib_allocation); 17362306a36Sopenharmony_ci } 17462306a36Sopenharmony_ci 17562306a36Sopenharmony_ci rds_ib_cache_xfer_to_ready(&ic->i_cache_frags); 17662306a36Sopenharmony_ci rds_ib_cache_splice_all_lists(&ic->i_cache_frags, &list); 17762306a36Sopenharmony_ci free_percpu(ic->i_cache_frags.percpu); 17862306a36Sopenharmony_ci 17962306a36Sopenharmony_ci list_for_each_entry_safe(frag, frag_tmp, &list, f_cache_entry) { 18062306a36Sopenharmony_ci list_del(&frag->f_cache_entry); 18162306a36Sopenharmony_ci WARN_ON(!list_empty(&frag->f_item)); 18262306a36Sopenharmony_ci kmem_cache_free(rds_ib_frag_slab, frag); 18362306a36Sopenharmony_ci } 18462306a36Sopenharmony_ci} 18562306a36Sopenharmony_ci 18662306a36Sopenharmony_ci/* fwd decl */ 18762306a36Sopenharmony_cistatic void rds_ib_recv_cache_put(struct list_head *new_item, 18862306a36Sopenharmony_ci struct rds_ib_refill_cache *cache); 18962306a36Sopenharmony_cistatic struct list_head *rds_ib_recv_cache_get(struct rds_ib_refill_cache *cache); 19062306a36Sopenharmony_ci 19162306a36Sopenharmony_ci 19262306a36Sopenharmony_ci/* Recycle frag and attached recv buffer f_sg */ 19362306a36Sopenharmony_cistatic void rds_ib_frag_free(struct rds_ib_connection *ic, 19462306a36Sopenharmony_ci struct rds_page_frag *frag) 19562306a36Sopenharmony_ci{ 19662306a36Sopenharmony_ci rdsdebug("frag %p page %p\n", frag, sg_page(&frag->f_sg)); 19762306a36Sopenharmony_ci 19862306a36Sopenharmony_ci rds_ib_recv_cache_put(&frag->f_cache_entry, &ic->i_cache_frags); 19962306a36Sopenharmony_ci atomic_add(RDS_FRAG_SIZE / SZ_1K, &ic->i_cache_allocs); 20062306a36Sopenharmony_ci rds_ib_stats_add(s_ib_recv_added_to_cache, RDS_FRAG_SIZE); 20162306a36Sopenharmony_ci} 20262306a36Sopenharmony_ci 20362306a36Sopenharmony_ci/* Recycle inc after freeing attached frags */ 20462306a36Sopenharmony_civoid rds_ib_inc_free(struct rds_incoming *inc) 20562306a36Sopenharmony_ci{ 20662306a36Sopenharmony_ci struct rds_ib_incoming *ibinc; 20762306a36Sopenharmony_ci struct rds_page_frag *frag; 20862306a36Sopenharmony_ci struct rds_page_frag *pos; 20962306a36Sopenharmony_ci struct rds_ib_connection *ic = inc->i_conn->c_transport_data; 21062306a36Sopenharmony_ci 21162306a36Sopenharmony_ci ibinc = container_of(inc, struct rds_ib_incoming, ii_inc); 21262306a36Sopenharmony_ci 21362306a36Sopenharmony_ci /* Free attached frags */ 21462306a36Sopenharmony_ci list_for_each_entry_safe(frag, pos, &ibinc->ii_frags, f_item) { 21562306a36Sopenharmony_ci list_del_init(&frag->f_item); 21662306a36Sopenharmony_ci rds_ib_frag_free(ic, frag); 21762306a36Sopenharmony_ci } 21862306a36Sopenharmony_ci BUG_ON(!list_empty(&ibinc->ii_frags)); 21962306a36Sopenharmony_ci 22062306a36Sopenharmony_ci rdsdebug("freeing ibinc %p inc %p\n", ibinc, inc); 22162306a36Sopenharmony_ci rds_ib_recv_cache_put(&ibinc->ii_cache_entry, &ic->i_cache_incs); 22262306a36Sopenharmony_ci} 22362306a36Sopenharmony_ci 22462306a36Sopenharmony_cistatic void rds_ib_recv_clear_one(struct rds_ib_connection *ic, 22562306a36Sopenharmony_ci struct rds_ib_recv_work *recv) 22662306a36Sopenharmony_ci{ 22762306a36Sopenharmony_ci if (recv->r_ibinc) { 22862306a36Sopenharmony_ci rds_inc_put(&recv->r_ibinc->ii_inc); 22962306a36Sopenharmony_ci recv->r_ibinc = NULL; 23062306a36Sopenharmony_ci } 23162306a36Sopenharmony_ci if (recv->r_frag) { 23262306a36Sopenharmony_ci ib_dma_unmap_sg(ic->i_cm_id->device, &recv->r_frag->f_sg, 1, DMA_FROM_DEVICE); 23362306a36Sopenharmony_ci rds_ib_frag_free(ic, recv->r_frag); 23462306a36Sopenharmony_ci recv->r_frag = NULL; 23562306a36Sopenharmony_ci } 23662306a36Sopenharmony_ci} 23762306a36Sopenharmony_ci 23862306a36Sopenharmony_civoid rds_ib_recv_clear_ring(struct rds_ib_connection *ic) 23962306a36Sopenharmony_ci{ 24062306a36Sopenharmony_ci u32 i; 24162306a36Sopenharmony_ci 24262306a36Sopenharmony_ci for (i = 0; i < ic->i_recv_ring.w_nr; i++) 24362306a36Sopenharmony_ci rds_ib_recv_clear_one(ic, &ic->i_recvs[i]); 24462306a36Sopenharmony_ci} 24562306a36Sopenharmony_ci 24662306a36Sopenharmony_cistatic struct rds_ib_incoming *rds_ib_refill_one_inc(struct rds_ib_connection *ic, 24762306a36Sopenharmony_ci gfp_t slab_mask) 24862306a36Sopenharmony_ci{ 24962306a36Sopenharmony_ci struct rds_ib_incoming *ibinc; 25062306a36Sopenharmony_ci struct list_head *cache_item; 25162306a36Sopenharmony_ci int avail_allocs; 25262306a36Sopenharmony_ci 25362306a36Sopenharmony_ci cache_item = rds_ib_recv_cache_get(&ic->i_cache_incs); 25462306a36Sopenharmony_ci if (cache_item) { 25562306a36Sopenharmony_ci ibinc = container_of(cache_item, struct rds_ib_incoming, ii_cache_entry); 25662306a36Sopenharmony_ci } else { 25762306a36Sopenharmony_ci avail_allocs = atomic_add_unless(&rds_ib_allocation, 25862306a36Sopenharmony_ci 1, rds_ib_sysctl_max_recv_allocation); 25962306a36Sopenharmony_ci if (!avail_allocs) { 26062306a36Sopenharmony_ci rds_ib_stats_inc(s_ib_rx_alloc_limit); 26162306a36Sopenharmony_ci return NULL; 26262306a36Sopenharmony_ci } 26362306a36Sopenharmony_ci ibinc = kmem_cache_alloc(rds_ib_incoming_slab, slab_mask); 26462306a36Sopenharmony_ci if (!ibinc) { 26562306a36Sopenharmony_ci atomic_dec(&rds_ib_allocation); 26662306a36Sopenharmony_ci return NULL; 26762306a36Sopenharmony_ci } 26862306a36Sopenharmony_ci rds_ib_stats_inc(s_ib_rx_total_incs); 26962306a36Sopenharmony_ci } 27062306a36Sopenharmony_ci INIT_LIST_HEAD(&ibinc->ii_frags); 27162306a36Sopenharmony_ci rds_inc_init(&ibinc->ii_inc, ic->conn, &ic->conn->c_faddr); 27262306a36Sopenharmony_ci 27362306a36Sopenharmony_ci return ibinc; 27462306a36Sopenharmony_ci} 27562306a36Sopenharmony_ci 27662306a36Sopenharmony_cistatic struct rds_page_frag *rds_ib_refill_one_frag(struct rds_ib_connection *ic, 27762306a36Sopenharmony_ci gfp_t slab_mask, gfp_t page_mask) 27862306a36Sopenharmony_ci{ 27962306a36Sopenharmony_ci struct rds_page_frag *frag; 28062306a36Sopenharmony_ci struct list_head *cache_item; 28162306a36Sopenharmony_ci int ret; 28262306a36Sopenharmony_ci 28362306a36Sopenharmony_ci cache_item = rds_ib_recv_cache_get(&ic->i_cache_frags); 28462306a36Sopenharmony_ci if (cache_item) { 28562306a36Sopenharmony_ci frag = container_of(cache_item, struct rds_page_frag, f_cache_entry); 28662306a36Sopenharmony_ci atomic_sub(RDS_FRAG_SIZE / SZ_1K, &ic->i_cache_allocs); 28762306a36Sopenharmony_ci rds_ib_stats_add(s_ib_recv_added_to_cache, RDS_FRAG_SIZE); 28862306a36Sopenharmony_ci } else { 28962306a36Sopenharmony_ci frag = kmem_cache_alloc(rds_ib_frag_slab, slab_mask); 29062306a36Sopenharmony_ci if (!frag) 29162306a36Sopenharmony_ci return NULL; 29262306a36Sopenharmony_ci 29362306a36Sopenharmony_ci sg_init_table(&frag->f_sg, 1); 29462306a36Sopenharmony_ci ret = rds_page_remainder_alloc(&frag->f_sg, 29562306a36Sopenharmony_ci RDS_FRAG_SIZE, page_mask); 29662306a36Sopenharmony_ci if (ret) { 29762306a36Sopenharmony_ci kmem_cache_free(rds_ib_frag_slab, frag); 29862306a36Sopenharmony_ci return NULL; 29962306a36Sopenharmony_ci } 30062306a36Sopenharmony_ci rds_ib_stats_inc(s_ib_rx_total_frags); 30162306a36Sopenharmony_ci } 30262306a36Sopenharmony_ci 30362306a36Sopenharmony_ci INIT_LIST_HEAD(&frag->f_item); 30462306a36Sopenharmony_ci 30562306a36Sopenharmony_ci return frag; 30662306a36Sopenharmony_ci} 30762306a36Sopenharmony_ci 30862306a36Sopenharmony_cistatic int rds_ib_recv_refill_one(struct rds_connection *conn, 30962306a36Sopenharmony_ci struct rds_ib_recv_work *recv, gfp_t gfp) 31062306a36Sopenharmony_ci{ 31162306a36Sopenharmony_ci struct rds_ib_connection *ic = conn->c_transport_data; 31262306a36Sopenharmony_ci struct ib_sge *sge; 31362306a36Sopenharmony_ci int ret = -ENOMEM; 31462306a36Sopenharmony_ci gfp_t slab_mask = gfp; 31562306a36Sopenharmony_ci gfp_t page_mask = gfp; 31662306a36Sopenharmony_ci 31762306a36Sopenharmony_ci if (gfp & __GFP_DIRECT_RECLAIM) { 31862306a36Sopenharmony_ci slab_mask = GFP_KERNEL; 31962306a36Sopenharmony_ci page_mask = GFP_HIGHUSER; 32062306a36Sopenharmony_ci } 32162306a36Sopenharmony_ci 32262306a36Sopenharmony_ci if (!ic->i_cache_incs.ready) 32362306a36Sopenharmony_ci rds_ib_cache_xfer_to_ready(&ic->i_cache_incs); 32462306a36Sopenharmony_ci if (!ic->i_cache_frags.ready) 32562306a36Sopenharmony_ci rds_ib_cache_xfer_to_ready(&ic->i_cache_frags); 32662306a36Sopenharmony_ci 32762306a36Sopenharmony_ci /* 32862306a36Sopenharmony_ci * ibinc was taken from recv if recv contained the start of a message. 32962306a36Sopenharmony_ci * recvs that were continuations will still have this allocated. 33062306a36Sopenharmony_ci */ 33162306a36Sopenharmony_ci if (!recv->r_ibinc) { 33262306a36Sopenharmony_ci recv->r_ibinc = rds_ib_refill_one_inc(ic, slab_mask); 33362306a36Sopenharmony_ci if (!recv->r_ibinc) 33462306a36Sopenharmony_ci goto out; 33562306a36Sopenharmony_ci } 33662306a36Sopenharmony_ci 33762306a36Sopenharmony_ci WARN_ON(recv->r_frag); /* leak! */ 33862306a36Sopenharmony_ci recv->r_frag = rds_ib_refill_one_frag(ic, slab_mask, page_mask); 33962306a36Sopenharmony_ci if (!recv->r_frag) 34062306a36Sopenharmony_ci goto out; 34162306a36Sopenharmony_ci 34262306a36Sopenharmony_ci ret = ib_dma_map_sg(ic->i_cm_id->device, &recv->r_frag->f_sg, 34362306a36Sopenharmony_ci 1, DMA_FROM_DEVICE); 34462306a36Sopenharmony_ci WARN_ON(ret != 1); 34562306a36Sopenharmony_ci 34662306a36Sopenharmony_ci sge = &recv->r_sge[0]; 34762306a36Sopenharmony_ci sge->addr = ic->i_recv_hdrs_dma[recv - ic->i_recvs]; 34862306a36Sopenharmony_ci sge->length = sizeof(struct rds_header); 34962306a36Sopenharmony_ci 35062306a36Sopenharmony_ci sge = &recv->r_sge[1]; 35162306a36Sopenharmony_ci sge->addr = sg_dma_address(&recv->r_frag->f_sg); 35262306a36Sopenharmony_ci sge->length = sg_dma_len(&recv->r_frag->f_sg); 35362306a36Sopenharmony_ci 35462306a36Sopenharmony_ci ret = 0; 35562306a36Sopenharmony_ciout: 35662306a36Sopenharmony_ci return ret; 35762306a36Sopenharmony_ci} 35862306a36Sopenharmony_ci 35962306a36Sopenharmony_cistatic int acquire_refill(struct rds_connection *conn) 36062306a36Sopenharmony_ci{ 36162306a36Sopenharmony_ci return test_and_set_bit(RDS_RECV_REFILL, &conn->c_flags) == 0; 36262306a36Sopenharmony_ci} 36362306a36Sopenharmony_ci 36462306a36Sopenharmony_cistatic void release_refill(struct rds_connection *conn) 36562306a36Sopenharmony_ci{ 36662306a36Sopenharmony_ci clear_bit(RDS_RECV_REFILL, &conn->c_flags); 36762306a36Sopenharmony_ci smp_mb__after_atomic(); 36862306a36Sopenharmony_ci 36962306a36Sopenharmony_ci /* We don't use wait_on_bit()/wake_up_bit() because our waking is in a 37062306a36Sopenharmony_ci * hot path and finding waiters is very rare. We don't want to walk 37162306a36Sopenharmony_ci * the system-wide hashed waitqueue buckets in the fast path only to 37262306a36Sopenharmony_ci * almost never find waiters. 37362306a36Sopenharmony_ci */ 37462306a36Sopenharmony_ci if (waitqueue_active(&conn->c_waitq)) 37562306a36Sopenharmony_ci wake_up_all(&conn->c_waitq); 37662306a36Sopenharmony_ci} 37762306a36Sopenharmony_ci 37862306a36Sopenharmony_ci/* 37962306a36Sopenharmony_ci * This tries to allocate and post unused work requests after making sure that 38062306a36Sopenharmony_ci * they have all the allocations they need to queue received fragments into 38162306a36Sopenharmony_ci * sockets. 38262306a36Sopenharmony_ci */ 38362306a36Sopenharmony_civoid rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp) 38462306a36Sopenharmony_ci{ 38562306a36Sopenharmony_ci struct rds_ib_connection *ic = conn->c_transport_data; 38662306a36Sopenharmony_ci struct rds_ib_recv_work *recv; 38762306a36Sopenharmony_ci unsigned int posted = 0; 38862306a36Sopenharmony_ci int ret = 0; 38962306a36Sopenharmony_ci bool can_wait = !!(gfp & __GFP_DIRECT_RECLAIM); 39062306a36Sopenharmony_ci bool must_wake = false; 39162306a36Sopenharmony_ci u32 pos; 39262306a36Sopenharmony_ci 39362306a36Sopenharmony_ci /* the goal here is to just make sure that someone, somewhere 39462306a36Sopenharmony_ci * is posting buffers. If we can't get the refill lock, 39562306a36Sopenharmony_ci * let them do their thing 39662306a36Sopenharmony_ci */ 39762306a36Sopenharmony_ci if (!acquire_refill(conn)) 39862306a36Sopenharmony_ci return; 39962306a36Sopenharmony_ci 40062306a36Sopenharmony_ci while ((prefill || rds_conn_up(conn)) && 40162306a36Sopenharmony_ci rds_ib_ring_alloc(&ic->i_recv_ring, 1, &pos)) { 40262306a36Sopenharmony_ci if (pos >= ic->i_recv_ring.w_nr) { 40362306a36Sopenharmony_ci printk(KERN_NOTICE "Argh - ring alloc returned pos=%u\n", 40462306a36Sopenharmony_ci pos); 40562306a36Sopenharmony_ci break; 40662306a36Sopenharmony_ci } 40762306a36Sopenharmony_ci 40862306a36Sopenharmony_ci recv = &ic->i_recvs[pos]; 40962306a36Sopenharmony_ci ret = rds_ib_recv_refill_one(conn, recv, gfp); 41062306a36Sopenharmony_ci if (ret) { 41162306a36Sopenharmony_ci must_wake = true; 41262306a36Sopenharmony_ci break; 41362306a36Sopenharmony_ci } 41462306a36Sopenharmony_ci 41562306a36Sopenharmony_ci rdsdebug("recv %p ibinc %p page %p addr %lu\n", recv, 41662306a36Sopenharmony_ci recv->r_ibinc, sg_page(&recv->r_frag->f_sg), 41762306a36Sopenharmony_ci (long)sg_dma_address(&recv->r_frag->f_sg)); 41862306a36Sopenharmony_ci 41962306a36Sopenharmony_ci /* XXX when can this fail? */ 42062306a36Sopenharmony_ci ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, NULL); 42162306a36Sopenharmony_ci if (ret) { 42262306a36Sopenharmony_ci rds_ib_conn_error(conn, "recv post on " 42362306a36Sopenharmony_ci "%pI6c returned %d, disconnecting and " 42462306a36Sopenharmony_ci "reconnecting\n", &conn->c_faddr, 42562306a36Sopenharmony_ci ret); 42662306a36Sopenharmony_ci break; 42762306a36Sopenharmony_ci } 42862306a36Sopenharmony_ci 42962306a36Sopenharmony_ci posted++; 43062306a36Sopenharmony_ci 43162306a36Sopenharmony_ci if ((posted > 128 && need_resched()) || posted > 8192) { 43262306a36Sopenharmony_ci must_wake = true; 43362306a36Sopenharmony_ci break; 43462306a36Sopenharmony_ci } 43562306a36Sopenharmony_ci } 43662306a36Sopenharmony_ci 43762306a36Sopenharmony_ci /* We're doing flow control - update the window. */ 43862306a36Sopenharmony_ci if (ic->i_flowctl && posted) 43962306a36Sopenharmony_ci rds_ib_advertise_credits(conn, posted); 44062306a36Sopenharmony_ci 44162306a36Sopenharmony_ci if (ret) 44262306a36Sopenharmony_ci rds_ib_ring_unalloc(&ic->i_recv_ring, 1); 44362306a36Sopenharmony_ci 44462306a36Sopenharmony_ci release_refill(conn); 44562306a36Sopenharmony_ci 44662306a36Sopenharmony_ci /* if we're called from the softirq handler, we'll be GFP_NOWAIT. 44762306a36Sopenharmony_ci * in this case the ring being low is going to lead to more interrupts 44862306a36Sopenharmony_ci * and we can safely let the softirq code take care of it unless the 44962306a36Sopenharmony_ci * ring is completely empty. 45062306a36Sopenharmony_ci * 45162306a36Sopenharmony_ci * if we're called from krdsd, we'll be GFP_KERNEL. In this case 45262306a36Sopenharmony_ci * we might have raced with the softirq code while we had the refill 45362306a36Sopenharmony_ci * lock held. Use rds_ib_ring_low() instead of ring_empty to decide 45462306a36Sopenharmony_ci * if we should requeue. 45562306a36Sopenharmony_ci */ 45662306a36Sopenharmony_ci if (rds_conn_up(conn) && 45762306a36Sopenharmony_ci (must_wake || 45862306a36Sopenharmony_ci (can_wait && rds_ib_ring_low(&ic->i_recv_ring)) || 45962306a36Sopenharmony_ci rds_ib_ring_empty(&ic->i_recv_ring))) { 46062306a36Sopenharmony_ci queue_delayed_work(rds_wq, &conn->c_recv_w, 1); 46162306a36Sopenharmony_ci } 46262306a36Sopenharmony_ci if (can_wait) 46362306a36Sopenharmony_ci cond_resched(); 46462306a36Sopenharmony_ci} 46562306a36Sopenharmony_ci 46662306a36Sopenharmony_ci/* 46762306a36Sopenharmony_ci * We want to recycle several types of recv allocations, like incs and frags. 46862306a36Sopenharmony_ci * To use this, the *_free() function passes in the ptr to a list_head within 46962306a36Sopenharmony_ci * the recyclee, as well as the cache to put it on. 47062306a36Sopenharmony_ci * 47162306a36Sopenharmony_ci * First, we put the memory on a percpu list. When this reaches a certain size, 47262306a36Sopenharmony_ci * We move it to an intermediate non-percpu list in a lockless manner, with some 47362306a36Sopenharmony_ci * xchg/compxchg wizardry. 47462306a36Sopenharmony_ci * 47562306a36Sopenharmony_ci * N.B. Instead of a list_head as the anchor, we use a single pointer, which can 47662306a36Sopenharmony_ci * be NULL and xchg'd. The list is actually empty when the pointer is NULL, and 47762306a36Sopenharmony_ci * list_empty() will return true with one element is actually present. 47862306a36Sopenharmony_ci */ 47962306a36Sopenharmony_cistatic void rds_ib_recv_cache_put(struct list_head *new_item, 48062306a36Sopenharmony_ci struct rds_ib_refill_cache *cache) 48162306a36Sopenharmony_ci{ 48262306a36Sopenharmony_ci unsigned long flags; 48362306a36Sopenharmony_ci struct list_head *old, *chpfirst; 48462306a36Sopenharmony_ci 48562306a36Sopenharmony_ci local_irq_save(flags); 48662306a36Sopenharmony_ci 48762306a36Sopenharmony_ci chpfirst = __this_cpu_read(cache->percpu->first); 48862306a36Sopenharmony_ci if (!chpfirst) 48962306a36Sopenharmony_ci INIT_LIST_HEAD(new_item); 49062306a36Sopenharmony_ci else /* put on front */ 49162306a36Sopenharmony_ci list_add_tail(new_item, chpfirst); 49262306a36Sopenharmony_ci 49362306a36Sopenharmony_ci __this_cpu_write(cache->percpu->first, new_item); 49462306a36Sopenharmony_ci __this_cpu_inc(cache->percpu->count); 49562306a36Sopenharmony_ci 49662306a36Sopenharmony_ci if (__this_cpu_read(cache->percpu->count) < RDS_IB_RECYCLE_BATCH_COUNT) 49762306a36Sopenharmony_ci goto end; 49862306a36Sopenharmony_ci 49962306a36Sopenharmony_ci /* 50062306a36Sopenharmony_ci * Return our per-cpu first list to the cache's xfer by atomically 50162306a36Sopenharmony_ci * grabbing the current xfer list, appending it to our per-cpu list, 50262306a36Sopenharmony_ci * and then atomically returning that entire list back to the 50362306a36Sopenharmony_ci * cache's xfer list as long as it's still empty. 50462306a36Sopenharmony_ci */ 50562306a36Sopenharmony_ci do { 50662306a36Sopenharmony_ci old = xchg(&cache->xfer, NULL); 50762306a36Sopenharmony_ci if (old) 50862306a36Sopenharmony_ci list_splice_entire_tail(old, chpfirst); 50962306a36Sopenharmony_ci old = cmpxchg(&cache->xfer, NULL, chpfirst); 51062306a36Sopenharmony_ci } while (old); 51162306a36Sopenharmony_ci 51262306a36Sopenharmony_ci 51362306a36Sopenharmony_ci __this_cpu_write(cache->percpu->first, NULL); 51462306a36Sopenharmony_ci __this_cpu_write(cache->percpu->count, 0); 51562306a36Sopenharmony_ciend: 51662306a36Sopenharmony_ci local_irq_restore(flags); 51762306a36Sopenharmony_ci} 51862306a36Sopenharmony_ci 51962306a36Sopenharmony_cistatic struct list_head *rds_ib_recv_cache_get(struct rds_ib_refill_cache *cache) 52062306a36Sopenharmony_ci{ 52162306a36Sopenharmony_ci struct list_head *head = cache->ready; 52262306a36Sopenharmony_ci 52362306a36Sopenharmony_ci if (head) { 52462306a36Sopenharmony_ci if (!list_empty(head)) { 52562306a36Sopenharmony_ci cache->ready = head->next; 52662306a36Sopenharmony_ci list_del_init(head); 52762306a36Sopenharmony_ci } else 52862306a36Sopenharmony_ci cache->ready = NULL; 52962306a36Sopenharmony_ci } 53062306a36Sopenharmony_ci 53162306a36Sopenharmony_ci return head; 53262306a36Sopenharmony_ci} 53362306a36Sopenharmony_ci 53462306a36Sopenharmony_ciint rds_ib_inc_copy_to_user(struct rds_incoming *inc, struct iov_iter *to) 53562306a36Sopenharmony_ci{ 53662306a36Sopenharmony_ci struct rds_ib_incoming *ibinc; 53762306a36Sopenharmony_ci struct rds_page_frag *frag; 53862306a36Sopenharmony_ci unsigned long to_copy; 53962306a36Sopenharmony_ci unsigned long frag_off = 0; 54062306a36Sopenharmony_ci int copied = 0; 54162306a36Sopenharmony_ci int ret; 54262306a36Sopenharmony_ci u32 len; 54362306a36Sopenharmony_ci 54462306a36Sopenharmony_ci ibinc = container_of(inc, struct rds_ib_incoming, ii_inc); 54562306a36Sopenharmony_ci frag = list_entry(ibinc->ii_frags.next, struct rds_page_frag, f_item); 54662306a36Sopenharmony_ci len = be32_to_cpu(inc->i_hdr.h_len); 54762306a36Sopenharmony_ci 54862306a36Sopenharmony_ci while (iov_iter_count(to) && copied < len) { 54962306a36Sopenharmony_ci if (frag_off == RDS_FRAG_SIZE) { 55062306a36Sopenharmony_ci frag = list_entry(frag->f_item.next, 55162306a36Sopenharmony_ci struct rds_page_frag, f_item); 55262306a36Sopenharmony_ci frag_off = 0; 55362306a36Sopenharmony_ci } 55462306a36Sopenharmony_ci to_copy = min_t(unsigned long, iov_iter_count(to), 55562306a36Sopenharmony_ci RDS_FRAG_SIZE - frag_off); 55662306a36Sopenharmony_ci to_copy = min_t(unsigned long, to_copy, len - copied); 55762306a36Sopenharmony_ci 55862306a36Sopenharmony_ci /* XXX needs + offset for multiple recvs per page */ 55962306a36Sopenharmony_ci rds_stats_add(s_copy_to_user, to_copy); 56062306a36Sopenharmony_ci ret = copy_page_to_iter(sg_page(&frag->f_sg), 56162306a36Sopenharmony_ci frag->f_sg.offset + frag_off, 56262306a36Sopenharmony_ci to_copy, 56362306a36Sopenharmony_ci to); 56462306a36Sopenharmony_ci if (ret != to_copy) 56562306a36Sopenharmony_ci return -EFAULT; 56662306a36Sopenharmony_ci 56762306a36Sopenharmony_ci frag_off += to_copy; 56862306a36Sopenharmony_ci copied += to_copy; 56962306a36Sopenharmony_ci } 57062306a36Sopenharmony_ci 57162306a36Sopenharmony_ci return copied; 57262306a36Sopenharmony_ci} 57362306a36Sopenharmony_ci 57462306a36Sopenharmony_ci/* ic starts out kzalloc()ed */ 57562306a36Sopenharmony_civoid rds_ib_recv_init_ack(struct rds_ib_connection *ic) 57662306a36Sopenharmony_ci{ 57762306a36Sopenharmony_ci struct ib_send_wr *wr = &ic->i_ack_wr; 57862306a36Sopenharmony_ci struct ib_sge *sge = &ic->i_ack_sge; 57962306a36Sopenharmony_ci 58062306a36Sopenharmony_ci sge->addr = ic->i_ack_dma; 58162306a36Sopenharmony_ci sge->length = sizeof(struct rds_header); 58262306a36Sopenharmony_ci sge->lkey = ic->i_pd->local_dma_lkey; 58362306a36Sopenharmony_ci 58462306a36Sopenharmony_ci wr->sg_list = sge; 58562306a36Sopenharmony_ci wr->num_sge = 1; 58662306a36Sopenharmony_ci wr->opcode = IB_WR_SEND; 58762306a36Sopenharmony_ci wr->wr_id = RDS_IB_ACK_WR_ID; 58862306a36Sopenharmony_ci wr->send_flags = IB_SEND_SIGNALED | IB_SEND_SOLICITED; 58962306a36Sopenharmony_ci} 59062306a36Sopenharmony_ci 59162306a36Sopenharmony_ci/* 59262306a36Sopenharmony_ci * You'd think that with reliable IB connections you wouldn't need to ack 59362306a36Sopenharmony_ci * messages that have been received. The problem is that IB hardware generates 59462306a36Sopenharmony_ci * an ack message before it has DMAed the message into memory. This creates a 59562306a36Sopenharmony_ci * potential message loss if the HCA is disabled for any reason between when it 59662306a36Sopenharmony_ci * sends the ack and before the message is DMAed and processed. This is only a 59762306a36Sopenharmony_ci * potential issue if another HCA is available for fail-over. 59862306a36Sopenharmony_ci * 59962306a36Sopenharmony_ci * When the remote host receives our ack they'll free the sent message from 60062306a36Sopenharmony_ci * their send queue. To decrease the latency of this we always send an ack 60162306a36Sopenharmony_ci * immediately after we've received messages. 60262306a36Sopenharmony_ci * 60362306a36Sopenharmony_ci * For simplicity, we only have one ack in flight at a time. This puts 60462306a36Sopenharmony_ci * pressure on senders to have deep enough send queues to absorb the latency of 60562306a36Sopenharmony_ci * a single ack frame being in flight. This might not be good enough. 60662306a36Sopenharmony_ci * 60762306a36Sopenharmony_ci * This is implemented by have a long-lived send_wr and sge which point to a 60862306a36Sopenharmony_ci * statically allocated ack frame. This ack wr does not fall under the ring 60962306a36Sopenharmony_ci * accounting that the tx and rx wrs do. The QP attribute specifically makes 61062306a36Sopenharmony_ci * room for it beyond the ring size. Send completion notices its special 61162306a36Sopenharmony_ci * wr_id and avoids working with the ring in that case. 61262306a36Sopenharmony_ci */ 61362306a36Sopenharmony_ci#ifndef KERNEL_HAS_ATOMIC64 61462306a36Sopenharmony_civoid rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq, int ack_required) 61562306a36Sopenharmony_ci{ 61662306a36Sopenharmony_ci unsigned long flags; 61762306a36Sopenharmony_ci 61862306a36Sopenharmony_ci spin_lock_irqsave(&ic->i_ack_lock, flags); 61962306a36Sopenharmony_ci ic->i_ack_next = seq; 62062306a36Sopenharmony_ci if (ack_required) 62162306a36Sopenharmony_ci set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); 62262306a36Sopenharmony_ci spin_unlock_irqrestore(&ic->i_ack_lock, flags); 62362306a36Sopenharmony_ci} 62462306a36Sopenharmony_ci 62562306a36Sopenharmony_cistatic u64 rds_ib_get_ack(struct rds_ib_connection *ic) 62662306a36Sopenharmony_ci{ 62762306a36Sopenharmony_ci unsigned long flags; 62862306a36Sopenharmony_ci u64 seq; 62962306a36Sopenharmony_ci 63062306a36Sopenharmony_ci clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); 63162306a36Sopenharmony_ci 63262306a36Sopenharmony_ci spin_lock_irqsave(&ic->i_ack_lock, flags); 63362306a36Sopenharmony_ci seq = ic->i_ack_next; 63462306a36Sopenharmony_ci spin_unlock_irqrestore(&ic->i_ack_lock, flags); 63562306a36Sopenharmony_ci 63662306a36Sopenharmony_ci return seq; 63762306a36Sopenharmony_ci} 63862306a36Sopenharmony_ci#else 63962306a36Sopenharmony_civoid rds_ib_set_ack(struct rds_ib_connection *ic, u64 seq, int ack_required) 64062306a36Sopenharmony_ci{ 64162306a36Sopenharmony_ci atomic64_set(&ic->i_ack_next, seq); 64262306a36Sopenharmony_ci if (ack_required) { 64362306a36Sopenharmony_ci smp_mb__before_atomic(); 64462306a36Sopenharmony_ci set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); 64562306a36Sopenharmony_ci } 64662306a36Sopenharmony_ci} 64762306a36Sopenharmony_ci 64862306a36Sopenharmony_cistatic u64 rds_ib_get_ack(struct rds_ib_connection *ic) 64962306a36Sopenharmony_ci{ 65062306a36Sopenharmony_ci clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); 65162306a36Sopenharmony_ci smp_mb__after_atomic(); 65262306a36Sopenharmony_ci 65362306a36Sopenharmony_ci return atomic64_read(&ic->i_ack_next); 65462306a36Sopenharmony_ci} 65562306a36Sopenharmony_ci#endif 65662306a36Sopenharmony_ci 65762306a36Sopenharmony_ci 65862306a36Sopenharmony_cistatic void rds_ib_send_ack(struct rds_ib_connection *ic, unsigned int adv_credits) 65962306a36Sopenharmony_ci{ 66062306a36Sopenharmony_ci struct rds_header *hdr = ic->i_ack; 66162306a36Sopenharmony_ci u64 seq; 66262306a36Sopenharmony_ci int ret; 66362306a36Sopenharmony_ci 66462306a36Sopenharmony_ci seq = rds_ib_get_ack(ic); 66562306a36Sopenharmony_ci 66662306a36Sopenharmony_ci rdsdebug("send_ack: ic %p ack %llu\n", ic, (unsigned long long) seq); 66762306a36Sopenharmony_ci 66862306a36Sopenharmony_ci ib_dma_sync_single_for_cpu(ic->rds_ibdev->dev, ic->i_ack_dma, 66962306a36Sopenharmony_ci sizeof(*hdr), DMA_TO_DEVICE); 67062306a36Sopenharmony_ci rds_message_populate_header(hdr, 0, 0, 0); 67162306a36Sopenharmony_ci hdr->h_ack = cpu_to_be64(seq); 67262306a36Sopenharmony_ci hdr->h_credit = adv_credits; 67362306a36Sopenharmony_ci rds_message_make_checksum(hdr); 67462306a36Sopenharmony_ci ib_dma_sync_single_for_device(ic->rds_ibdev->dev, ic->i_ack_dma, 67562306a36Sopenharmony_ci sizeof(*hdr), DMA_TO_DEVICE); 67662306a36Sopenharmony_ci 67762306a36Sopenharmony_ci ic->i_ack_queued = jiffies; 67862306a36Sopenharmony_ci 67962306a36Sopenharmony_ci ret = ib_post_send(ic->i_cm_id->qp, &ic->i_ack_wr, NULL); 68062306a36Sopenharmony_ci if (unlikely(ret)) { 68162306a36Sopenharmony_ci /* Failed to send. Release the WR, and 68262306a36Sopenharmony_ci * force another ACK. 68362306a36Sopenharmony_ci */ 68462306a36Sopenharmony_ci clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags); 68562306a36Sopenharmony_ci set_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); 68662306a36Sopenharmony_ci 68762306a36Sopenharmony_ci rds_ib_stats_inc(s_ib_ack_send_failure); 68862306a36Sopenharmony_ci 68962306a36Sopenharmony_ci rds_ib_conn_error(ic->conn, "sending ack failed\n"); 69062306a36Sopenharmony_ci } else 69162306a36Sopenharmony_ci rds_ib_stats_inc(s_ib_ack_sent); 69262306a36Sopenharmony_ci} 69362306a36Sopenharmony_ci 69462306a36Sopenharmony_ci/* 69562306a36Sopenharmony_ci * There are 3 ways of getting acknowledgements to the peer: 69662306a36Sopenharmony_ci * 1. We call rds_ib_attempt_ack from the recv completion handler 69762306a36Sopenharmony_ci * to send an ACK-only frame. 69862306a36Sopenharmony_ci * However, there can be only one such frame in the send queue 69962306a36Sopenharmony_ci * at any time, so we may have to postpone it. 70062306a36Sopenharmony_ci * 2. When another (data) packet is transmitted while there's 70162306a36Sopenharmony_ci * an ACK in the queue, we piggyback the ACK sequence number 70262306a36Sopenharmony_ci * on the data packet. 70362306a36Sopenharmony_ci * 3. If the ACK WR is done sending, we get called from the 70462306a36Sopenharmony_ci * send queue completion handler, and check whether there's 70562306a36Sopenharmony_ci * another ACK pending (postponed because the WR was on the 70662306a36Sopenharmony_ci * queue). If so, we transmit it. 70762306a36Sopenharmony_ci * 70862306a36Sopenharmony_ci * We maintain 2 variables: 70962306a36Sopenharmony_ci * - i_ack_flags, which keeps track of whether the ACK WR 71062306a36Sopenharmony_ci * is currently in the send queue or not (IB_ACK_IN_FLIGHT) 71162306a36Sopenharmony_ci * - i_ack_next, which is the last sequence number we received 71262306a36Sopenharmony_ci * 71362306a36Sopenharmony_ci * Potentially, send queue and receive queue handlers can run concurrently. 71462306a36Sopenharmony_ci * It would be nice to not have to use a spinlock to synchronize things, 71562306a36Sopenharmony_ci * but the one problem that rules this out is that 64bit updates are 71662306a36Sopenharmony_ci * not atomic on all platforms. Things would be a lot simpler if 71762306a36Sopenharmony_ci * we had atomic64 or maybe cmpxchg64 everywhere. 71862306a36Sopenharmony_ci * 71962306a36Sopenharmony_ci * Reconnecting complicates this picture just slightly. When we 72062306a36Sopenharmony_ci * reconnect, we may be seeing duplicate packets. The peer 72162306a36Sopenharmony_ci * is retransmitting them, because it hasn't seen an ACK for 72262306a36Sopenharmony_ci * them. It is important that we ACK these. 72362306a36Sopenharmony_ci * 72462306a36Sopenharmony_ci * ACK mitigation adds a header flag "ACK_REQUIRED"; any packet with 72562306a36Sopenharmony_ci * this flag set *MUST* be acknowledged immediately. 72662306a36Sopenharmony_ci */ 72762306a36Sopenharmony_ci 72862306a36Sopenharmony_ci/* 72962306a36Sopenharmony_ci * When we get here, we're called from the recv queue handler. 73062306a36Sopenharmony_ci * Check whether we ought to transmit an ACK. 73162306a36Sopenharmony_ci */ 73262306a36Sopenharmony_civoid rds_ib_attempt_ack(struct rds_ib_connection *ic) 73362306a36Sopenharmony_ci{ 73462306a36Sopenharmony_ci unsigned int adv_credits; 73562306a36Sopenharmony_ci 73662306a36Sopenharmony_ci if (!test_bit(IB_ACK_REQUESTED, &ic->i_ack_flags)) 73762306a36Sopenharmony_ci return; 73862306a36Sopenharmony_ci 73962306a36Sopenharmony_ci if (test_and_set_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags)) { 74062306a36Sopenharmony_ci rds_ib_stats_inc(s_ib_ack_send_delayed); 74162306a36Sopenharmony_ci return; 74262306a36Sopenharmony_ci } 74362306a36Sopenharmony_ci 74462306a36Sopenharmony_ci /* Can we get a send credit? */ 74562306a36Sopenharmony_ci if (!rds_ib_send_grab_credits(ic, 1, &adv_credits, 0, RDS_MAX_ADV_CREDIT)) { 74662306a36Sopenharmony_ci rds_ib_stats_inc(s_ib_tx_throttle); 74762306a36Sopenharmony_ci clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags); 74862306a36Sopenharmony_ci return; 74962306a36Sopenharmony_ci } 75062306a36Sopenharmony_ci 75162306a36Sopenharmony_ci clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags); 75262306a36Sopenharmony_ci rds_ib_send_ack(ic, adv_credits); 75362306a36Sopenharmony_ci} 75462306a36Sopenharmony_ci 75562306a36Sopenharmony_ci/* 75662306a36Sopenharmony_ci * We get here from the send completion handler, when the 75762306a36Sopenharmony_ci * adapter tells us the ACK frame was sent. 75862306a36Sopenharmony_ci */ 75962306a36Sopenharmony_civoid rds_ib_ack_send_complete(struct rds_ib_connection *ic) 76062306a36Sopenharmony_ci{ 76162306a36Sopenharmony_ci clear_bit(IB_ACK_IN_FLIGHT, &ic->i_ack_flags); 76262306a36Sopenharmony_ci rds_ib_attempt_ack(ic); 76362306a36Sopenharmony_ci} 76462306a36Sopenharmony_ci 76562306a36Sopenharmony_ci/* 76662306a36Sopenharmony_ci * This is called by the regular xmit code when it wants to piggyback 76762306a36Sopenharmony_ci * an ACK on an outgoing frame. 76862306a36Sopenharmony_ci */ 76962306a36Sopenharmony_ciu64 rds_ib_piggyb_ack(struct rds_ib_connection *ic) 77062306a36Sopenharmony_ci{ 77162306a36Sopenharmony_ci if (test_and_clear_bit(IB_ACK_REQUESTED, &ic->i_ack_flags)) 77262306a36Sopenharmony_ci rds_ib_stats_inc(s_ib_ack_send_piggybacked); 77362306a36Sopenharmony_ci return rds_ib_get_ack(ic); 77462306a36Sopenharmony_ci} 77562306a36Sopenharmony_ci 77662306a36Sopenharmony_ci/* 77762306a36Sopenharmony_ci * It's kind of lame that we're copying from the posted receive pages into 77862306a36Sopenharmony_ci * long-lived bitmaps. We could have posted the bitmaps and rdma written into 77962306a36Sopenharmony_ci * them. But receiving new congestion bitmaps should be a *rare* event, so 78062306a36Sopenharmony_ci * hopefully we won't need to invest that complexity in making it more 78162306a36Sopenharmony_ci * efficient. By copying we can share a simpler core with TCP which has to 78262306a36Sopenharmony_ci * copy. 78362306a36Sopenharmony_ci */ 78462306a36Sopenharmony_cistatic void rds_ib_cong_recv(struct rds_connection *conn, 78562306a36Sopenharmony_ci struct rds_ib_incoming *ibinc) 78662306a36Sopenharmony_ci{ 78762306a36Sopenharmony_ci struct rds_cong_map *map; 78862306a36Sopenharmony_ci unsigned int map_off; 78962306a36Sopenharmony_ci unsigned int map_page; 79062306a36Sopenharmony_ci struct rds_page_frag *frag; 79162306a36Sopenharmony_ci unsigned long frag_off; 79262306a36Sopenharmony_ci unsigned long to_copy; 79362306a36Sopenharmony_ci unsigned long copied; 79462306a36Sopenharmony_ci __le64 uncongested = 0; 79562306a36Sopenharmony_ci void *addr; 79662306a36Sopenharmony_ci 79762306a36Sopenharmony_ci /* catch completely corrupt packets */ 79862306a36Sopenharmony_ci if (be32_to_cpu(ibinc->ii_inc.i_hdr.h_len) != RDS_CONG_MAP_BYTES) 79962306a36Sopenharmony_ci return; 80062306a36Sopenharmony_ci 80162306a36Sopenharmony_ci map = conn->c_fcong; 80262306a36Sopenharmony_ci map_page = 0; 80362306a36Sopenharmony_ci map_off = 0; 80462306a36Sopenharmony_ci 80562306a36Sopenharmony_ci frag = list_entry(ibinc->ii_frags.next, struct rds_page_frag, f_item); 80662306a36Sopenharmony_ci frag_off = 0; 80762306a36Sopenharmony_ci 80862306a36Sopenharmony_ci copied = 0; 80962306a36Sopenharmony_ci 81062306a36Sopenharmony_ci while (copied < RDS_CONG_MAP_BYTES) { 81162306a36Sopenharmony_ci __le64 *src, *dst; 81262306a36Sopenharmony_ci unsigned int k; 81362306a36Sopenharmony_ci 81462306a36Sopenharmony_ci to_copy = min(RDS_FRAG_SIZE - frag_off, PAGE_SIZE - map_off); 81562306a36Sopenharmony_ci BUG_ON(to_copy & 7); /* Must be 64bit aligned. */ 81662306a36Sopenharmony_ci 81762306a36Sopenharmony_ci addr = kmap_atomic(sg_page(&frag->f_sg)); 81862306a36Sopenharmony_ci 81962306a36Sopenharmony_ci src = addr + frag->f_sg.offset + frag_off; 82062306a36Sopenharmony_ci dst = (void *)map->m_page_addrs[map_page] + map_off; 82162306a36Sopenharmony_ci for (k = 0; k < to_copy; k += 8) { 82262306a36Sopenharmony_ci /* Record ports that became uncongested, ie 82362306a36Sopenharmony_ci * bits that changed from 0 to 1. */ 82462306a36Sopenharmony_ci uncongested |= ~(*src) & *dst; 82562306a36Sopenharmony_ci *dst++ = *src++; 82662306a36Sopenharmony_ci } 82762306a36Sopenharmony_ci kunmap_atomic(addr); 82862306a36Sopenharmony_ci 82962306a36Sopenharmony_ci copied += to_copy; 83062306a36Sopenharmony_ci 83162306a36Sopenharmony_ci map_off += to_copy; 83262306a36Sopenharmony_ci if (map_off == PAGE_SIZE) { 83362306a36Sopenharmony_ci map_off = 0; 83462306a36Sopenharmony_ci map_page++; 83562306a36Sopenharmony_ci } 83662306a36Sopenharmony_ci 83762306a36Sopenharmony_ci frag_off += to_copy; 83862306a36Sopenharmony_ci if (frag_off == RDS_FRAG_SIZE) { 83962306a36Sopenharmony_ci frag = list_entry(frag->f_item.next, 84062306a36Sopenharmony_ci struct rds_page_frag, f_item); 84162306a36Sopenharmony_ci frag_off = 0; 84262306a36Sopenharmony_ci } 84362306a36Sopenharmony_ci } 84462306a36Sopenharmony_ci 84562306a36Sopenharmony_ci /* the congestion map is in little endian order */ 84662306a36Sopenharmony_ci rds_cong_map_updated(map, le64_to_cpu(uncongested)); 84762306a36Sopenharmony_ci} 84862306a36Sopenharmony_ci 84962306a36Sopenharmony_cistatic void rds_ib_process_recv(struct rds_connection *conn, 85062306a36Sopenharmony_ci struct rds_ib_recv_work *recv, u32 data_len, 85162306a36Sopenharmony_ci struct rds_ib_ack_state *state) 85262306a36Sopenharmony_ci{ 85362306a36Sopenharmony_ci struct rds_ib_connection *ic = conn->c_transport_data; 85462306a36Sopenharmony_ci struct rds_ib_incoming *ibinc = ic->i_ibinc; 85562306a36Sopenharmony_ci struct rds_header *ihdr, *hdr; 85662306a36Sopenharmony_ci dma_addr_t dma_addr = ic->i_recv_hdrs_dma[recv - ic->i_recvs]; 85762306a36Sopenharmony_ci 85862306a36Sopenharmony_ci /* XXX shut down the connection if port 0,0 are seen? */ 85962306a36Sopenharmony_ci 86062306a36Sopenharmony_ci rdsdebug("ic %p ibinc %p recv %p byte len %u\n", ic, ibinc, recv, 86162306a36Sopenharmony_ci data_len); 86262306a36Sopenharmony_ci 86362306a36Sopenharmony_ci if (data_len < sizeof(struct rds_header)) { 86462306a36Sopenharmony_ci rds_ib_conn_error(conn, "incoming message " 86562306a36Sopenharmony_ci "from %pI6c didn't include a " 86662306a36Sopenharmony_ci "header, disconnecting and " 86762306a36Sopenharmony_ci "reconnecting\n", 86862306a36Sopenharmony_ci &conn->c_faddr); 86962306a36Sopenharmony_ci return; 87062306a36Sopenharmony_ci } 87162306a36Sopenharmony_ci data_len -= sizeof(struct rds_header); 87262306a36Sopenharmony_ci 87362306a36Sopenharmony_ci ihdr = ic->i_recv_hdrs[recv - ic->i_recvs]; 87462306a36Sopenharmony_ci 87562306a36Sopenharmony_ci ib_dma_sync_single_for_cpu(ic->rds_ibdev->dev, dma_addr, 87662306a36Sopenharmony_ci sizeof(*ihdr), DMA_FROM_DEVICE); 87762306a36Sopenharmony_ci /* Validate the checksum. */ 87862306a36Sopenharmony_ci if (!rds_message_verify_checksum(ihdr)) { 87962306a36Sopenharmony_ci rds_ib_conn_error(conn, "incoming message " 88062306a36Sopenharmony_ci "from %pI6c has corrupted header - " 88162306a36Sopenharmony_ci "forcing a reconnect\n", 88262306a36Sopenharmony_ci &conn->c_faddr); 88362306a36Sopenharmony_ci rds_stats_inc(s_recv_drop_bad_checksum); 88462306a36Sopenharmony_ci goto done; 88562306a36Sopenharmony_ci } 88662306a36Sopenharmony_ci 88762306a36Sopenharmony_ci /* Process the ACK sequence which comes with every packet */ 88862306a36Sopenharmony_ci state->ack_recv = be64_to_cpu(ihdr->h_ack); 88962306a36Sopenharmony_ci state->ack_recv_valid = 1; 89062306a36Sopenharmony_ci 89162306a36Sopenharmony_ci /* Process the credits update if there was one */ 89262306a36Sopenharmony_ci if (ihdr->h_credit) 89362306a36Sopenharmony_ci rds_ib_send_add_credits(conn, ihdr->h_credit); 89462306a36Sopenharmony_ci 89562306a36Sopenharmony_ci if (ihdr->h_sport == 0 && ihdr->h_dport == 0 && data_len == 0) { 89662306a36Sopenharmony_ci /* This is an ACK-only packet. The fact that it gets 89762306a36Sopenharmony_ci * special treatment here is that historically, ACKs 89862306a36Sopenharmony_ci * were rather special beasts. 89962306a36Sopenharmony_ci */ 90062306a36Sopenharmony_ci rds_ib_stats_inc(s_ib_ack_received); 90162306a36Sopenharmony_ci 90262306a36Sopenharmony_ci /* 90362306a36Sopenharmony_ci * Usually the frags make their way on to incs and are then freed as 90462306a36Sopenharmony_ci * the inc is freed. We don't go that route, so we have to drop the 90562306a36Sopenharmony_ci * page ref ourselves. We can't just leave the page on the recv 90662306a36Sopenharmony_ci * because that confuses the dma mapping of pages and each recv's use 90762306a36Sopenharmony_ci * of a partial page. 90862306a36Sopenharmony_ci * 90962306a36Sopenharmony_ci * FIXME: Fold this into the code path below. 91062306a36Sopenharmony_ci */ 91162306a36Sopenharmony_ci rds_ib_frag_free(ic, recv->r_frag); 91262306a36Sopenharmony_ci recv->r_frag = NULL; 91362306a36Sopenharmony_ci goto done; 91462306a36Sopenharmony_ci } 91562306a36Sopenharmony_ci 91662306a36Sopenharmony_ci /* 91762306a36Sopenharmony_ci * If we don't already have an inc on the connection then this 91862306a36Sopenharmony_ci * fragment has a header and starts a message.. copy its header 91962306a36Sopenharmony_ci * into the inc and save the inc so we can hang upcoming fragments 92062306a36Sopenharmony_ci * off its list. 92162306a36Sopenharmony_ci */ 92262306a36Sopenharmony_ci if (!ibinc) { 92362306a36Sopenharmony_ci ibinc = recv->r_ibinc; 92462306a36Sopenharmony_ci recv->r_ibinc = NULL; 92562306a36Sopenharmony_ci ic->i_ibinc = ibinc; 92662306a36Sopenharmony_ci 92762306a36Sopenharmony_ci hdr = &ibinc->ii_inc.i_hdr; 92862306a36Sopenharmony_ci ibinc->ii_inc.i_rx_lat_trace[RDS_MSG_RX_HDR] = 92962306a36Sopenharmony_ci local_clock(); 93062306a36Sopenharmony_ci memcpy(hdr, ihdr, sizeof(*hdr)); 93162306a36Sopenharmony_ci ic->i_recv_data_rem = be32_to_cpu(hdr->h_len); 93262306a36Sopenharmony_ci ibinc->ii_inc.i_rx_lat_trace[RDS_MSG_RX_START] = 93362306a36Sopenharmony_ci local_clock(); 93462306a36Sopenharmony_ci 93562306a36Sopenharmony_ci rdsdebug("ic %p ibinc %p rem %u flag 0x%x\n", ic, ibinc, 93662306a36Sopenharmony_ci ic->i_recv_data_rem, hdr->h_flags); 93762306a36Sopenharmony_ci } else { 93862306a36Sopenharmony_ci hdr = &ibinc->ii_inc.i_hdr; 93962306a36Sopenharmony_ci /* We can't just use memcmp here; fragments of a 94062306a36Sopenharmony_ci * single message may carry different ACKs */ 94162306a36Sopenharmony_ci if (hdr->h_sequence != ihdr->h_sequence || 94262306a36Sopenharmony_ci hdr->h_len != ihdr->h_len || 94362306a36Sopenharmony_ci hdr->h_sport != ihdr->h_sport || 94462306a36Sopenharmony_ci hdr->h_dport != ihdr->h_dport) { 94562306a36Sopenharmony_ci rds_ib_conn_error(conn, 94662306a36Sopenharmony_ci "fragment header mismatch; forcing reconnect\n"); 94762306a36Sopenharmony_ci goto done; 94862306a36Sopenharmony_ci } 94962306a36Sopenharmony_ci } 95062306a36Sopenharmony_ci 95162306a36Sopenharmony_ci list_add_tail(&recv->r_frag->f_item, &ibinc->ii_frags); 95262306a36Sopenharmony_ci recv->r_frag = NULL; 95362306a36Sopenharmony_ci 95462306a36Sopenharmony_ci if (ic->i_recv_data_rem > RDS_FRAG_SIZE) 95562306a36Sopenharmony_ci ic->i_recv_data_rem -= RDS_FRAG_SIZE; 95662306a36Sopenharmony_ci else { 95762306a36Sopenharmony_ci ic->i_recv_data_rem = 0; 95862306a36Sopenharmony_ci ic->i_ibinc = NULL; 95962306a36Sopenharmony_ci 96062306a36Sopenharmony_ci if (ibinc->ii_inc.i_hdr.h_flags == RDS_FLAG_CONG_BITMAP) { 96162306a36Sopenharmony_ci rds_ib_cong_recv(conn, ibinc); 96262306a36Sopenharmony_ci } else { 96362306a36Sopenharmony_ci rds_recv_incoming(conn, &conn->c_faddr, &conn->c_laddr, 96462306a36Sopenharmony_ci &ibinc->ii_inc, GFP_ATOMIC); 96562306a36Sopenharmony_ci state->ack_next = be64_to_cpu(hdr->h_sequence); 96662306a36Sopenharmony_ci state->ack_next_valid = 1; 96762306a36Sopenharmony_ci } 96862306a36Sopenharmony_ci 96962306a36Sopenharmony_ci /* Evaluate the ACK_REQUIRED flag *after* we received 97062306a36Sopenharmony_ci * the complete frame, and after bumping the next_rx 97162306a36Sopenharmony_ci * sequence. */ 97262306a36Sopenharmony_ci if (hdr->h_flags & RDS_FLAG_ACK_REQUIRED) { 97362306a36Sopenharmony_ci rds_stats_inc(s_recv_ack_required); 97462306a36Sopenharmony_ci state->ack_required = 1; 97562306a36Sopenharmony_ci } 97662306a36Sopenharmony_ci 97762306a36Sopenharmony_ci rds_inc_put(&ibinc->ii_inc); 97862306a36Sopenharmony_ci } 97962306a36Sopenharmony_cidone: 98062306a36Sopenharmony_ci ib_dma_sync_single_for_device(ic->rds_ibdev->dev, dma_addr, 98162306a36Sopenharmony_ci sizeof(*ihdr), DMA_FROM_DEVICE); 98262306a36Sopenharmony_ci} 98362306a36Sopenharmony_ci 98462306a36Sopenharmony_civoid rds_ib_recv_cqe_handler(struct rds_ib_connection *ic, 98562306a36Sopenharmony_ci struct ib_wc *wc, 98662306a36Sopenharmony_ci struct rds_ib_ack_state *state) 98762306a36Sopenharmony_ci{ 98862306a36Sopenharmony_ci struct rds_connection *conn = ic->conn; 98962306a36Sopenharmony_ci struct rds_ib_recv_work *recv; 99062306a36Sopenharmony_ci 99162306a36Sopenharmony_ci rdsdebug("wc wr_id 0x%llx status %u (%s) byte_len %u imm_data %u\n", 99262306a36Sopenharmony_ci (unsigned long long)wc->wr_id, wc->status, 99362306a36Sopenharmony_ci ib_wc_status_msg(wc->status), wc->byte_len, 99462306a36Sopenharmony_ci be32_to_cpu(wc->ex.imm_data)); 99562306a36Sopenharmony_ci 99662306a36Sopenharmony_ci rds_ib_stats_inc(s_ib_rx_cq_event); 99762306a36Sopenharmony_ci recv = &ic->i_recvs[rds_ib_ring_oldest(&ic->i_recv_ring)]; 99862306a36Sopenharmony_ci ib_dma_unmap_sg(ic->i_cm_id->device, &recv->r_frag->f_sg, 1, 99962306a36Sopenharmony_ci DMA_FROM_DEVICE); 100062306a36Sopenharmony_ci 100162306a36Sopenharmony_ci /* Also process recvs in connecting state because it is possible 100262306a36Sopenharmony_ci * to get a recv completion _before_ the rdmacm ESTABLISHED 100362306a36Sopenharmony_ci * event is processed. 100462306a36Sopenharmony_ci */ 100562306a36Sopenharmony_ci if (wc->status == IB_WC_SUCCESS) { 100662306a36Sopenharmony_ci rds_ib_process_recv(conn, recv, wc->byte_len, state); 100762306a36Sopenharmony_ci } else { 100862306a36Sopenharmony_ci /* We expect errors as the qp is drained during shutdown */ 100962306a36Sopenharmony_ci if (rds_conn_up(conn) || rds_conn_connecting(conn)) 101062306a36Sopenharmony_ci rds_ib_conn_error(conn, "recv completion on <%pI6c,%pI6c, %d> had status %u (%s), vendor err 0x%x, disconnecting and reconnecting\n", 101162306a36Sopenharmony_ci &conn->c_laddr, &conn->c_faddr, 101262306a36Sopenharmony_ci conn->c_tos, wc->status, 101362306a36Sopenharmony_ci ib_wc_status_msg(wc->status), 101462306a36Sopenharmony_ci wc->vendor_err); 101562306a36Sopenharmony_ci } 101662306a36Sopenharmony_ci 101762306a36Sopenharmony_ci /* rds_ib_process_recv() doesn't always consume the frag, and 101862306a36Sopenharmony_ci * we might not have called it at all if the wc didn't indicate 101962306a36Sopenharmony_ci * success. We already unmapped the frag's pages, though, and 102062306a36Sopenharmony_ci * the following rds_ib_ring_free() call tells the refill path 102162306a36Sopenharmony_ci * that it will not find an allocated frag here. Make sure we 102262306a36Sopenharmony_ci * keep that promise by freeing a frag that's still on the ring. 102362306a36Sopenharmony_ci */ 102462306a36Sopenharmony_ci if (recv->r_frag) { 102562306a36Sopenharmony_ci rds_ib_frag_free(ic, recv->r_frag); 102662306a36Sopenharmony_ci recv->r_frag = NULL; 102762306a36Sopenharmony_ci } 102862306a36Sopenharmony_ci rds_ib_ring_free(&ic->i_recv_ring, 1); 102962306a36Sopenharmony_ci 103062306a36Sopenharmony_ci /* If we ever end up with a really empty receive ring, we're 103162306a36Sopenharmony_ci * in deep trouble, as the sender will definitely see RNR 103262306a36Sopenharmony_ci * timeouts. */ 103362306a36Sopenharmony_ci if (rds_ib_ring_empty(&ic->i_recv_ring)) 103462306a36Sopenharmony_ci rds_ib_stats_inc(s_ib_rx_ring_empty); 103562306a36Sopenharmony_ci 103662306a36Sopenharmony_ci if (rds_ib_ring_low(&ic->i_recv_ring)) { 103762306a36Sopenharmony_ci rds_ib_recv_refill(conn, 0, GFP_NOWAIT | __GFP_NOWARN); 103862306a36Sopenharmony_ci rds_ib_stats_inc(s_ib_rx_refill_from_cq); 103962306a36Sopenharmony_ci } 104062306a36Sopenharmony_ci} 104162306a36Sopenharmony_ci 104262306a36Sopenharmony_ciint rds_ib_recv_path(struct rds_conn_path *cp) 104362306a36Sopenharmony_ci{ 104462306a36Sopenharmony_ci struct rds_connection *conn = cp->cp_conn; 104562306a36Sopenharmony_ci struct rds_ib_connection *ic = conn->c_transport_data; 104662306a36Sopenharmony_ci 104762306a36Sopenharmony_ci rdsdebug("conn %p\n", conn); 104862306a36Sopenharmony_ci if (rds_conn_up(conn)) { 104962306a36Sopenharmony_ci rds_ib_attempt_ack(ic); 105062306a36Sopenharmony_ci rds_ib_recv_refill(conn, 0, GFP_KERNEL); 105162306a36Sopenharmony_ci rds_ib_stats_inc(s_ib_rx_refill_from_thread); 105262306a36Sopenharmony_ci } 105362306a36Sopenharmony_ci 105462306a36Sopenharmony_ci return 0; 105562306a36Sopenharmony_ci} 105662306a36Sopenharmony_ci 105762306a36Sopenharmony_ciint rds_ib_recv_init(void) 105862306a36Sopenharmony_ci{ 105962306a36Sopenharmony_ci struct sysinfo si; 106062306a36Sopenharmony_ci int ret = -ENOMEM; 106162306a36Sopenharmony_ci 106262306a36Sopenharmony_ci /* Default to 30% of all available RAM for recv memory */ 106362306a36Sopenharmony_ci si_meminfo(&si); 106462306a36Sopenharmony_ci rds_ib_sysctl_max_recv_allocation = si.totalram / 3 * PAGE_SIZE / RDS_FRAG_SIZE; 106562306a36Sopenharmony_ci 106662306a36Sopenharmony_ci rds_ib_incoming_slab = 106762306a36Sopenharmony_ci kmem_cache_create_usercopy("rds_ib_incoming", 106862306a36Sopenharmony_ci sizeof(struct rds_ib_incoming), 106962306a36Sopenharmony_ci 0, SLAB_HWCACHE_ALIGN, 107062306a36Sopenharmony_ci offsetof(struct rds_ib_incoming, 107162306a36Sopenharmony_ci ii_inc.i_usercopy), 107262306a36Sopenharmony_ci sizeof(struct rds_inc_usercopy), 107362306a36Sopenharmony_ci NULL); 107462306a36Sopenharmony_ci if (!rds_ib_incoming_slab) 107562306a36Sopenharmony_ci goto out; 107662306a36Sopenharmony_ci 107762306a36Sopenharmony_ci rds_ib_frag_slab = kmem_cache_create("rds_ib_frag", 107862306a36Sopenharmony_ci sizeof(struct rds_page_frag), 107962306a36Sopenharmony_ci 0, SLAB_HWCACHE_ALIGN, NULL); 108062306a36Sopenharmony_ci if (!rds_ib_frag_slab) { 108162306a36Sopenharmony_ci kmem_cache_destroy(rds_ib_incoming_slab); 108262306a36Sopenharmony_ci rds_ib_incoming_slab = NULL; 108362306a36Sopenharmony_ci } else 108462306a36Sopenharmony_ci ret = 0; 108562306a36Sopenharmony_ciout: 108662306a36Sopenharmony_ci return ret; 108762306a36Sopenharmony_ci} 108862306a36Sopenharmony_ci 108962306a36Sopenharmony_civoid rds_ib_recv_exit(void) 109062306a36Sopenharmony_ci{ 109162306a36Sopenharmony_ci WARN_ON(atomic_read(&rds_ib_allocation)); 109262306a36Sopenharmony_ci 109362306a36Sopenharmony_ci kmem_cache_destroy(rds_ib_incoming_slab); 109462306a36Sopenharmony_ci kmem_cache_destroy(rds_ib_frag_slab); 109562306a36Sopenharmony_ci} 1096