18c2ecf20Sopenharmony_ci/* 28c2ecf20Sopenharmony_ci * Copyright (c) 2006 Oracle. All rights reserved. 38c2ecf20Sopenharmony_ci * 48c2ecf20Sopenharmony_ci * This software is available to you under a choice of one of two 58c2ecf20Sopenharmony_ci * licenses. You may choose to be licensed under the terms of the GNU 68c2ecf20Sopenharmony_ci * General Public License (GPL) Version 2, available from the file 78c2ecf20Sopenharmony_ci * COPYING in the main directory of this source tree, or the 88c2ecf20Sopenharmony_ci * OpenIB.org BSD license below: 98c2ecf20Sopenharmony_ci * 108c2ecf20Sopenharmony_ci * Redistribution and use in source and binary forms, with or 118c2ecf20Sopenharmony_ci * without modification, are permitted provided that the following 128c2ecf20Sopenharmony_ci * conditions are met: 138c2ecf20Sopenharmony_ci * 148c2ecf20Sopenharmony_ci * - Redistributions of source code must retain the above 158c2ecf20Sopenharmony_ci * copyright notice, this list of conditions and the following 168c2ecf20Sopenharmony_ci * disclaimer. 178c2ecf20Sopenharmony_ci * 188c2ecf20Sopenharmony_ci * - Redistributions in binary form must reproduce the above 198c2ecf20Sopenharmony_ci * copyright notice, this list of conditions and the following 208c2ecf20Sopenharmony_ci * disclaimer in the documentation and/or other materials 218c2ecf20Sopenharmony_ci * provided with the distribution. 228c2ecf20Sopenharmony_ci * 238c2ecf20Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 248c2ecf20Sopenharmony_ci * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 258c2ecf20Sopenharmony_ci * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 268c2ecf20Sopenharmony_ci * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 278c2ecf20Sopenharmony_ci * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 288c2ecf20Sopenharmony_ci * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 298c2ecf20Sopenharmony_ci * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 308c2ecf20Sopenharmony_ci * SOFTWARE. 318c2ecf20Sopenharmony_ci * 328c2ecf20Sopenharmony_ci */ 338c2ecf20Sopenharmony_ci#include <linux/highmem.h> 348c2ecf20Sopenharmony_ci#include <linux/gfp.h> 358c2ecf20Sopenharmony_ci#include <linux/cpu.h> 368c2ecf20Sopenharmony_ci#include <linux/export.h> 378c2ecf20Sopenharmony_ci 388c2ecf20Sopenharmony_ci#include "rds.h" 398c2ecf20Sopenharmony_ci 408c2ecf20Sopenharmony_cistruct rds_page_remainder { 418c2ecf20Sopenharmony_ci struct page *r_page; 428c2ecf20Sopenharmony_ci unsigned long r_offset; 438c2ecf20Sopenharmony_ci}; 448c2ecf20Sopenharmony_ci 458c2ecf20Sopenharmony_cistatic 468c2ecf20Sopenharmony_ciDEFINE_PER_CPU_SHARED_ALIGNED(struct rds_page_remainder, rds_page_remainders); 478c2ecf20Sopenharmony_ci 488c2ecf20Sopenharmony_ci/** 498c2ecf20Sopenharmony_ci * rds_page_remainder_alloc - build up regions of a message. 508c2ecf20Sopenharmony_ci * 518c2ecf20Sopenharmony_ci * @scat: Scatter list for message 528c2ecf20Sopenharmony_ci * @bytes: the number of bytes needed. 538c2ecf20Sopenharmony_ci * @gfp: the waiting behaviour of the allocation 548c2ecf20Sopenharmony_ci * 558c2ecf20Sopenharmony_ci * @gfp is always ored with __GFP_HIGHMEM. Callers must be prepared to 568c2ecf20Sopenharmony_ci * kmap the pages, etc. 578c2ecf20Sopenharmony_ci * 588c2ecf20Sopenharmony_ci * If @bytes is at least a full page then this just returns a page from 598c2ecf20Sopenharmony_ci * alloc_page(). 608c2ecf20Sopenharmony_ci * 618c2ecf20Sopenharmony_ci * If @bytes is a partial page then this stores the unused region of the 628c2ecf20Sopenharmony_ci * page in a per-cpu structure. Future partial-page allocations may be 638c2ecf20Sopenharmony_ci * satisfied from that cached region. This lets us waste less memory on 648c2ecf20Sopenharmony_ci * small allocations with minimal complexity. It works because the transmit 658c2ecf20Sopenharmony_ci * path passes read-only page regions down to devices. They hold a page 668c2ecf20Sopenharmony_ci * reference until they are done with the region. 678c2ecf20Sopenharmony_ci */ 688c2ecf20Sopenharmony_ciint rds_page_remainder_alloc(struct scatterlist *scat, unsigned long bytes, 698c2ecf20Sopenharmony_ci gfp_t gfp) 708c2ecf20Sopenharmony_ci{ 718c2ecf20Sopenharmony_ci struct rds_page_remainder *rem; 728c2ecf20Sopenharmony_ci unsigned long flags; 738c2ecf20Sopenharmony_ci struct page *page; 748c2ecf20Sopenharmony_ci int ret; 758c2ecf20Sopenharmony_ci 768c2ecf20Sopenharmony_ci gfp |= __GFP_HIGHMEM; 778c2ecf20Sopenharmony_ci 788c2ecf20Sopenharmony_ci /* jump straight to allocation if we're trying for a huge page */ 798c2ecf20Sopenharmony_ci if (bytes >= PAGE_SIZE) { 808c2ecf20Sopenharmony_ci page = alloc_page(gfp); 818c2ecf20Sopenharmony_ci if (!page) { 828c2ecf20Sopenharmony_ci ret = -ENOMEM; 838c2ecf20Sopenharmony_ci } else { 848c2ecf20Sopenharmony_ci sg_set_page(scat, page, PAGE_SIZE, 0); 858c2ecf20Sopenharmony_ci ret = 0; 868c2ecf20Sopenharmony_ci } 878c2ecf20Sopenharmony_ci goto out; 888c2ecf20Sopenharmony_ci } 898c2ecf20Sopenharmony_ci 908c2ecf20Sopenharmony_ci rem = &per_cpu(rds_page_remainders, get_cpu()); 918c2ecf20Sopenharmony_ci local_irq_save(flags); 928c2ecf20Sopenharmony_ci 938c2ecf20Sopenharmony_ci while (1) { 948c2ecf20Sopenharmony_ci /* avoid a tiny region getting stuck by tossing it */ 958c2ecf20Sopenharmony_ci if (rem->r_page && bytes > (PAGE_SIZE - rem->r_offset)) { 968c2ecf20Sopenharmony_ci rds_stats_inc(s_page_remainder_miss); 978c2ecf20Sopenharmony_ci __free_page(rem->r_page); 988c2ecf20Sopenharmony_ci rem->r_page = NULL; 998c2ecf20Sopenharmony_ci } 1008c2ecf20Sopenharmony_ci 1018c2ecf20Sopenharmony_ci /* hand out a fragment from the cached page */ 1028c2ecf20Sopenharmony_ci if (rem->r_page && bytes <= (PAGE_SIZE - rem->r_offset)) { 1038c2ecf20Sopenharmony_ci sg_set_page(scat, rem->r_page, bytes, rem->r_offset); 1048c2ecf20Sopenharmony_ci get_page(sg_page(scat)); 1058c2ecf20Sopenharmony_ci 1068c2ecf20Sopenharmony_ci if (rem->r_offset != 0) 1078c2ecf20Sopenharmony_ci rds_stats_inc(s_page_remainder_hit); 1088c2ecf20Sopenharmony_ci 1098c2ecf20Sopenharmony_ci rem->r_offset += ALIGN(bytes, 8); 1108c2ecf20Sopenharmony_ci if (rem->r_offset >= PAGE_SIZE) { 1118c2ecf20Sopenharmony_ci __free_page(rem->r_page); 1128c2ecf20Sopenharmony_ci rem->r_page = NULL; 1138c2ecf20Sopenharmony_ci } 1148c2ecf20Sopenharmony_ci ret = 0; 1158c2ecf20Sopenharmony_ci break; 1168c2ecf20Sopenharmony_ci } 1178c2ecf20Sopenharmony_ci 1188c2ecf20Sopenharmony_ci /* alloc if there is nothing for us to use */ 1198c2ecf20Sopenharmony_ci local_irq_restore(flags); 1208c2ecf20Sopenharmony_ci put_cpu(); 1218c2ecf20Sopenharmony_ci 1228c2ecf20Sopenharmony_ci page = alloc_page(gfp); 1238c2ecf20Sopenharmony_ci 1248c2ecf20Sopenharmony_ci rem = &per_cpu(rds_page_remainders, get_cpu()); 1258c2ecf20Sopenharmony_ci local_irq_save(flags); 1268c2ecf20Sopenharmony_ci 1278c2ecf20Sopenharmony_ci if (!page) { 1288c2ecf20Sopenharmony_ci ret = -ENOMEM; 1298c2ecf20Sopenharmony_ci break; 1308c2ecf20Sopenharmony_ci } 1318c2ecf20Sopenharmony_ci 1328c2ecf20Sopenharmony_ci /* did someone race to fill the remainder before us? */ 1338c2ecf20Sopenharmony_ci if (rem->r_page) { 1348c2ecf20Sopenharmony_ci __free_page(page); 1358c2ecf20Sopenharmony_ci continue; 1368c2ecf20Sopenharmony_ci } 1378c2ecf20Sopenharmony_ci 1388c2ecf20Sopenharmony_ci /* otherwise install our page and loop around to alloc */ 1398c2ecf20Sopenharmony_ci rem->r_page = page; 1408c2ecf20Sopenharmony_ci rem->r_offset = 0; 1418c2ecf20Sopenharmony_ci } 1428c2ecf20Sopenharmony_ci 1438c2ecf20Sopenharmony_ci local_irq_restore(flags); 1448c2ecf20Sopenharmony_ci put_cpu(); 1458c2ecf20Sopenharmony_ciout: 1468c2ecf20Sopenharmony_ci rdsdebug("bytes %lu ret %d %p %u %u\n", bytes, ret, 1478c2ecf20Sopenharmony_ci ret ? NULL : sg_page(scat), ret ? 0 : scat->offset, 1488c2ecf20Sopenharmony_ci ret ? 0 : scat->length); 1498c2ecf20Sopenharmony_ci return ret; 1508c2ecf20Sopenharmony_ci} 1518c2ecf20Sopenharmony_ciEXPORT_SYMBOL_GPL(rds_page_remainder_alloc); 1528c2ecf20Sopenharmony_ci 1538c2ecf20Sopenharmony_civoid rds_page_exit(void) 1548c2ecf20Sopenharmony_ci{ 1558c2ecf20Sopenharmony_ci unsigned int cpu; 1568c2ecf20Sopenharmony_ci 1578c2ecf20Sopenharmony_ci for_each_possible_cpu(cpu) { 1588c2ecf20Sopenharmony_ci struct rds_page_remainder *rem; 1598c2ecf20Sopenharmony_ci 1608c2ecf20Sopenharmony_ci rem = &per_cpu(rds_page_remainders, cpu); 1618c2ecf20Sopenharmony_ci rdsdebug("cpu %u\n", cpu); 1628c2ecf20Sopenharmony_ci 1638c2ecf20Sopenharmony_ci if (rem->r_page) 1648c2ecf20Sopenharmony_ci __free_page(rem->r_page); 1658c2ecf20Sopenharmony_ci rem->r_page = NULL; 1668c2ecf20Sopenharmony_ci } 1678c2ecf20Sopenharmony_ci} 168