162306a36Sopenharmony_ci/* 262306a36Sopenharmony_ci * Copyright (c) 2006 Oracle. All rights reserved. 362306a36Sopenharmony_ci * 462306a36Sopenharmony_ci * This software is available to you under a choice of one of two 562306a36Sopenharmony_ci * licenses. You may choose to be licensed under the terms of the GNU 662306a36Sopenharmony_ci * General Public License (GPL) Version 2, available from the file 762306a36Sopenharmony_ci * COPYING in the main directory of this source tree, or the 862306a36Sopenharmony_ci * OpenIB.org BSD license below: 962306a36Sopenharmony_ci * 1062306a36Sopenharmony_ci * Redistribution and use in source and binary forms, with or 1162306a36Sopenharmony_ci * without modification, are permitted provided that the following 1262306a36Sopenharmony_ci * conditions are met: 1362306a36Sopenharmony_ci * 1462306a36Sopenharmony_ci * - Redistributions of source code must retain the above 1562306a36Sopenharmony_ci * copyright notice, this list of conditions and the following 1662306a36Sopenharmony_ci * disclaimer. 1762306a36Sopenharmony_ci * 1862306a36Sopenharmony_ci * - Redistributions in binary form must reproduce the above 1962306a36Sopenharmony_ci * copyright notice, this list of conditions and the following 2062306a36Sopenharmony_ci * disclaimer in the documentation and/or other materials 2162306a36Sopenharmony_ci * provided with the distribution. 2262306a36Sopenharmony_ci * 2362306a36Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 2462306a36Sopenharmony_ci * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 2562306a36Sopenharmony_ci * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 2662306a36Sopenharmony_ci * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 2762306a36Sopenharmony_ci * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 2862306a36Sopenharmony_ci * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 2962306a36Sopenharmony_ci * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 3062306a36Sopenharmony_ci * SOFTWARE. 3162306a36Sopenharmony_ci * 3262306a36Sopenharmony_ci */ 3362306a36Sopenharmony_ci#include <linux/highmem.h> 3462306a36Sopenharmony_ci#include <linux/gfp.h> 3562306a36Sopenharmony_ci#include <linux/cpu.h> 3662306a36Sopenharmony_ci#include <linux/export.h> 3762306a36Sopenharmony_ci 3862306a36Sopenharmony_ci#include "rds.h" 3962306a36Sopenharmony_ci 4062306a36Sopenharmony_cistruct rds_page_remainder { 4162306a36Sopenharmony_ci struct page *r_page; 4262306a36Sopenharmony_ci unsigned long r_offset; 4362306a36Sopenharmony_ci}; 4462306a36Sopenharmony_ci 4562306a36Sopenharmony_cistatic 4662306a36Sopenharmony_ciDEFINE_PER_CPU_SHARED_ALIGNED(struct rds_page_remainder, rds_page_remainders); 4762306a36Sopenharmony_ci 4862306a36Sopenharmony_ci/** 4962306a36Sopenharmony_ci * rds_page_remainder_alloc - build up regions of a message. 5062306a36Sopenharmony_ci * 5162306a36Sopenharmony_ci * @scat: Scatter list for message 5262306a36Sopenharmony_ci * @bytes: the number of bytes needed. 5362306a36Sopenharmony_ci * @gfp: the waiting behaviour of the allocation 5462306a36Sopenharmony_ci * 5562306a36Sopenharmony_ci * @gfp is always ored with __GFP_HIGHMEM. Callers must be prepared to 5662306a36Sopenharmony_ci * kmap the pages, etc. 5762306a36Sopenharmony_ci * 5862306a36Sopenharmony_ci * If @bytes is at least a full page then this just returns a page from 5962306a36Sopenharmony_ci * alloc_page(). 6062306a36Sopenharmony_ci * 6162306a36Sopenharmony_ci * If @bytes is a partial page then this stores the unused region of the 6262306a36Sopenharmony_ci * page in a per-cpu structure. Future partial-page allocations may be 6362306a36Sopenharmony_ci * satisfied from that cached region. This lets us waste less memory on 6462306a36Sopenharmony_ci * small allocations with minimal complexity. It works because the transmit 6562306a36Sopenharmony_ci * path passes read-only page regions down to devices. They hold a page 6662306a36Sopenharmony_ci * reference until they are done with the region. 6762306a36Sopenharmony_ci */ 6862306a36Sopenharmony_ciint rds_page_remainder_alloc(struct scatterlist *scat, unsigned long bytes, 6962306a36Sopenharmony_ci gfp_t gfp) 7062306a36Sopenharmony_ci{ 7162306a36Sopenharmony_ci struct rds_page_remainder *rem; 7262306a36Sopenharmony_ci unsigned long flags; 7362306a36Sopenharmony_ci struct page *page; 7462306a36Sopenharmony_ci int ret; 7562306a36Sopenharmony_ci 7662306a36Sopenharmony_ci gfp |= __GFP_HIGHMEM; 7762306a36Sopenharmony_ci 7862306a36Sopenharmony_ci /* jump straight to allocation if we're trying for a huge page */ 7962306a36Sopenharmony_ci if (bytes >= PAGE_SIZE) { 8062306a36Sopenharmony_ci page = alloc_page(gfp); 8162306a36Sopenharmony_ci if (!page) { 8262306a36Sopenharmony_ci ret = -ENOMEM; 8362306a36Sopenharmony_ci } else { 8462306a36Sopenharmony_ci sg_set_page(scat, page, PAGE_SIZE, 0); 8562306a36Sopenharmony_ci ret = 0; 8662306a36Sopenharmony_ci } 8762306a36Sopenharmony_ci goto out; 8862306a36Sopenharmony_ci } 8962306a36Sopenharmony_ci 9062306a36Sopenharmony_ci rem = &per_cpu(rds_page_remainders, get_cpu()); 9162306a36Sopenharmony_ci local_irq_save(flags); 9262306a36Sopenharmony_ci 9362306a36Sopenharmony_ci while (1) { 9462306a36Sopenharmony_ci /* avoid a tiny region getting stuck by tossing it */ 9562306a36Sopenharmony_ci if (rem->r_page && bytes > (PAGE_SIZE - rem->r_offset)) { 9662306a36Sopenharmony_ci rds_stats_inc(s_page_remainder_miss); 9762306a36Sopenharmony_ci __free_page(rem->r_page); 9862306a36Sopenharmony_ci rem->r_page = NULL; 9962306a36Sopenharmony_ci } 10062306a36Sopenharmony_ci 10162306a36Sopenharmony_ci /* hand out a fragment from the cached page */ 10262306a36Sopenharmony_ci if (rem->r_page && bytes <= (PAGE_SIZE - rem->r_offset)) { 10362306a36Sopenharmony_ci sg_set_page(scat, rem->r_page, bytes, rem->r_offset); 10462306a36Sopenharmony_ci get_page(sg_page(scat)); 10562306a36Sopenharmony_ci 10662306a36Sopenharmony_ci if (rem->r_offset != 0) 10762306a36Sopenharmony_ci rds_stats_inc(s_page_remainder_hit); 10862306a36Sopenharmony_ci 10962306a36Sopenharmony_ci rem->r_offset += ALIGN(bytes, 8); 11062306a36Sopenharmony_ci if (rem->r_offset >= PAGE_SIZE) { 11162306a36Sopenharmony_ci __free_page(rem->r_page); 11262306a36Sopenharmony_ci rem->r_page = NULL; 11362306a36Sopenharmony_ci } 11462306a36Sopenharmony_ci ret = 0; 11562306a36Sopenharmony_ci break; 11662306a36Sopenharmony_ci } 11762306a36Sopenharmony_ci 11862306a36Sopenharmony_ci /* alloc if there is nothing for us to use */ 11962306a36Sopenharmony_ci local_irq_restore(flags); 12062306a36Sopenharmony_ci put_cpu(); 12162306a36Sopenharmony_ci 12262306a36Sopenharmony_ci page = alloc_page(gfp); 12362306a36Sopenharmony_ci 12462306a36Sopenharmony_ci rem = &per_cpu(rds_page_remainders, get_cpu()); 12562306a36Sopenharmony_ci local_irq_save(flags); 12662306a36Sopenharmony_ci 12762306a36Sopenharmony_ci if (!page) { 12862306a36Sopenharmony_ci ret = -ENOMEM; 12962306a36Sopenharmony_ci break; 13062306a36Sopenharmony_ci } 13162306a36Sopenharmony_ci 13262306a36Sopenharmony_ci /* did someone race to fill the remainder before us? */ 13362306a36Sopenharmony_ci if (rem->r_page) { 13462306a36Sopenharmony_ci __free_page(page); 13562306a36Sopenharmony_ci continue; 13662306a36Sopenharmony_ci } 13762306a36Sopenharmony_ci 13862306a36Sopenharmony_ci /* otherwise install our page and loop around to alloc */ 13962306a36Sopenharmony_ci rem->r_page = page; 14062306a36Sopenharmony_ci rem->r_offset = 0; 14162306a36Sopenharmony_ci } 14262306a36Sopenharmony_ci 14362306a36Sopenharmony_ci local_irq_restore(flags); 14462306a36Sopenharmony_ci put_cpu(); 14562306a36Sopenharmony_ciout: 14662306a36Sopenharmony_ci rdsdebug("bytes %lu ret %d %p %u %u\n", bytes, ret, 14762306a36Sopenharmony_ci ret ? NULL : sg_page(scat), ret ? 0 : scat->offset, 14862306a36Sopenharmony_ci ret ? 0 : scat->length); 14962306a36Sopenharmony_ci return ret; 15062306a36Sopenharmony_ci} 15162306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(rds_page_remainder_alloc); 15262306a36Sopenharmony_ci 15362306a36Sopenharmony_civoid rds_page_exit(void) 15462306a36Sopenharmony_ci{ 15562306a36Sopenharmony_ci unsigned int cpu; 15662306a36Sopenharmony_ci 15762306a36Sopenharmony_ci for_each_possible_cpu(cpu) { 15862306a36Sopenharmony_ci struct rds_page_remainder *rem; 15962306a36Sopenharmony_ci 16062306a36Sopenharmony_ci rem = &per_cpu(rds_page_remainders, cpu); 16162306a36Sopenharmony_ci rdsdebug("cpu %u\n", cpu); 16262306a36Sopenharmony_ci 16362306a36Sopenharmony_ci if (rem->r_page) 16462306a36Sopenharmony_ci __free_page(rem->r_page); 16562306a36Sopenharmony_ci rem->r_page = NULL; 16662306a36Sopenharmony_ci } 16762306a36Sopenharmony_ci} 168