/*
 * Copyright (c) 2006 Oracle.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */
3362306a36Sopenharmony_ci#include <linux/highmem.h>
3462306a36Sopenharmony_ci#include <linux/gfp.h>
3562306a36Sopenharmony_ci#include <linux/cpu.h>
3662306a36Sopenharmony_ci#include <linux/export.h>
3762306a36Sopenharmony_ci
3862306a36Sopenharmony_ci#include "rds.h"
3962306a36Sopenharmony_ci
/*
 * The unused tail of a page that rds_page_remainder_alloc() has only
 * partially handed out.  One instance exists per cpu.
 */
struct rds_page_remainder {
	struct page	*r_page;	/* cached page, NULL if none is cached */
	unsigned long	r_offset;	/* first unused byte within r_page */
};
4462306a36Sopenharmony_ci
4562306a36Sopenharmony_cistatic
4662306a36Sopenharmony_ciDEFINE_PER_CPU_SHARED_ALIGNED(struct rds_page_remainder, rds_page_remainders);
4762306a36Sopenharmony_ci
4862306a36Sopenharmony_ci/**
4962306a36Sopenharmony_ci * rds_page_remainder_alloc - build up regions of a message.
5062306a36Sopenharmony_ci *
5162306a36Sopenharmony_ci * @scat: Scatter list for message
5262306a36Sopenharmony_ci * @bytes: the number of bytes needed.
5362306a36Sopenharmony_ci * @gfp: the waiting behaviour of the allocation
5462306a36Sopenharmony_ci *
5562306a36Sopenharmony_ci * @gfp is always ored with __GFP_HIGHMEM.  Callers must be prepared to
5662306a36Sopenharmony_ci * kmap the pages, etc.
5762306a36Sopenharmony_ci *
5862306a36Sopenharmony_ci * If @bytes is at least a full page then this just returns a page from
5962306a36Sopenharmony_ci * alloc_page().
6062306a36Sopenharmony_ci *
6162306a36Sopenharmony_ci * If @bytes is a partial page then this stores the unused region of the
6262306a36Sopenharmony_ci * page in a per-cpu structure.  Future partial-page allocations may be
6362306a36Sopenharmony_ci * satisfied from that cached region.  This lets us waste less memory on
6462306a36Sopenharmony_ci * small allocations with minimal complexity.  It works because the transmit
6562306a36Sopenharmony_ci * path passes read-only page regions down to devices.  They hold a page
6662306a36Sopenharmony_ci * reference until they are done with the region.
6762306a36Sopenharmony_ci */
6862306a36Sopenharmony_ciint rds_page_remainder_alloc(struct scatterlist *scat, unsigned long bytes,
6962306a36Sopenharmony_ci			     gfp_t gfp)
7062306a36Sopenharmony_ci{
7162306a36Sopenharmony_ci	struct rds_page_remainder *rem;
7262306a36Sopenharmony_ci	unsigned long flags;
7362306a36Sopenharmony_ci	struct page *page;
7462306a36Sopenharmony_ci	int ret;
7562306a36Sopenharmony_ci
7662306a36Sopenharmony_ci	gfp |= __GFP_HIGHMEM;
7762306a36Sopenharmony_ci
7862306a36Sopenharmony_ci	/* jump straight to allocation if we're trying for a huge page */
7962306a36Sopenharmony_ci	if (bytes >= PAGE_SIZE) {
8062306a36Sopenharmony_ci		page = alloc_page(gfp);
8162306a36Sopenharmony_ci		if (!page) {
8262306a36Sopenharmony_ci			ret = -ENOMEM;
8362306a36Sopenharmony_ci		} else {
8462306a36Sopenharmony_ci			sg_set_page(scat, page, PAGE_SIZE, 0);
8562306a36Sopenharmony_ci			ret = 0;
8662306a36Sopenharmony_ci		}
8762306a36Sopenharmony_ci		goto out;
8862306a36Sopenharmony_ci	}
8962306a36Sopenharmony_ci
9062306a36Sopenharmony_ci	rem = &per_cpu(rds_page_remainders, get_cpu());
9162306a36Sopenharmony_ci	local_irq_save(flags);
9262306a36Sopenharmony_ci
9362306a36Sopenharmony_ci	while (1) {
9462306a36Sopenharmony_ci		/* avoid a tiny region getting stuck by tossing it */
9562306a36Sopenharmony_ci		if (rem->r_page && bytes > (PAGE_SIZE - rem->r_offset)) {
9662306a36Sopenharmony_ci			rds_stats_inc(s_page_remainder_miss);
9762306a36Sopenharmony_ci			__free_page(rem->r_page);
9862306a36Sopenharmony_ci			rem->r_page = NULL;
9962306a36Sopenharmony_ci		}
10062306a36Sopenharmony_ci
10162306a36Sopenharmony_ci		/* hand out a fragment from the cached page */
10262306a36Sopenharmony_ci		if (rem->r_page && bytes <= (PAGE_SIZE - rem->r_offset)) {
10362306a36Sopenharmony_ci			sg_set_page(scat, rem->r_page, bytes, rem->r_offset);
10462306a36Sopenharmony_ci			get_page(sg_page(scat));
10562306a36Sopenharmony_ci
10662306a36Sopenharmony_ci			if (rem->r_offset != 0)
10762306a36Sopenharmony_ci				rds_stats_inc(s_page_remainder_hit);
10862306a36Sopenharmony_ci
10962306a36Sopenharmony_ci			rem->r_offset += ALIGN(bytes, 8);
11062306a36Sopenharmony_ci			if (rem->r_offset >= PAGE_SIZE) {
11162306a36Sopenharmony_ci				__free_page(rem->r_page);
11262306a36Sopenharmony_ci				rem->r_page = NULL;
11362306a36Sopenharmony_ci			}
11462306a36Sopenharmony_ci			ret = 0;
11562306a36Sopenharmony_ci			break;
11662306a36Sopenharmony_ci		}
11762306a36Sopenharmony_ci
11862306a36Sopenharmony_ci		/* alloc if there is nothing for us to use */
11962306a36Sopenharmony_ci		local_irq_restore(flags);
12062306a36Sopenharmony_ci		put_cpu();
12162306a36Sopenharmony_ci
12262306a36Sopenharmony_ci		page = alloc_page(gfp);
12362306a36Sopenharmony_ci
12462306a36Sopenharmony_ci		rem = &per_cpu(rds_page_remainders, get_cpu());
12562306a36Sopenharmony_ci		local_irq_save(flags);
12662306a36Sopenharmony_ci
12762306a36Sopenharmony_ci		if (!page) {
12862306a36Sopenharmony_ci			ret = -ENOMEM;
12962306a36Sopenharmony_ci			break;
13062306a36Sopenharmony_ci		}
13162306a36Sopenharmony_ci
13262306a36Sopenharmony_ci		/* did someone race to fill the remainder before us? */
13362306a36Sopenharmony_ci		if (rem->r_page) {
13462306a36Sopenharmony_ci			__free_page(page);
13562306a36Sopenharmony_ci			continue;
13662306a36Sopenharmony_ci		}
13762306a36Sopenharmony_ci
13862306a36Sopenharmony_ci		/* otherwise install our page and loop around to alloc */
13962306a36Sopenharmony_ci		rem->r_page = page;
14062306a36Sopenharmony_ci		rem->r_offset = 0;
14162306a36Sopenharmony_ci	}
14262306a36Sopenharmony_ci
14362306a36Sopenharmony_ci	local_irq_restore(flags);
14462306a36Sopenharmony_ci	put_cpu();
14562306a36Sopenharmony_ciout:
14662306a36Sopenharmony_ci	rdsdebug("bytes %lu ret %d %p %u %u\n", bytes, ret,
14762306a36Sopenharmony_ci		 ret ? NULL : sg_page(scat), ret ? 0 : scat->offset,
14862306a36Sopenharmony_ci		 ret ? 0 : scat->length);
14962306a36Sopenharmony_ci	return ret;
15062306a36Sopenharmony_ci}
15162306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(rds_page_remainder_alloc);
15262306a36Sopenharmony_ci
15362306a36Sopenharmony_civoid rds_page_exit(void)
15462306a36Sopenharmony_ci{
15562306a36Sopenharmony_ci	unsigned int cpu;
15662306a36Sopenharmony_ci
15762306a36Sopenharmony_ci	for_each_possible_cpu(cpu) {
15862306a36Sopenharmony_ci		struct rds_page_remainder *rem;
15962306a36Sopenharmony_ci
16062306a36Sopenharmony_ci		rem = &per_cpu(rds_page_remainders, cpu);
16162306a36Sopenharmony_ci		rdsdebug("cpu %u\n", cpu);
16262306a36Sopenharmony_ci
16362306a36Sopenharmony_ci		if (rem->r_page)
16462306a36Sopenharmony_ci			__free_page(rem->r_page);
16562306a36Sopenharmony_ci		rem->r_page = NULL;
16662306a36Sopenharmony_ci	}
16762306a36Sopenharmony_ci}
168