18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
28c2ecf20Sopenharmony_ci#include <linux/swap_cgroup.h>
38c2ecf20Sopenharmony_ci#include <linux/vmalloc.h>
48c2ecf20Sopenharmony_ci#include <linux/mm.h>
58c2ecf20Sopenharmony_ci
68c2ecf20Sopenharmony_ci#include <linux/swapops.h> /* depends on mm.h include */
78c2ecf20Sopenharmony_ci
88c2ecf20Sopenharmony_cistatic DEFINE_MUTEX(swap_cgroup_mutex);
98c2ecf20Sopenharmony_cistruct swap_cgroup_ctrl {
108c2ecf20Sopenharmony_ci	struct page **map;
118c2ecf20Sopenharmony_ci	unsigned long length;
128c2ecf20Sopenharmony_ci	spinlock_t	lock;
138c2ecf20Sopenharmony_ci};
148c2ecf20Sopenharmony_ci
158c2ecf20Sopenharmony_cistatic struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES];
168c2ecf20Sopenharmony_ci
/* Record kept for each swap offset: the id recorded for that entry. */
struct swap_cgroup {
	unsigned short id;
};

/* How many struct swap_cgroup records fit into a single page. */
#define SC_PER_PAGE	(PAGE_SIZE / sizeof(struct swap_cgroup))
218c2ecf20Sopenharmony_ci
/*
 * SwapCgroup implements "lookup" and "exchange" operations.
 * In typical usage, this swap_cgroup is accessed via memcg's charge/uncharge
 * against SwapCache. At swap_free(), it is accessed directly from swap.
 *
 * This means:
 *  - there is no race in "exchange" when we are accessed via SwapCache,
 *    because SwapCache (and its swp_entry) is under lock.
 *  - when called via swap_free(), there is no user of this entry and no race.
 * Hence no lock is needed around "exchange".
 *
 * NOTE(review): swap_cgroup_cmpxchg() and swap_cgroup_record() in this file
 * nevertheless update ids under ctrl->lock, so the paragraph above may be
 * stale - confirm before relying on it.
 *
 * TODO: we can push these buffers out to HIGHMEM.
 */
358c2ecf20Sopenharmony_ci
368c2ecf20Sopenharmony_ci/*
378c2ecf20Sopenharmony_ci * allocate buffer for swap_cgroup.
388c2ecf20Sopenharmony_ci */
398c2ecf20Sopenharmony_cistatic int swap_cgroup_prepare(int type)
408c2ecf20Sopenharmony_ci{
418c2ecf20Sopenharmony_ci	struct page *page;
428c2ecf20Sopenharmony_ci	struct swap_cgroup_ctrl *ctrl;
438c2ecf20Sopenharmony_ci	unsigned long idx, max;
448c2ecf20Sopenharmony_ci
458c2ecf20Sopenharmony_ci	ctrl = &swap_cgroup_ctrl[type];
468c2ecf20Sopenharmony_ci
478c2ecf20Sopenharmony_ci	for (idx = 0; idx < ctrl->length; idx++) {
488c2ecf20Sopenharmony_ci		page = alloc_page(GFP_KERNEL | __GFP_ZERO);
498c2ecf20Sopenharmony_ci		if (!page)
508c2ecf20Sopenharmony_ci			goto not_enough_page;
518c2ecf20Sopenharmony_ci		ctrl->map[idx] = page;
528c2ecf20Sopenharmony_ci
538c2ecf20Sopenharmony_ci		if (!(idx % SWAP_CLUSTER_MAX))
548c2ecf20Sopenharmony_ci			cond_resched();
558c2ecf20Sopenharmony_ci	}
568c2ecf20Sopenharmony_ci	return 0;
578c2ecf20Sopenharmony_cinot_enough_page:
588c2ecf20Sopenharmony_ci	max = idx;
598c2ecf20Sopenharmony_ci	for (idx = 0; idx < max; idx++)
608c2ecf20Sopenharmony_ci		__free_page(ctrl->map[idx]);
618c2ecf20Sopenharmony_ci
628c2ecf20Sopenharmony_ci	return -ENOMEM;
638c2ecf20Sopenharmony_ci}
648c2ecf20Sopenharmony_ci
658c2ecf20Sopenharmony_cistatic struct swap_cgroup *__lookup_swap_cgroup(struct swap_cgroup_ctrl *ctrl,
668c2ecf20Sopenharmony_ci						pgoff_t offset)
678c2ecf20Sopenharmony_ci{
688c2ecf20Sopenharmony_ci	struct page *mappage;
698c2ecf20Sopenharmony_ci	struct swap_cgroup *sc;
708c2ecf20Sopenharmony_ci
718c2ecf20Sopenharmony_ci	mappage = ctrl->map[offset / SC_PER_PAGE];
728c2ecf20Sopenharmony_ci	sc = page_address(mappage);
738c2ecf20Sopenharmony_ci	return sc + offset % SC_PER_PAGE;
748c2ecf20Sopenharmony_ci}
758c2ecf20Sopenharmony_ci
768c2ecf20Sopenharmony_cistatic struct swap_cgroup *lookup_swap_cgroup(swp_entry_t ent,
778c2ecf20Sopenharmony_ci					struct swap_cgroup_ctrl **ctrlp)
788c2ecf20Sopenharmony_ci{
798c2ecf20Sopenharmony_ci	pgoff_t offset = swp_offset(ent);
808c2ecf20Sopenharmony_ci	struct swap_cgroup_ctrl *ctrl;
818c2ecf20Sopenharmony_ci
828c2ecf20Sopenharmony_ci	ctrl = &swap_cgroup_ctrl[swp_type(ent)];
838c2ecf20Sopenharmony_ci	if (ctrlp)
848c2ecf20Sopenharmony_ci		*ctrlp = ctrl;
858c2ecf20Sopenharmony_ci	return __lookup_swap_cgroup(ctrl, offset);
868c2ecf20Sopenharmony_ci}
878c2ecf20Sopenharmony_ci
888c2ecf20Sopenharmony_ci/**
898c2ecf20Sopenharmony_ci * swap_cgroup_cmpxchg - cmpxchg mem_cgroup's id for this swp_entry.
908c2ecf20Sopenharmony_ci * @ent: swap entry to be cmpxchged
918c2ecf20Sopenharmony_ci * @old: old id
928c2ecf20Sopenharmony_ci * @new: new id
938c2ecf20Sopenharmony_ci *
948c2ecf20Sopenharmony_ci * Returns old id at success, 0 at failure.
958c2ecf20Sopenharmony_ci * (There is no mem_cgroup using 0 as its id)
968c2ecf20Sopenharmony_ci */
978c2ecf20Sopenharmony_ciunsigned short swap_cgroup_cmpxchg(swp_entry_t ent,
988c2ecf20Sopenharmony_ci					unsigned short old, unsigned short new)
998c2ecf20Sopenharmony_ci{
1008c2ecf20Sopenharmony_ci	struct swap_cgroup_ctrl *ctrl;
1018c2ecf20Sopenharmony_ci	struct swap_cgroup *sc;
1028c2ecf20Sopenharmony_ci	unsigned long flags;
1038c2ecf20Sopenharmony_ci	unsigned short retval;
1048c2ecf20Sopenharmony_ci
1058c2ecf20Sopenharmony_ci	sc = lookup_swap_cgroup(ent, &ctrl);
1068c2ecf20Sopenharmony_ci
1078c2ecf20Sopenharmony_ci	spin_lock_irqsave(&ctrl->lock, flags);
1088c2ecf20Sopenharmony_ci	retval = sc->id;
1098c2ecf20Sopenharmony_ci	if (retval == old)
1108c2ecf20Sopenharmony_ci		sc->id = new;
1118c2ecf20Sopenharmony_ci	else
1128c2ecf20Sopenharmony_ci		retval = 0;
1138c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&ctrl->lock, flags);
1148c2ecf20Sopenharmony_ci	return retval;
1158c2ecf20Sopenharmony_ci}
1168c2ecf20Sopenharmony_ci
1178c2ecf20Sopenharmony_ci/**
1188c2ecf20Sopenharmony_ci * swap_cgroup_record - record mem_cgroup for a set of swap entries
1198c2ecf20Sopenharmony_ci * @ent: the first swap entry to be recorded into
1208c2ecf20Sopenharmony_ci * @id: mem_cgroup to be recorded
1218c2ecf20Sopenharmony_ci * @nr_ents: number of swap entries to be recorded
1228c2ecf20Sopenharmony_ci *
1238c2ecf20Sopenharmony_ci * Returns old value at success, 0 at failure.
1248c2ecf20Sopenharmony_ci * (Of course, old value can be 0.)
1258c2ecf20Sopenharmony_ci */
1268c2ecf20Sopenharmony_ciunsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id,
1278c2ecf20Sopenharmony_ci				  unsigned int nr_ents)
1288c2ecf20Sopenharmony_ci{
1298c2ecf20Sopenharmony_ci	struct swap_cgroup_ctrl *ctrl;
1308c2ecf20Sopenharmony_ci	struct swap_cgroup *sc;
1318c2ecf20Sopenharmony_ci	unsigned short old;
1328c2ecf20Sopenharmony_ci	unsigned long flags;
1338c2ecf20Sopenharmony_ci	pgoff_t offset = swp_offset(ent);
1348c2ecf20Sopenharmony_ci	pgoff_t end = offset + nr_ents;
1358c2ecf20Sopenharmony_ci
1368c2ecf20Sopenharmony_ci	sc = lookup_swap_cgroup(ent, &ctrl);
1378c2ecf20Sopenharmony_ci
1388c2ecf20Sopenharmony_ci	spin_lock_irqsave(&ctrl->lock, flags);
1398c2ecf20Sopenharmony_ci	old = sc->id;
1408c2ecf20Sopenharmony_ci	for (;;) {
1418c2ecf20Sopenharmony_ci		VM_BUG_ON(sc->id != old);
1428c2ecf20Sopenharmony_ci		sc->id = id;
1438c2ecf20Sopenharmony_ci		offset++;
1448c2ecf20Sopenharmony_ci		if (offset == end)
1458c2ecf20Sopenharmony_ci			break;
1468c2ecf20Sopenharmony_ci		if (offset % SC_PER_PAGE)
1478c2ecf20Sopenharmony_ci			sc++;
1488c2ecf20Sopenharmony_ci		else
1498c2ecf20Sopenharmony_ci			sc = __lookup_swap_cgroup(ctrl, offset);
1508c2ecf20Sopenharmony_ci	}
1518c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&ctrl->lock, flags);
1528c2ecf20Sopenharmony_ci
1538c2ecf20Sopenharmony_ci	return old;
1548c2ecf20Sopenharmony_ci}
1558c2ecf20Sopenharmony_ci
1568c2ecf20Sopenharmony_ci/**
1578c2ecf20Sopenharmony_ci * lookup_swap_cgroup_id - lookup mem_cgroup id tied to swap entry
1588c2ecf20Sopenharmony_ci * @ent: swap entry to be looked up.
1598c2ecf20Sopenharmony_ci *
1608c2ecf20Sopenharmony_ci * Returns ID of mem_cgroup at success. 0 at failure. (0 is invalid ID)
1618c2ecf20Sopenharmony_ci */
1628c2ecf20Sopenharmony_ciunsigned short lookup_swap_cgroup_id(swp_entry_t ent)
1638c2ecf20Sopenharmony_ci{
1648c2ecf20Sopenharmony_ci	return lookup_swap_cgroup(ent, NULL)->id;
1658c2ecf20Sopenharmony_ci}
1668c2ecf20Sopenharmony_ci
1678c2ecf20Sopenharmony_ciint swap_cgroup_swapon(int type, unsigned long max_pages)
1688c2ecf20Sopenharmony_ci{
1698c2ecf20Sopenharmony_ci	void *array;
1708c2ecf20Sopenharmony_ci	unsigned long array_size;
1718c2ecf20Sopenharmony_ci	unsigned long length;
1728c2ecf20Sopenharmony_ci	struct swap_cgroup_ctrl *ctrl;
1738c2ecf20Sopenharmony_ci
1748c2ecf20Sopenharmony_ci	length = DIV_ROUND_UP(max_pages, SC_PER_PAGE);
1758c2ecf20Sopenharmony_ci	array_size = length * sizeof(void *);
1768c2ecf20Sopenharmony_ci
1778c2ecf20Sopenharmony_ci	array = vzalloc(array_size);
1788c2ecf20Sopenharmony_ci	if (!array)
1798c2ecf20Sopenharmony_ci		goto nomem;
1808c2ecf20Sopenharmony_ci
1818c2ecf20Sopenharmony_ci	ctrl = &swap_cgroup_ctrl[type];
1828c2ecf20Sopenharmony_ci	mutex_lock(&swap_cgroup_mutex);
1838c2ecf20Sopenharmony_ci	ctrl->length = length;
1848c2ecf20Sopenharmony_ci	ctrl->map = array;
1858c2ecf20Sopenharmony_ci	spin_lock_init(&ctrl->lock);
1868c2ecf20Sopenharmony_ci	if (swap_cgroup_prepare(type)) {
1878c2ecf20Sopenharmony_ci		/* memory shortage */
1888c2ecf20Sopenharmony_ci		ctrl->map = NULL;
1898c2ecf20Sopenharmony_ci		ctrl->length = 0;
1908c2ecf20Sopenharmony_ci		mutex_unlock(&swap_cgroup_mutex);
1918c2ecf20Sopenharmony_ci		vfree(array);
1928c2ecf20Sopenharmony_ci		goto nomem;
1938c2ecf20Sopenharmony_ci	}
1948c2ecf20Sopenharmony_ci	mutex_unlock(&swap_cgroup_mutex);
1958c2ecf20Sopenharmony_ci
1968c2ecf20Sopenharmony_ci	return 0;
1978c2ecf20Sopenharmony_cinomem:
1988c2ecf20Sopenharmony_ci	pr_info("couldn't allocate enough memory for swap_cgroup\n");
1998c2ecf20Sopenharmony_ci	pr_info("swap_cgroup can be disabled by swapaccount=0 boot option\n");
2008c2ecf20Sopenharmony_ci	return -ENOMEM;
2018c2ecf20Sopenharmony_ci}
2028c2ecf20Sopenharmony_ci
2038c2ecf20Sopenharmony_civoid swap_cgroup_swapoff(int type)
2048c2ecf20Sopenharmony_ci{
2058c2ecf20Sopenharmony_ci	struct page **map;
2068c2ecf20Sopenharmony_ci	unsigned long i, length;
2078c2ecf20Sopenharmony_ci	struct swap_cgroup_ctrl *ctrl;
2088c2ecf20Sopenharmony_ci
2098c2ecf20Sopenharmony_ci	mutex_lock(&swap_cgroup_mutex);
2108c2ecf20Sopenharmony_ci	ctrl = &swap_cgroup_ctrl[type];
2118c2ecf20Sopenharmony_ci	map = ctrl->map;
2128c2ecf20Sopenharmony_ci	length = ctrl->length;
2138c2ecf20Sopenharmony_ci	ctrl->map = NULL;
2148c2ecf20Sopenharmony_ci	ctrl->length = 0;
2158c2ecf20Sopenharmony_ci	mutex_unlock(&swap_cgroup_mutex);
2168c2ecf20Sopenharmony_ci
2178c2ecf20Sopenharmony_ci	if (map) {
2188c2ecf20Sopenharmony_ci		for (i = 0; i < length; i++) {
2198c2ecf20Sopenharmony_ci			struct page *page = map[i];
2208c2ecf20Sopenharmony_ci			if (page)
2218c2ecf20Sopenharmony_ci				__free_page(page);
2228c2ecf20Sopenharmony_ci			if (!(i % SWAP_CLUSTER_MAX))
2238c2ecf20Sopenharmony_ci				cond_resched();
2248c2ecf20Sopenharmony_ci		}
2258c2ecf20Sopenharmony_ci		vfree(map);
2268c2ecf20Sopenharmony_ci	}
2278c2ecf20Sopenharmony_ci}
228