162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Collaborative memory management interface.
462306a36Sopenharmony_ci *
562306a36Sopenharmony_ci * Copyright (C) 2008 IBM Corporation
662306a36Sopenharmony_ci * Author(s): Brian King (brking@linux.vnet.ibm.com),
762306a36Sopenharmony_ci */
862306a36Sopenharmony_ci
962306a36Sopenharmony_ci#include <linux/ctype.h>
1062306a36Sopenharmony_ci#include <linux/delay.h>
1162306a36Sopenharmony_ci#include <linux/errno.h>
1262306a36Sopenharmony_ci#include <linux/fs.h>
1362306a36Sopenharmony_ci#include <linux/gfp.h>
1462306a36Sopenharmony_ci#include <linux/kthread.h>
1562306a36Sopenharmony_ci#include <linux/module.h>
1662306a36Sopenharmony_ci#include <linux/oom.h>
1762306a36Sopenharmony_ci#include <linux/reboot.h>
1862306a36Sopenharmony_ci#include <linux/sched.h>
1962306a36Sopenharmony_ci#include <linux/stringify.h>
2062306a36Sopenharmony_ci#include <linux/swap.h>
2162306a36Sopenharmony_ci#include <linux/device.h>
2262306a36Sopenharmony_ci#include <linux/balloon_compaction.h>
2362306a36Sopenharmony_ci#include <asm/firmware.h>
2462306a36Sopenharmony_ci#include <asm/hvcall.h>
2562306a36Sopenharmony_ci#include <asm/mmu.h>
2662306a36Sopenharmony_ci#include <linux/uaccess.h>
2762306a36Sopenharmony_ci#include <linux/memory.h>
2862306a36Sopenharmony_ci#include <asm/plpar_wrappers.h>
2962306a36Sopenharmony_ci
3062306a36Sopenharmony_ci#include "pseries.h"
3162306a36Sopenharmony_ci
/* Version string reported through MODULE_VERSION(). */
#define CMM_DRIVER_VERSION	"1.0.0"
/* Compile-time defaults for the module parameters declared below. */
#define CMM_DEFAULT_DELAY	1
#define CMM_HOTPLUG_DELAY	5
#define CMM_DEBUG			0
#define CMM_DISABLE		0
#define CMM_OOM_KB		1024
#define CMM_MIN_MEM_MB		256
/* Convert between kilobytes and pages by shifting with (PAGE_SHIFT - 10). */
#define KB2PAGES(_p)		((_p)>>(PAGE_SHIFT-10))
#define PAGES2KB(_p)		((_p)<<(PAGE_SHIFT-10))

/* Priority used for the memory hotplug notifier block (cmm_mem_nb). */
#define CMM_MEM_HOTPLUG_PRI	1

/* Seconds the worker thread sleeps between two polls. */
static unsigned int delay = CMM_DEFAULT_DELAY;
/* Seconds the worker thread pauses after it noticed a hotplug event. */
static unsigned int hotplug_delay = CMM_HOTPLUG_DELAY;
/* Amount of memory (in kb) handed back by the OOM notifier. */
static unsigned int oom_kb = CMM_OOM_KB;
/* Non-zero enables the cmm_dbg() messages. */
static unsigned int cmm_debug = CMM_DEBUG;
/* Non-zero means loaning is disabled (see the "disable" parameter). */
static unsigned int cmm_disabled = CMM_DISABLE;
/* Lower bound (in MB) of memory that cmm_get_mpp() keeps out of the loan target. */
static unsigned long min_mem_mb = CMM_MIN_MEM_MB;
/* When set, no hypervisor calls are made and the target comes from sysfs. */
static bool __read_mostly simulate;
/* Loan target (in kb) used instead of the hypervisor request when simulating. */
static unsigned long simulate_loan_target_kb;
/* Device registered on the "cmm" subsystem; carries the sysfs attributes. */
static struct device cmm_dev;
5362306a36Sopenharmony_ci
/* Module metadata. */
MODULE_AUTHOR("Brian King <brking@linux.vnet.ibm.com>");
MODULE_DESCRIPTION("IBM System p Collaborative Memory Manager");
MODULE_LICENSE("GPL");
MODULE_VERSION(CMM_DRIVER_VERSION);

/*
 * Tunables exposed as module parameters. All of them are registered with
 * mode 0644 except "simulate", which is registered read-only (0444).
 */
module_param_named(delay, delay, uint, 0644);
MODULE_PARM_DESC(delay, "Delay (in seconds) between polls to query hypervisor paging requests. "
		 "[Default=" __stringify(CMM_DEFAULT_DELAY) "]");
module_param_named(hotplug_delay, hotplug_delay, uint, 0644);
MODULE_PARM_DESC(hotplug_delay, "Delay (in seconds) after memory hotplug remove "
		 "before loaning resumes. "
		 "[Default=" __stringify(CMM_HOTPLUG_DELAY) "]");
module_param_named(oom_kb, oom_kb, uint, 0644);
MODULE_PARM_DESC(oom_kb, "Amount of memory in kb to free on OOM. "
		 "[Default=" __stringify(CMM_OOM_KB) "]");
module_param_named(min_mem_mb, min_mem_mb, ulong, 0644);
MODULE_PARM_DESC(min_mem_mb, "Minimum amount of memory (in MB) to not balloon. "
		 "[Default=" __stringify(CMM_MIN_MEM_MB) "]");
module_param_named(debug, cmm_debug, uint, 0644);
MODULE_PARM_DESC(debug, "Enable module debugging logging. Set to 1 to enable. "
		 "[Default=" __stringify(CMM_DEBUG) "]");
module_param_named(simulate, simulate, bool, 0444);
MODULE_PARM_DESC(simulate, "Enable simulation mode (no communication with hw).");
7762306a36Sopenharmony_ci
/*
 * cmm_dbg - emit a KERN_INFO message prefixed with "cmm: " when the "debug"
 * module parameter (cmm_debug) is non-zero.
 *
 * The body is wrapped in do { } while (0) so the macro expands to exactly one
 * statement. A bare "if (...) { ... }" expansion breaks (or silently rebinds
 * the else branch) when the macro is used as the body of an if/else.
 */
#define cmm_dbg(...)							\
	do {								\
		if (cmm_debug)						\
			printk(KERN_INFO "cmm: "__VA_ARGS__);		\
	} while (0)
7962306a36Sopenharmony_ci
/* Number of pages currently loaned; updated by the alloc/free helpers. */
static atomic_long_t loaned_pages;
/* Number of loaned pages the worker thread tries to reach. */
static unsigned long loaned_pages_target;
/* Running total of pages released by the OOM notifier; reset through sysfs. */
static unsigned long oom_freed_pages;

/* Taken by the memory notifier for the duration of an offline operation. */
static DEFINE_MUTEX(hotplug_mutex);
static int hotplug_occurred; /* protected by the hotplug mutex */

/* Worker thread created by kthread_run(); NULL after it has been stopped. */
static struct task_struct *cmm_thread_ptr;
/* Balloon bookkeeping holding the list of loaned pages. */
static struct balloon_dev_info b_dev_info;
8962306a36Sopenharmony_ci
9062306a36Sopenharmony_cistatic long plpar_page_set_loaned(struct page *page)
9162306a36Sopenharmony_ci{
9262306a36Sopenharmony_ci	const unsigned long vpa = page_to_phys(page);
9362306a36Sopenharmony_ci	unsigned long cmo_page_sz = cmo_get_page_size();
9462306a36Sopenharmony_ci	long rc = 0;
9562306a36Sopenharmony_ci	int i;
9662306a36Sopenharmony_ci
9762306a36Sopenharmony_ci	if (unlikely(simulate))
9862306a36Sopenharmony_ci		return 0;
9962306a36Sopenharmony_ci
10062306a36Sopenharmony_ci	for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz)
10162306a36Sopenharmony_ci		rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED, vpa + i, 0);
10262306a36Sopenharmony_ci
10362306a36Sopenharmony_ci	for (i -= cmo_page_sz; rc && i != 0; i -= cmo_page_sz)
10462306a36Sopenharmony_ci		plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE,
10562306a36Sopenharmony_ci				   vpa + i - cmo_page_sz, 0);
10662306a36Sopenharmony_ci
10762306a36Sopenharmony_ci	return rc;
10862306a36Sopenharmony_ci}
10962306a36Sopenharmony_ci
11062306a36Sopenharmony_cistatic long plpar_page_set_active(struct page *page)
11162306a36Sopenharmony_ci{
11262306a36Sopenharmony_ci	const unsigned long vpa = page_to_phys(page);
11362306a36Sopenharmony_ci	unsigned long cmo_page_sz = cmo_get_page_size();
11462306a36Sopenharmony_ci	long rc = 0;
11562306a36Sopenharmony_ci	int i;
11662306a36Sopenharmony_ci
11762306a36Sopenharmony_ci	if (unlikely(simulate))
11862306a36Sopenharmony_ci		return 0;
11962306a36Sopenharmony_ci
12062306a36Sopenharmony_ci	for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz)
12162306a36Sopenharmony_ci		rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE, vpa + i, 0);
12262306a36Sopenharmony_ci
12362306a36Sopenharmony_ci	for (i -= cmo_page_sz; rc && i != 0; i -= cmo_page_sz)
12462306a36Sopenharmony_ci		plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED,
12562306a36Sopenharmony_ci				   vpa + i - cmo_page_sz, 0);
12662306a36Sopenharmony_ci
12762306a36Sopenharmony_ci	return rc;
12862306a36Sopenharmony_ci}
12962306a36Sopenharmony_ci
13062306a36Sopenharmony_ci/**
13162306a36Sopenharmony_ci * cmm_alloc_pages - Allocate pages and mark them as loaned
13262306a36Sopenharmony_ci * @nr:	number of pages to allocate
13362306a36Sopenharmony_ci *
13462306a36Sopenharmony_ci * Return value:
13562306a36Sopenharmony_ci * 	number of pages requested to be allocated which were not
13662306a36Sopenharmony_ci **/
13762306a36Sopenharmony_cistatic long cmm_alloc_pages(long nr)
13862306a36Sopenharmony_ci{
13962306a36Sopenharmony_ci	struct page *page;
14062306a36Sopenharmony_ci	long rc;
14162306a36Sopenharmony_ci
14262306a36Sopenharmony_ci	cmm_dbg("Begin request for %ld pages\n", nr);
14362306a36Sopenharmony_ci
14462306a36Sopenharmony_ci	while (nr) {
14562306a36Sopenharmony_ci		/* Exit if a hotplug operation is in progress or occurred */
14662306a36Sopenharmony_ci		if (mutex_trylock(&hotplug_mutex)) {
14762306a36Sopenharmony_ci			if (hotplug_occurred) {
14862306a36Sopenharmony_ci				mutex_unlock(&hotplug_mutex);
14962306a36Sopenharmony_ci				break;
15062306a36Sopenharmony_ci			}
15162306a36Sopenharmony_ci			mutex_unlock(&hotplug_mutex);
15262306a36Sopenharmony_ci		} else {
15362306a36Sopenharmony_ci			break;
15462306a36Sopenharmony_ci		}
15562306a36Sopenharmony_ci
15662306a36Sopenharmony_ci		page = balloon_page_alloc();
15762306a36Sopenharmony_ci		if (!page)
15862306a36Sopenharmony_ci			break;
15962306a36Sopenharmony_ci		rc = plpar_page_set_loaned(page);
16062306a36Sopenharmony_ci		if (rc) {
16162306a36Sopenharmony_ci			pr_err("%s: Can not set page to loaned. rc=%ld\n", __func__, rc);
16262306a36Sopenharmony_ci			__free_page(page);
16362306a36Sopenharmony_ci			break;
16462306a36Sopenharmony_ci		}
16562306a36Sopenharmony_ci
16662306a36Sopenharmony_ci		balloon_page_enqueue(&b_dev_info, page);
16762306a36Sopenharmony_ci		atomic_long_inc(&loaned_pages);
16862306a36Sopenharmony_ci		adjust_managed_page_count(page, -1);
16962306a36Sopenharmony_ci		nr--;
17062306a36Sopenharmony_ci	}
17162306a36Sopenharmony_ci
17262306a36Sopenharmony_ci	cmm_dbg("End request with %ld pages unfulfilled\n", nr);
17362306a36Sopenharmony_ci	return nr;
17462306a36Sopenharmony_ci}
17562306a36Sopenharmony_ci
17662306a36Sopenharmony_ci/**
17762306a36Sopenharmony_ci * cmm_free_pages - Free pages and mark them as active
17862306a36Sopenharmony_ci * @nr:	number of pages to free
17962306a36Sopenharmony_ci *
18062306a36Sopenharmony_ci * Return value:
18162306a36Sopenharmony_ci * 	number of pages requested to be freed which were not
18262306a36Sopenharmony_ci **/
18362306a36Sopenharmony_cistatic long cmm_free_pages(long nr)
18462306a36Sopenharmony_ci{
18562306a36Sopenharmony_ci	struct page *page;
18662306a36Sopenharmony_ci
18762306a36Sopenharmony_ci	cmm_dbg("Begin free of %ld pages.\n", nr);
18862306a36Sopenharmony_ci	while (nr) {
18962306a36Sopenharmony_ci		page = balloon_page_dequeue(&b_dev_info);
19062306a36Sopenharmony_ci		if (!page)
19162306a36Sopenharmony_ci			break;
19262306a36Sopenharmony_ci		plpar_page_set_active(page);
19362306a36Sopenharmony_ci		adjust_managed_page_count(page, 1);
19462306a36Sopenharmony_ci		__free_page(page);
19562306a36Sopenharmony_ci		atomic_long_dec(&loaned_pages);
19662306a36Sopenharmony_ci		nr--;
19762306a36Sopenharmony_ci	}
19862306a36Sopenharmony_ci	cmm_dbg("End request with %ld pages unfulfilled\n", nr);
19962306a36Sopenharmony_ci	return nr;
20062306a36Sopenharmony_ci}
20162306a36Sopenharmony_ci
20262306a36Sopenharmony_ci/**
20362306a36Sopenharmony_ci * cmm_oom_notify - OOM notifier
20462306a36Sopenharmony_ci * @self:	notifier block struct
20562306a36Sopenharmony_ci * @dummy:	not used
20662306a36Sopenharmony_ci * @parm:	returned - number of pages freed
20762306a36Sopenharmony_ci *
20862306a36Sopenharmony_ci * Return value:
20962306a36Sopenharmony_ci * 	NOTIFY_OK
21062306a36Sopenharmony_ci **/
21162306a36Sopenharmony_cistatic int cmm_oom_notify(struct notifier_block *self,
21262306a36Sopenharmony_ci			  unsigned long dummy, void *parm)
21362306a36Sopenharmony_ci{
21462306a36Sopenharmony_ci	unsigned long *freed = parm;
21562306a36Sopenharmony_ci	long nr = KB2PAGES(oom_kb);
21662306a36Sopenharmony_ci
21762306a36Sopenharmony_ci	cmm_dbg("OOM processing started\n");
21862306a36Sopenharmony_ci	nr = cmm_free_pages(nr);
21962306a36Sopenharmony_ci	loaned_pages_target = atomic_long_read(&loaned_pages);
22062306a36Sopenharmony_ci	*freed += KB2PAGES(oom_kb) - nr;
22162306a36Sopenharmony_ci	oom_freed_pages += KB2PAGES(oom_kb) - nr;
22262306a36Sopenharmony_ci	cmm_dbg("OOM processing complete\n");
22362306a36Sopenharmony_ci	return NOTIFY_OK;
22462306a36Sopenharmony_ci}
22562306a36Sopenharmony_ci
/**
 * cmm_get_mpp - Read memory performance parameters
 *
 * Makes hcall to query the current page loan request from the hypervisor
 * (in simulation mode simulate_loan_target_kb is used instead) and
 * recomputes loaned_pages_target from it.
 *
 * The new target is reduced by oom_freed_pages, limited so that
 * min_mem_mb worth of pages is left out of the loan, and never negative.
 * If the hcall does not return H_SUCCESS, loaned_pages_target is left
 * unchanged.
 *
 * Return value:
 * 	nothing
 **/
static void cmm_get_mpp(void)
{
	const long __loaned_pages = atomic_long_read(&loaned_pages);
	/* totalram_pages() plus the pages that are currently loaned */
	const long total_pages = totalram_pages() + __loaned_pages;
	int rc;
	struct hvcall_mpp_data mpp_data;
	signed long active_pages_target, page_loan_request, target;
	signed long min_mem_pages = (min_mem_mb * 1024 * 1024) / PAGE_SIZE;

	if (likely(!simulate)) {
		rc = h_get_mpp(&mpp_data);
		if (rc != H_SUCCESS)
			return;
		/* the request is a delta, relative to what is already loaned */
		page_loan_request = div_s64((s64)mpp_data.loan_request,
					    PAGE_SIZE);
		target = page_loan_request + __loaned_pages;
	} else {
		/* the simulated value is an absolute target; derive the delta */
		target = KB2PAGES(simulate_loan_target_kb);
		page_loan_request = target - __loaned_pages;
	}

	if (target < 0 || total_pages < min_mem_pages)
		target = 0;

	/* target is >= 0 here, so the comparison with the unsigned counter is safe */
	if (target > oom_freed_pages)
		target -= oom_freed_pages;
	else
		target = 0;

	active_pages_target = total_pages - target;

	/* do not loan so much that fewer than min_mem_pages would stay active */
	if (min_mem_pages > active_pages_target)
		target = total_pages - min_mem_pages;

	if (target < 0)
		target = 0;

	loaned_pages_target = target;

	cmm_dbg("delta = %ld, loaned = %lu, target = %lu, oom = %lu, totalram = %lu\n",
		page_loan_request, __loaned_pages, loaned_pages_target,
		oom_freed_pages, totalram_pages());
}
27762306a36Sopenharmony_ci
/* Notifier block that routes OOM notifications to cmm_oom_notify(). */
static struct notifier_block cmm_oom_nb = {
	.notifier_call = cmm_oom_notify
};
28162306a36Sopenharmony_ci
28262306a36Sopenharmony_ci/**
28362306a36Sopenharmony_ci * cmm_thread - CMM task thread
28462306a36Sopenharmony_ci * @dummy:	not used
28562306a36Sopenharmony_ci *
28662306a36Sopenharmony_ci * Return value:
28762306a36Sopenharmony_ci * 	0
28862306a36Sopenharmony_ci **/
28962306a36Sopenharmony_cistatic int cmm_thread(void *dummy)
29062306a36Sopenharmony_ci{
29162306a36Sopenharmony_ci	unsigned long timeleft;
29262306a36Sopenharmony_ci	long __loaned_pages;
29362306a36Sopenharmony_ci
29462306a36Sopenharmony_ci	while (1) {
29562306a36Sopenharmony_ci		timeleft = msleep_interruptible(delay * 1000);
29662306a36Sopenharmony_ci
29762306a36Sopenharmony_ci		if (kthread_should_stop() || timeleft)
29862306a36Sopenharmony_ci			break;
29962306a36Sopenharmony_ci
30062306a36Sopenharmony_ci		if (mutex_trylock(&hotplug_mutex)) {
30162306a36Sopenharmony_ci			if (hotplug_occurred) {
30262306a36Sopenharmony_ci				hotplug_occurred = 0;
30362306a36Sopenharmony_ci				mutex_unlock(&hotplug_mutex);
30462306a36Sopenharmony_ci				cmm_dbg("Hotplug operation has occurred, "
30562306a36Sopenharmony_ci						"loaning activity suspended "
30662306a36Sopenharmony_ci						"for %d seconds.\n",
30762306a36Sopenharmony_ci						hotplug_delay);
30862306a36Sopenharmony_ci				timeleft = msleep_interruptible(hotplug_delay *
30962306a36Sopenharmony_ci						1000);
31062306a36Sopenharmony_ci				if (kthread_should_stop() || timeleft)
31162306a36Sopenharmony_ci					break;
31262306a36Sopenharmony_ci				continue;
31362306a36Sopenharmony_ci			}
31462306a36Sopenharmony_ci			mutex_unlock(&hotplug_mutex);
31562306a36Sopenharmony_ci		} else {
31662306a36Sopenharmony_ci			cmm_dbg("Hotplug operation in progress, activity "
31762306a36Sopenharmony_ci					"suspended\n");
31862306a36Sopenharmony_ci			continue;
31962306a36Sopenharmony_ci		}
32062306a36Sopenharmony_ci
32162306a36Sopenharmony_ci		cmm_get_mpp();
32262306a36Sopenharmony_ci
32362306a36Sopenharmony_ci		__loaned_pages = atomic_long_read(&loaned_pages);
32462306a36Sopenharmony_ci		if (loaned_pages_target > __loaned_pages) {
32562306a36Sopenharmony_ci			if (cmm_alloc_pages(loaned_pages_target - __loaned_pages))
32662306a36Sopenharmony_ci				loaned_pages_target = __loaned_pages;
32762306a36Sopenharmony_ci		} else if (loaned_pages_target < __loaned_pages)
32862306a36Sopenharmony_ci			cmm_free_pages(__loaned_pages - loaned_pages_target);
32962306a36Sopenharmony_ci	}
33062306a36Sopenharmony_ci	return 0;
33162306a36Sopenharmony_ci}
33262306a36Sopenharmony_ci
/*
 * CMM_SHOW - define a read-only (0444) device attribute called @name.
 *
 * Expands to a show_<name>() function that formats @args with @format into
 * the sysfs buffer, plus the matching DEVICE_ATTR() definition.
 */
#define CMM_SHOW(name, format, args...)			\
	static ssize_t show_##name(struct device *dev,	\
				   struct device_attribute *attr,	\
				   char *buf)			\
	{							\
		return sprintf(buf, format, ##args);		\
	}							\
	static DEVICE_ATTR(name, 0444, show_##name, NULL)

/* loaned_kb: pages currently loaned, reported in kb */
CMM_SHOW(loaned_kb, "%lu\n", PAGES2KB(atomic_long_read(&loaned_pages)));
/* loaned_target_kb: current loan target, reported in kb */
CMM_SHOW(loaned_target_kb, "%lu\n", PAGES2KB(loaned_pages_target));
34462306a36Sopenharmony_ci
34562306a36Sopenharmony_cistatic ssize_t show_oom_pages(struct device *dev,
34662306a36Sopenharmony_ci			      struct device_attribute *attr, char *buf)
34762306a36Sopenharmony_ci{
34862306a36Sopenharmony_ci	return sprintf(buf, "%lu\n", PAGES2KB(oom_freed_pages));
34962306a36Sopenharmony_ci}
35062306a36Sopenharmony_ci
35162306a36Sopenharmony_cistatic ssize_t store_oom_pages(struct device *dev,
35262306a36Sopenharmony_ci			       struct device_attribute *attr,
35362306a36Sopenharmony_ci			       const char *buf, size_t count)
35462306a36Sopenharmony_ci{
35562306a36Sopenharmony_ci	unsigned long val = simple_strtoul (buf, NULL, 10);
35662306a36Sopenharmony_ci
35762306a36Sopenharmony_ci	if (!capable(CAP_SYS_ADMIN))
35862306a36Sopenharmony_ci		return -EPERM;
35962306a36Sopenharmony_ci	if (val != 0)
36062306a36Sopenharmony_ci		return -EBADMSG;
36162306a36Sopenharmony_ci
36262306a36Sopenharmony_ci	oom_freed_pages = 0;
36362306a36Sopenharmony_ci	return count;
36462306a36Sopenharmony_ci}
36562306a36Sopenharmony_ci
/* oom_freed_kb: readable by everyone, writable by the owner (mode 0644). */
static DEVICE_ATTR(oom_freed_kb, 0644,
		   show_oom_pages, store_oom_pages);

/* Attributes that cmm_sysfs_register() always creates on the cmm device. */
static struct device_attribute *cmm_attrs[] = {
	&dev_attr_loaned_kb,
	&dev_attr_loaned_target_kb,
	&dev_attr_oom_freed_kb,
};

/* Only created by cmm_sysfs_register() when simulation mode is enabled. */
static DEVICE_ULONG_ATTR(simulate_loan_target_kb, 0644,
			 simulate_loan_target_kb);

/* Subsystem ("cmm") that the cmm device is registered on. */
static struct bus_type cmm_subsys = {
	.name = "cmm",
	.dev_name = "cmm",
};
38262306a36Sopenharmony_ci
/*
 * Release callback installed on the cmm device by cmm_sysfs_register().
 * Intentionally empty: the only device it is installed on (cmm_dev) is a
 * static object, so there is nothing to free here.
 */
static void cmm_release_device(struct device *dev)
{
}
38662306a36Sopenharmony_ci
38762306a36Sopenharmony_ci/**
38862306a36Sopenharmony_ci * cmm_sysfs_register - Register with sysfs
38962306a36Sopenharmony_ci *
39062306a36Sopenharmony_ci * Return value:
39162306a36Sopenharmony_ci * 	0 on success / other on failure
39262306a36Sopenharmony_ci **/
39362306a36Sopenharmony_cistatic int cmm_sysfs_register(struct device *dev)
39462306a36Sopenharmony_ci{
39562306a36Sopenharmony_ci	int i, rc;
39662306a36Sopenharmony_ci
39762306a36Sopenharmony_ci	if ((rc = subsys_system_register(&cmm_subsys, NULL)))
39862306a36Sopenharmony_ci		return rc;
39962306a36Sopenharmony_ci
40062306a36Sopenharmony_ci	dev->id = 0;
40162306a36Sopenharmony_ci	dev->bus = &cmm_subsys;
40262306a36Sopenharmony_ci	dev->release = cmm_release_device;
40362306a36Sopenharmony_ci
40462306a36Sopenharmony_ci	if ((rc = device_register(dev)))
40562306a36Sopenharmony_ci		goto subsys_unregister;
40662306a36Sopenharmony_ci
40762306a36Sopenharmony_ci	for (i = 0; i < ARRAY_SIZE(cmm_attrs); i++) {
40862306a36Sopenharmony_ci		if ((rc = device_create_file(dev, cmm_attrs[i])))
40962306a36Sopenharmony_ci			goto fail;
41062306a36Sopenharmony_ci	}
41162306a36Sopenharmony_ci
41262306a36Sopenharmony_ci	if (!simulate)
41362306a36Sopenharmony_ci		return 0;
41462306a36Sopenharmony_ci	rc = device_create_file(dev, &dev_attr_simulate_loan_target_kb.attr);
41562306a36Sopenharmony_ci	if (rc)
41662306a36Sopenharmony_ci		goto fail;
41762306a36Sopenharmony_ci	return 0;
41862306a36Sopenharmony_ci
41962306a36Sopenharmony_cifail:
42062306a36Sopenharmony_ci	while (--i >= 0)
42162306a36Sopenharmony_ci		device_remove_file(dev, cmm_attrs[i]);
42262306a36Sopenharmony_ci	device_unregister(dev);
42362306a36Sopenharmony_cisubsys_unregister:
42462306a36Sopenharmony_ci	bus_unregister(&cmm_subsys);
42562306a36Sopenharmony_ci	return rc;
42662306a36Sopenharmony_ci}
42762306a36Sopenharmony_ci
42862306a36Sopenharmony_ci/**
42962306a36Sopenharmony_ci * cmm_unregister_sysfs - Unregister from sysfs
43062306a36Sopenharmony_ci *
43162306a36Sopenharmony_ci **/
43262306a36Sopenharmony_cistatic void cmm_unregister_sysfs(struct device *dev)
43362306a36Sopenharmony_ci{
43462306a36Sopenharmony_ci	int i;
43562306a36Sopenharmony_ci
43662306a36Sopenharmony_ci	for (i = 0; i < ARRAY_SIZE(cmm_attrs); i++)
43762306a36Sopenharmony_ci		device_remove_file(dev, cmm_attrs[i]);
43862306a36Sopenharmony_ci	device_unregister(dev);
43962306a36Sopenharmony_ci	bus_unregister(&cmm_subsys);
44062306a36Sopenharmony_ci}
44162306a36Sopenharmony_ci
44262306a36Sopenharmony_ci/**
44362306a36Sopenharmony_ci * cmm_reboot_notifier - Make sure pages are not still marked as "loaned"
44462306a36Sopenharmony_ci *
44562306a36Sopenharmony_ci **/
44662306a36Sopenharmony_cistatic int cmm_reboot_notifier(struct notifier_block *nb,
44762306a36Sopenharmony_ci			       unsigned long action, void *unused)
44862306a36Sopenharmony_ci{
44962306a36Sopenharmony_ci	if (action == SYS_RESTART) {
45062306a36Sopenharmony_ci		if (cmm_thread_ptr)
45162306a36Sopenharmony_ci			kthread_stop(cmm_thread_ptr);
45262306a36Sopenharmony_ci		cmm_thread_ptr = NULL;
45362306a36Sopenharmony_ci		cmm_free_pages(atomic_long_read(&loaned_pages));
45462306a36Sopenharmony_ci	}
45562306a36Sopenharmony_ci	return NOTIFY_DONE;
45662306a36Sopenharmony_ci}
45762306a36Sopenharmony_ci
45862306a36Sopenharmony_cistatic struct notifier_block cmm_reboot_nb = {
45962306a36Sopenharmony_ci	.notifier_call = cmm_reboot_notifier,
46062306a36Sopenharmony_ci};
46162306a36Sopenharmony_ci
46262306a36Sopenharmony_ci/**
46362306a36Sopenharmony_ci * cmm_memory_cb - Handle memory hotplug notifier calls
46462306a36Sopenharmony_ci * @self:	notifier block struct
46562306a36Sopenharmony_ci * @action:	action to take
46662306a36Sopenharmony_ci * @arg:	struct memory_notify data for handler
46762306a36Sopenharmony_ci *
46862306a36Sopenharmony_ci * Return value:
46962306a36Sopenharmony_ci *	NOTIFY_OK or notifier error based on subfunction return value
47062306a36Sopenharmony_ci *
47162306a36Sopenharmony_ci **/
47262306a36Sopenharmony_cistatic int cmm_memory_cb(struct notifier_block *self,
47362306a36Sopenharmony_ci			unsigned long action, void *arg)
47462306a36Sopenharmony_ci{
47562306a36Sopenharmony_ci	switch (action) {
47662306a36Sopenharmony_ci	case MEM_GOING_OFFLINE:
47762306a36Sopenharmony_ci		mutex_lock(&hotplug_mutex);
47862306a36Sopenharmony_ci		hotplug_occurred = 1;
47962306a36Sopenharmony_ci		break;
48062306a36Sopenharmony_ci	case MEM_OFFLINE:
48162306a36Sopenharmony_ci	case MEM_CANCEL_OFFLINE:
48262306a36Sopenharmony_ci		mutex_unlock(&hotplug_mutex);
48362306a36Sopenharmony_ci		cmm_dbg("Memory offline operation complete.\n");
48462306a36Sopenharmony_ci		break;
48562306a36Sopenharmony_ci	case MEM_GOING_ONLINE:
48662306a36Sopenharmony_ci	case MEM_ONLINE:
48762306a36Sopenharmony_ci	case MEM_CANCEL_ONLINE:
48862306a36Sopenharmony_ci		break;
48962306a36Sopenharmony_ci	}
49062306a36Sopenharmony_ci
49162306a36Sopenharmony_ci	return NOTIFY_OK;
49262306a36Sopenharmony_ci}
49362306a36Sopenharmony_ci
49462306a36Sopenharmony_cistatic struct notifier_block cmm_mem_nb = {
49562306a36Sopenharmony_ci	.notifier_call = cmm_memory_cb,
49662306a36Sopenharmony_ci	.priority = CMM_MEM_HOTPLUG_PRI
49762306a36Sopenharmony_ci};
49862306a36Sopenharmony_ci
49962306a36Sopenharmony_ci#ifdef CONFIG_BALLOON_COMPACTION
50062306a36Sopenharmony_cistatic int cmm_migratepage(struct balloon_dev_info *b_dev_info,
50162306a36Sopenharmony_ci			   struct page *newpage, struct page *page,
50262306a36Sopenharmony_ci			   enum migrate_mode mode)
50362306a36Sopenharmony_ci{
50462306a36Sopenharmony_ci	unsigned long flags;
50562306a36Sopenharmony_ci
50662306a36Sopenharmony_ci	/*
50762306a36Sopenharmony_ci	 * loan/"inflate" the newpage first.
50862306a36Sopenharmony_ci	 *
50962306a36Sopenharmony_ci	 * We might race against the cmm_thread who might discover after our
51062306a36Sopenharmony_ci	 * loan request that another page is to be unloaned. However, once
51162306a36Sopenharmony_ci	 * the cmm_thread runs again later, this error will automatically
51262306a36Sopenharmony_ci	 * be corrected.
51362306a36Sopenharmony_ci	 */
51462306a36Sopenharmony_ci	if (plpar_page_set_loaned(newpage)) {
51562306a36Sopenharmony_ci		/* Unlikely, but possible. Tell the caller not to retry now. */
51662306a36Sopenharmony_ci		pr_err_ratelimited("%s: Cannot set page to loaned.", __func__);
51762306a36Sopenharmony_ci		return -EBUSY;
51862306a36Sopenharmony_ci	}
51962306a36Sopenharmony_ci
52062306a36Sopenharmony_ci	/* balloon page list reference */
52162306a36Sopenharmony_ci	get_page(newpage);
52262306a36Sopenharmony_ci
52362306a36Sopenharmony_ci	/*
52462306a36Sopenharmony_ci	 * When we migrate a page to a different zone, we have to fixup the
52562306a36Sopenharmony_ci	 * count of both involved zones as we adjusted the managed page count
52662306a36Sopenharmony_ci	 * when inflating.
52762306a36Sopenharmony_ci	 */
52862306a36Sopenharmony_ci	if (page_zone(page) != page_zone(newpage)) {
52962306a36Sopenharmony_ci		adjust_managed_page_count(page, 1);
53062306a36Sopenharmony_ci		adjust_managed_page_count(newpage, -1);
53162306a36Sopenharmony_ci	}
53262306a36Sopenharmony_ci
53362306a36Sopenharmony_ci	spin_lock_irqsave(&b_dev_info->pages_lock, flags);
53462306a36Sopenharmony_ci	balloon_page_insert(b_dev_info, newpage);
53562306a36Sopenharmony_ci	balloon_page_delete(page);
53662306a36Sopenharmony_ci	b_dev_info->isolated_pages--;
53762306a36Sopenharmony_ci	spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
53862306a36Sopenharmony_ci
53962306a36Sopenharmony_ci	/*
54062306a36Sopenharmony_ci	 * activate/"deflate" the old page. We ignore any errors just like the
54162306a36Sopenharmony_ci	 * other callers.
54262306a36Sopenharmony_ci	 */
54362306a36Sopenharmony_ci	plpar_page_set_active(page);
54462306a36Sopenharmony_ci
54562306a36Sopenharmony_ci	/* balloon page list reference */
54662306a36Sopenharmony_ci	put_page(page);
54762306a36Sopenharmony_ci
54862306a36Sopenharmony_ci	return MIGRATEPAGE_SUCCESS;
54962306a36Sopenharmony_ci}
55062306a36Sopenharmony_ci
/*
 * CONFIG_BALLOON_COMPACTION variant: initialise b_dev_info and install
 * cmm_migratepage() as its page migration callback.
 */
static void cmm_balloon_compaction_init(void)
{
	balloon_devinfo_init(&b_dev_info);
	b_dev_info.migratepage = cmm_migratepage;
}
55662306a36Sopenharmony_ci#else /* CONFIG_BALLOON_COMPACTION */
/*
 * Variant used without CONFIG_BALLOON_COMPACTION: there is no migration
 * callback to install, so this does nothing.
 */
static void cmm_balloon_compaction_init(void)
{
}
56062306a36Sopenharmony_ci#endif /* CONFIG_BALLOON_COMPACTION */
56162306a36Sopenharmony_ci
56262306a36Sopenharmony_ci/**
56362306a36Sopenharmony_ci * cmm_init - Module initialization
56462306a36Sopenharmony_ci *
56562306a36Sopenharmony_ci * Return value:
56662306a36Sopenharmony_ci * 	0 on success / other on failure
56762306a36Sopenharmony_ci **/
56862306a36Sopenharmony_cistatic int cmm_init(void)
56962306a36Sopenharmony_ci{
57062306a36Sopenharmony_ci	int rc;
57162306a36Sopenharmony_ci
57262306a36Sopenharmony_ci	if (!firmware_has_feature(FW_FEATURE_CMO) && !simulate)
57362306a36Sopenharmony_ci		return -EOPNOTSUPP;
57462306a36Sopenharmony_ci
57562306a36Sopenharmony_ci	cmm_balloon_compaction_init();
57662306a36Sopenharmony_ci
57762306a36Sopenharmony_ci	rc = register_oom_notifier(&cmm_oom_nb);
57862306a36Sopenharmony_ci	if (rc < 0)
57962306a36Sopenharmony_ci		goto out_balloon_compaction;
58062306a36Sopenharmony_ci
58162306a36Sopenharmony_ci	if ((rc = register_reboot_notifier(&cmm_reboot_nb)))
58262306a36Sopenharmony_ci		goto out_oom_notifier;
58362306a36Sopenharmony_ci
58462306a36Sopenharmony_ci	if ((rc = cmm_sysfs_register(&cmm_dev)))
58562306a36Sopenharmony_ci		goto out_reboot_notifier;
58662306a36Sopenharmony_ci
58762306a36Sopenharmony_ci	rc = register_memory_notifier(&cmm_mem_nb);
58862306a36Sopenharmony_ci	if (rc)
58962306a36Sopenharmony_ci		goto out_unregister_notifier;
59062306a36Sopenharmony_ci
59162306a36Sopenharmony_ci	if (cmm_disabled)
59262306a36Sopenharmony_ci		return 0;
59362306a36Sopenharmony_ci
59462306a36Sopenharmony_ci	cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
59562306a36Sopenharmony_ci	if (IS_ERR(cmm_thread_ptr)) {
59662306a36Sopenharmony_ci		rc = PTR_ERR(cmm_thread_ptr);
59762306a36Sopenharmony_ci		goto out_unregister_notifier;
59862306a36Sopenharmony_ci	}
59962306a36Sopenharmony_ci
60062306a36Sopenharmony_ci	return 0;
60162306a36Sopenharmony_ciout_unregister_notifier:
60262306a36Sopenharmony_ci	unregister_memory_notifier(&cmm_mem_nb);
60362306a36Sopenharmony_ci	cmm_unregister_sysfs(&cmm_dev);
60462306a36Sopenharmony_ciout_reboot_notifier:
60562306a36Sopenharmony_ci	unregister_reboot_notifier(&cmm_reboot_nb);
60662306a36Sopenharmony_ciout_oom_notifier:
60762306a36Sopenharmony_ci	unregister_oom_notifier(&cmm_oom_nb);
60862306a36Sopenharmony_ciout_balloon_compaction:
60962306a36Sopenharmony_ci	return rc;
61062306a36Sopenharmony_ci}
61162306a36Sopenharmony_ci
61262306a36Sopenharmony_ci/**
61362306a36Sopenharmony_ci * cmm_exit - Module exit
61462306a36Sopenharmony_ci *
61562306a36Sopenharmony_ci * Return value:
61662306a36Sopenharmony_ci * 	nothing
61762306a36Sopenharmony_ci **/
61862306a36Sopenharmony_cistatic void cmm_exit(void)
61962306a36Sopenharmony_ci{
62062306a36Sopenharmony_ci	if (cmm_thread_ptr)
62162306a36Sopenharmony_ci		kthread_stop(cmm_thread_ptr);
62262306a36Sopenharmony_ci	unregister_oom_notifier(&cmm_oom_nb);
62362306a36Sopenharmony_ci	unregister_reboot_notifier(&cmm_reboot_nb);
62462306a36Sopenharmony_ci	unregister_memory_notifier(&cmm_mem_nb);
62562306a36Sopenharmony_ci	cmm_free_pages(atomic_long_read(&loaned_pages));
62662306a36Sopenharmony_ci	cmm_unregister_sysfs(&cmm_dev);
62762306a36Sopenharmony_ci}
62862306a36Sopenharmony_ci
62962306a36Sopenharmony_ci/**
63062306a36Sopenharmony_ci * cmm_set_disable - Disable/Enable CMM
63162306a36Sopenharmony_ci *
63262306a36Sopenharmony_ci * Return value:
63362306a36Sopenharmony_ci * 	0 on success / other on failure
63462306a36Sopenharmony_ci **/
63562306a36Sopenharmony_cistatic int cmm_set_disable(const char *val, const struct kernel_param *kp)
63662306a36Sopenharmony_ci{
63762306a36Sopenharmony_ci	int disable = simple_strtoul(val, NULL, 10);
63862306a36Sopenharmony_ci
63962306a36Sopenharmony_ci	if (disable != 0 && disable != 1)
64062306a36Sopenharmony_ci		return -EINVAL;
64162306a36Sopenharmony_ci
64262306a36Sopenharmony_ci	if (disable && !cmm_disabled) {
64362306a36Sopenharmony_ci		if (cmm_thread_ptr)
64462306a36Sopenharmony_ci			kthread_stop(cmm_thread_ptr);
64562306a36Sopenharmony_ci		cmm_thread_ptr = NULL;
64662306a36Sopenharmony_ci		cmm_free_pages(atomic_long_read(&loaned_pages));
64762306a36Sopenharmony_ci	} else if (!disable && cmm_disabled) {
64862306a36Sopenharmony_ci		cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
64962306a36Sopenharmony_ci		if (IS_ERR(cmm_thread_ptr))
65062306a36Sopenharmony_ci			return PTR_ERR(cmm_thread_ptr);
65162306a36Sopenharmony_ci	}
65262306a36Sopenharmony_ci
65362306a36Sopenharmony_ci	cmm_disabled = disable;
65462306a36Sopenharmony_ci	return 0;
65562306a36Sopenharmony_ci}
65662306a36Sopenharmony_ci
/*
 * "disable" parameter: writes go through cmm_set_disable(), reads return
 * cmm_disabled via param_get_uint().
 */
module_param_call(disable, cmm_set_disable, param_get_uint,
		  &cmm_disabled, 0644);
MODULE_PARM_DESC(disable, "Disable CMM. Set to 1 to disable. "
		 "[Default=" __stringify(CMM_DISABLE) "]");

/* Module entry and exit points. */
module_init(cmm_init);
module_exit(cmm_exit);
664