162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Virtio-mem device driver.
462306a36Sopenharmony_ci *
562306a36Sopenharmony_ci * Copyright Red Hat, Inc. 2020
662306a36Sopenharmony_ci *
762306a36Sopenharmony_ci * Author(s): David Hildenbrand <david@redhat.com>
862306a36Sopenharmony_ci */
962306a36Sopenharmony_ci
1062306a36Sopenharmony_ci#include <linux/virtio.h>
1162306a36Sopenharmony_ci#include <linux/virtio_mem.h>
1262306a36Sopenharmony_ci#include <linux/workqueue.h>
1362306a36Sopenharmony_ci#include <linux/slab.h>
1462306a36Sopenharmony_ci#include <linux/module.h>
1562306a36Sopenharmony_ci#include <linux/mm.h>
1662306a36Sopenharmony_ci#include <linux/memory_hotplug.h>
1762306a36Sopenharmony_ci#include <linux/memory.h>
1862306a36Sopenharmony_ci#include <linux/hrtimer.h>
1962306a36Sopenharmony_ci#include <linux/crash_dump.h>
2062306a36Sopenharmony_ci#include <linux/mutex.h>
2162306a36Sopenharmony_ci#include <linux/bitmap.h>
2262306a36Sopenharmony_ci#include <linux/lockdep.h>
2362306a36Sopenharmony_ci#include <linux/log2.h>
2462306a36Sopenharmony_ci
2562306a36Sopenharmony_ci#include <acpi/acpi_numa.h>
2662306a36Sopenharmony_ci
2762306a36Sopenharmony_cistatic bool unplug_online = true;
2862306a36Sopenharmony_cimodule_param(unplug_online, bool, 0644);
2962306a36Sopenharmony_ciMODULE_PARM_DESC(unplug_online, "Try to unplug online memory");
3062306a36Sopenharmony_ci
3162306a36Sopenharmony_cistatic bool force_bbm;
3262306a36Sopenharmony_cimodule_param(force_bbm, bool, 0444);
3362306a36Sopenharmony_ciMODULE_PARM_DESC(force_bbm,
3462306a36Sopenharmony_ci		"Force Big Block Mode. Default is 0 (auto-selection)");
3562306a36Sopenharmony_ci
3662306a36Sopenharmony_cistatic unsigned long bbm_block_size;
3762306a36Sopenharmony_cimodule_param(bbm_block_size, ulong, 0444);
3862306a36Sopenharmony_ciMODULE_PARM_DESC(bbm_block_size,
3962306a36Sopenharmony_ci		 "Big Block size in bytes. Default is 0 (auto-detection).");
4062306a36Sopenharmony_ci
4162306a36Sopenharmony_ci/*
4262306a36Sopenharmony_ci * virtio-mem currently supports the following modes of operation:
4362306a36Sopenharmony_ci *
4462306a36Sopenharmony_ci * * Sub Block Mode (SBM): A Linux memory block spans 2..X subblocks (SB). The
4562306a36Sopenharmony_ci *   size of a Sub Block (SB) is determined based on the device block size, the
4662306a36Sopenharmony_ci *   pageblock size, and the maximum allocation granularity of the buddy.
4762306a36Sopenharmony_ci *   Subblocks within a Linux memory block might either be plugged or unplugged.
4862306a36Sopenharmony_ci *   Memory is added/removed to Linux MM in Linux memory block granularity.
4962306a36Sopenharmony_ci *
5062306a36Sopenharmony_ci * * Big Block Mode (BBM): A Big Block (BB) spans 1..X Linux memory blocks.
5162306a36Sopenharmony_ci *   Memory is added/removed to Linux MM in Big Block granularity.
5262306a36Sopenharmony_ci *
5362306a36Sopenharmony_ci * The mode is determined automatically based on the Linux memory block size
5462306a36Sopenharmony_ci * and the device block size.
5562306a36Sopenharmony_ci *
5662306a36Sopenharmony_ci * User space / core MM (auto onlining) is responsible for onlining added
5762306a36Sopenharmony_ci * Linux memory blocks - and for selecting a zone. Linux Memory Blocks are
5862306a36Sopenharmony_ci * always onlined separately, and all memory within a Linux memory block is
5962306a36Sopenharmony_ci * onlined to the same zone - virtio-mem relies on this behavior.
6062306a36Sopenharmony_ci */
6162306a36Sopenharmony_ci
/*
 * Sub Block Mode (SBM): the states a Linux memory block can be in.
 *
 * VIRTIO_MEM_SBM_MB_COUNT is not a state; it is the number of states and is
 * used to size the per-state counter array.
 */
enum virtio_mem_sbm_mb_state {
	/* Nothing plugged and not added to Linux; may be reused later. */
	VIRTIO_MEM_SBM_MB_UNUSED = 0,
	/* (Partially) plugged but not added to Linux: add_memory() failed. */
	VIRTIO_MEM_SBM_MB_PLUGGED,
	/* Offline; all subblocks plugged; fully added to Linux. */
	VIRTIO_MEM_SBM_MB_OFFLINE,
	/* Offline; only some subblocks plugged; fully added to Linux. */
	VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL,
	/* Online in a kernel zone; all subblocks plugged; fully added. */
	VIRTIO_MEM_SBM_MB_KERNEL,
	/* Online in a kernel zone; only some subblocks plugged; fully added. */
	VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL,
	/* Online in ZONE_MOVABLE; all subblocks plugged; fully added. */
	VIRTIO_MEM_SBM_MB_MOVABLE,
	/* Online in ZONE_MOVABLE; only some subblocks plugged; fully added. */
	VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL,
	VIRTIO_MEM_SBM_MB_COUNT
};
8462306a36Sopenharmony_ci
/*
 * Big Block Mode (BBM): the states a Big Block (BB) can be in. One BB covers
 * 1..X Linux memory blocks.
 *
 * VIRTIO_MEM_BBM_BB_COUNT is not a state; it is the number of states and is
 * used to size the per-state counter array.
 */
enum virtio_mem_bbm_bb_state {
	/* Not plugged and not added to Linux; may be reused later. */
	VIRTIO_MEM_BBM_BB_UNUSED = 0,
	/* Plugged but not added to Linux: add_memory() failed. */
	VIRTIO_MEM_BBM_BB_PLUGGED,
	/* Plugged as well as added to Linux. */
	VIRTIO_MEM_BBM_BB_ADDED,
	/* Every online part is fake-offline; the BB is ready for removal. */
	VIRTIO_MEM_BBM_BB_FAKE_OFFLINE,
	VIRTIO_MEM_BBM_BB_COUNT
};
9962306a36Sopenharmony_ci
10062306a36Sopenharmony_cistruct virtio_mem {
10162306a36Sopenharmony_ci	struct virtio_device *vdev;
10262306a36Sopenharmony_ci
10362306a36Sopenharmony_ci	/* We might first have to unplug all memory when starting up. */
10462306a36Sopenharmony_ci	bool unplug_all_required;
10562306a36Sopenharmony_ci
10662306a36Sopenharmony_ci	/* Workqueue that processes the plug/unplug requests. */
10762306a36Sopenharmony_ci	struct work_struct wq;
10862306a36Sopenharmony_ci	atomic_t wq_active;
10962306a36Sopenharmony_ci	atomic_t config_changed;
11062306a36Sopenharmony_ci
11162306a36Sopenharmony_ci	/* Virtqueue for guest->host requests. */
11262306a36Sopenharmony_ci	struct virtqueue *vq;
11362306a36Sopenharmony_ci
11462306a36Sopenharmony_ci	/* Wait for a host response to a guest request. */
11562306a36Sopenharmony_ci	wait_queue_head_t host_resp;
11662306a36Sopenharmony_ci
11762306a36Sopenharmony_ci	/* Space for one guest request and the host response. */
11862306a36Sopenharmony_ci	struct virtio_mem_req req;
11962306a36Sopenharmony_ci	struct virtio_mem_resp resp;
12062306a36Sopenharmony_ci
12162306a36Sopenharmony_ci	/* The current size of the device. */
12262306a36Sopenharmony_ci	uint64_t plugged_size;
12362306a36Sopenharmony_ci	/* The requested size of the device. */
12462306a36Sopenharmony_ci	uint64_t requested_size;
12562306a36Sopenharmony_ci
12662306a36Sopenharmony_ci	/* The device block size (for communicating with the device). */
12762306a36Sopenharmony_ci	uint64_t device_block_size;
12862306a36Sopenharmony_ci	/* The determined node id for all memory of the device. */
12962306a36Sopenharmony_ci	int nid;
13062306a36Sopenharmony_ci	/* Physical start address of the memory region. */
13162306a36Sopenharmony_ci	uint64_t addr;
13262306a36Sopenharmony_ci	/* Maximum region size in bytes. */
13362306a36Sopenharmony_ci	uint64_t region_size;
13462306a36Sopenharmony_ci
13562306a36Sopenharmony_ci	/* The parent resource for all memory added via this device. */
13662306a36Sopenharmony_ci	struct resource *parent_resource;
13762306a36Sopenharmony_ci	/*
13862306a36Sopenharmony_ci	 * Copy of "System RAM (virtio_mem)" to be used for
13962306a36Sopenharmony_ci	 * add_memory_driver_managed().
14062306a36Sopenharmony_ci	 */
14162306a36Sopenharmony_ci	const char *resource_name;
14262306a36Sopenharmony_ci	/* Memory group identification. */
14362306a36Sopenharmony_ci	int mgid;
14462306a36Sopenharmony_ci
14562306a36Sopenharmony_ci	/*
14662306a36Sopenharmony_ci	 * We don't want to add too much memory if it's not getting onlined,
14762306a36Sopenharmony_ci	 * to avoid running OOM. Besides this threshold, we allow to have at
14862306a36Sopenharmony_ci	 * least two offline blocks at a time (whatever is bigger).
14962306a36Sopenharmony_ci	 */
15062306a36Sopenharmony_ci#define VIRTIO_MEM_DEFAULT_OFFLINE_THRESHOLD		(1024 * 1024 * 1024)
15162306a36Sopenharmony_ci	atomic64_t offline_size;
15262306a36Sopenharmony_ci	uint64_t offline_threshold;
15362306a36Sopenharmony_ci
15462306a36Sopenharmony_ci	/* If set, the driver is in SBM, otherwise in BBM. */
15562306a36Sopenharmony_ci	bool in_sbm;
15662306a36Sopenharmony_ci
15762306a36Sopenharmony_ci	union {
15862306a36Sopenharmony_ci		struct {
15962306a36Sopenharmony_ci			/* Id of the first memory block of this device. */
16062306a36Sopenharmony_ci			unsigned long first_mb_id;
16162306a36Sopenharmony_ci			/* Id of the last usable memory block of this device. */
16262306a36Sopenharmony_ci			unsigned long last_usable_mb_id;
16362306a36Sopenharmony_ci			/* Id of the next memory bock to prepare when needed. */
16462306a36Sopenharmony_ci			unsigned long next_mb_id;
16562306a36Sopenharmony_ci
16662306a36Sopenharmony_ci			/* The subblock size. */
16762306a36Sopenharmony_ci			uint64_t sb_size;
16862306a36Sopenharmony_ci			/* The number of subblocks per Linux memory block. */
16962306a36Sopenharmony_ci			uint32_t sbs_per_mb;
17062306a36Sopenharmony_ci
17162306a36Sopenharmony_ci			/*
17262306a36Sopenharmony_ci			 * Some of the Linux memory blocks tracked as "partially
17362306a36Sopenharmony_ci			 * plugged" are completely unplugged and can be offlined
17462306a36Sopenharmony_ci			 * and removed -- which previously failed.
17562306a36Sopenharmony_ci			 */
17662306a36Sopenharmony_ci			bool have_unplugged_mb;
17762306a36Sopenharmony_ci
17862306a36Sopenharmony_ci			/* Summary of all memory block states. */
17962306a36Sopenharmony_ci			unsigned long mb_count[VIRTIO_MEM_SBM_MB_COUNT];
18062306a36Sopenharmony_ci
18162306a36Sopenharmony_ci			/*
18262306a36Sopenharmony_ci			 * One byte state per memory block. Allocated via
18362306a36Sopenharmony_ci			 * vmalloc(). Resized (alloc+copy+free) on demand.
18462306a36Sopenharmony_ci			 *
18562306a36Sopenharmony_ci			 * With 128 MiB memory blocks, we have states for 512
18662306a36Sopenharmony_ci			 * GiB of memory in one 4 KiB page.
18762306a36Sopenharmony_ci			 */
18862306a36Sopenharmony_ci			uint8_t *mb_states;
18962306a36Sopenharmony_ci
19062306a36Sopenharmony_ci			/*
19162306a36Sopenharmony_ci			 * Bitmap: one bit per subblock. Allocated similar to
19262306a36Sopenharmony_ci			 * sbm.mb_states.
19362306a36Sopenharmony_ci			 *
19462306a36Sopenharmony_ci			 * A set bit means the corresponding subblock is
19562306a36Sopenharmony_ci			 * plugged, otherwise it's unblocked.
19662306a36Sopenharmony_ci			 *
19762306a36Sopenharmony_ci			 * With 4 MiB subblocks, we manage 128 GiB of memory
19862306a36Sopenharmony_ci			 * in one 4 KiB page.
19962306a36Sopenharmony_ci			 */
20062306a36Sopenharmony_ci			unsigned long *sb_states;
20162306a36Sopenharmony_ci		} sbm;
20262306a36Sopenharmony_ci
20362306a36Sopenharmony_ci		struct {
20462306a36Sopenharmony_ci			/* Id of the first big block of this device. */
20562306a36Sopenharmony_ci			unsigned long first_bb_id;
20662306a36Sopenharmony_ci			/* Id of the last usable big block of this device. */
20762306a36Sopenharmony_ci			unsigned long last_usable_bb_id;
20862306a36Sopenharmony_ci			/* Id of the next device bock to prepare when needed. */
20962306a36Sopenharmony_ci			unsigned long next_bb_id;
21062306a36Sopenharmony_ci
21162306a36Sopenharmony_ci			/* Summary of all big block states. */
21262306a36Sopenharmony_ci			unsigned long bb_count[VIRTIO_MEM_BBM_BB_COUNT];
21362306a36Sopenharmony_ci
21462306a36Sopenharmony_ci			/* One byte state per big block. See sbm.mb_states. */
21562306a36Sopenharmony_ci			uint8_t *bb_states;
21662306a36Sopenharmony_ci
21762306a36Sopenharmony_ci			/* The block size used for plugging/adding/removing. */
21862306a36Sopenharmony_ci			uint64_t bb_size;
21962306a36Sopenharmony_ci		} bbm;
22062306a36Sopenharmony_ci	};
22162306a36Sopenharmony_ci
22262306a36Sopenharmony_ci	/*
22362306a36Sopenharmony_ci	 * Mutex that protects the sbm.mb_count, sbm.mb_states,
22462306a36Sopenharmony_ci	 * sbm.sb_states, bbm.bb_count, and bbm.bb_states
22562306a36Sopenharmony_ci	 *
22662306a36Sopenharmony_ci	 * When this lock is held the pointers can't change, ONLINE and
22762306a36Sopenharmony_ci	 * OFFLINE blocks can't change the state and no subblocks will get
22862306a36Sopenharmony_ci	 * plugged/unplugged.
22962306a36Sopenharmony_ci	 *
23062306a36Sopenharmony_ci	 * In kdump mode, used to serialize requests, last_block_addr and
23162306a36Sopenharmony_ci	 * last_block_plugged.
23262306a36Sopenharmony_ci	 */
23362306a36Sopenharmony_ci	struct mutex hotplug_mutex;
23462306a36Sopenharmony_ci	bool hotplug_active;
23562306a36Sopenharmony_ci
23662306a36Sopenharmony_ci	/* An error occurred we cannot handle - stop processing requests. */
23762306a36Sopenharmony_ci	bool broken;
23862306a36Sopenharmony_ci
23962306a36Sopenharmony_ci	/* Cached valued of is_kdump_kernel() when the device was probed. */
24062306a36Sopenharmony_ci	bool in_kdump;
24162306a36Sopenharmony_ci
24262306a36Sopenharmony_ci	/* The driver is being removed. */
24362306a36Sopenharmony_ci	spinlock_t removal_lock;
24462306a36Sopenharmony_ci	bool removing;
24562306a36Sopenharmony_ci
24662306a36Sopenharmony_ci	/* Timer for retrying to plug/unplug memory. */
24762306a36Sopenharmony_ci	struct hrtimer retry_timer;
24862306a36Sopenharmony_ci	unsigned int retry_timer_ms;
24962306a36Sopenharmony_ci#define VIRTIO_MEM_RETRY_TIMER_MIN_MS		50000
25062306a36Sopenharmony_ci#define VIRTIO_MEM_RETRY_TIMER_MAX_MS		300000
25162306a36Sopenharmony_ci
25262306a36Sopenharmony_ci	/* Memory notifier (online/offline events). */
25362306a36Sopenharmony_ci	struct notifier_block memory_notifier;
25462306a36Sopenharmony_ci
25562306a36Sopenharmony_ci#ifdef CONFIG_PROC_VMCORE
25662306a36Sopenharmony_ci	/* vmcore callback for /proc/vmcore handling in kdump mode */
25762306a36Sopenharmony_ci	struct vmcore_cb vmcore_cb;
25862306a36Sopenharmony_ci	uint64_t last_block_addr;
25962306a36Sopenharmony_ci	bool last_block_plugged;
26062306a36Sopenharmony_ci#endif /* CONFIG_PROC_VMCORE */
26162306a36Sopenharmony_ci
26262306a36Sopenharmony_ci	/* Next device in the list of virtio-mem devices. */
26362306a36Sopenharmony_ci	struct list_head next;
26462306a36Sopenharmony_ci};
26562306a36Sopenharmony_ci
/*
 * All virtio-mem devices share one online_page callback. The devices are
 * kept on a global list; the callback iterates that list using RCU.
 */
static DEFINE_MUTEX(virtio_mem_mutex);
static LIST_HEAD(virtio_mem_devices);

/* Forward declarations of helpers defined further down in this file. */
static void virtio_mem_online_page_cb(struct page *page, unsigned int order);
static void virtio_mem_fake_offline_going_offline(unsigned long pfn,
						  unsigned long nr_pages);
static void virtio_mem_fake_offline_cancel_offline(unsigned long pfn,
						   unsigned long nr_pages);
static void virtio_mem_retry(struct virtio_mem *vm);
static int virtio_mem_create_resource(struct virtio_mem *vm);
static void virtio_mem_delete_resource(struct virtio_mem *vm);
28162306a36Sopenharmony_ci
28262306a36Sopenharmony_ci/*
28362306a36Sopenharmony_ci * Register a virtio-mem device so it will be considered for the online_page
28462306a36Sopenharmony_ci * callback.
28562306a36Sopenharmony_ci */
28662306a36Sopenharmony_cistatic int register_virtio_mem_device(struct virtio_mem *vm)
28762306a36Sopenharmony_ci{
28862306a36Sopenharmony_ci	int rc = 0;
28962306a36Sopenharmony_ci
29062306a36Sopenharmony_ci	/* First device registers the callback. */
29162306a36Sopenharmony_ci	mutex_lock(&virtio_mem_mutex);
29262306a36Sopenharmony_ci	if (list_empty(&virtio_mem_devices))
29362306a36Sopenharmony_ci		rc = set_online_page_callback(&virtio_mem_online_page_cb);
29462306a36Sopenharmony_ci	if (!rc)
29562306a36Sopenharmony_ci		list_add_rcu(&vm->next, &virtio_mem_devices);
29662306a36Sopenharmony_ci	mutex_unlock(&virtio_mem_mutex);
29762306a36Sopenharmony_ci
29862306a36Sopenharmony_ci	return rc;
29962306a36Sopenharmony_ci}
30062306a36Sopenharmony_ci
30162306a36Sopenharmony_ci/*
30262306a36Sopenharmony_ci * Unregister a virtio-mem device so it will no longer be considered for the
30362306a36Sopenharmony_ci * online_page callback.
30462306a36Sopenharmony_ci */
30562306a36Sopenharmony_cistatic void unregister_virtio_mem_device(struct virtio_mem *vm)
30662306a36Sopenharmony_ci{
30762306a36Sopenharmony_ci	/* Last device unregisters the callback. */
30862306a36Sopenharmony_ci	mutex_lock(&virtio_mem_mutex);
30962306a36Sopenharmony_ci	list_del_rcu(&vm->next);
31062306a36Sopenharmony_ci	if (list_empty(&virtio_mem_devices))
31162306a36Sopenharmony_ci		restore_online_page_callback(&virtio_mem_online_page_cb);
31262306a36Sopenharmony_ci	mutex_unlock(&virtio_mem_mutex);
31362306a36Sopenharmony_ci
31462306a36Sopenharmony_ci	synchronize_rcu();
31562306a36Sopenharmony_ci}
31662306a36Sopenharmony_ci
/*
 * Translate a physical address into the id of the Linux memory block that
 * contains it (address divided by the memory block size, rounded down).
 */
static unsigned long virtio_mem_phys_to_mb_id(unsigned long addr)
{
	const unsigned long mb_size = memory_block_size_bytes();

	return addr / mb_size;
}
32462306a36Sopenharmony_ci
/*
 * Translate a Linux memory block id into the physical address at which
 * that memory block starts.
 */
static unsigned long virtio_mem_mb_id_to_phys(unsigned long mb_id)
{
	const unsigned long mb_size = memory_block_size_bytes();

	return mb_id * mb_size;
}
33262306a36Sopenharmony_ci
33362306a36Sopenharmony_ci/*
33462306a36Sopenharmony_ci * Calculate the big block id of a given address.
33562306a36Sopenharmony_ci */
33662306a36Sopenharmony_cistatic unsigned long virtio_mem_phys_to_bb_id(struct virtio_mem *vm,
33762306a36Sopenharmony_ci					      uint64_t addr)
33862306a36Sopenharmony_ci{
33962306a36Sopenharmony_ci	return addr / vm->bbm.bb_size;
34062306a36Sopenharmony_ci}
34162306a36Sopenharmony_ci
34262306a36Sopenharmony_ci/*
34362306a36Sopenharmony_ci * Calculate the physical start address of a given big block id.
34462306a36Sopenharmony_ci */
34562306a36Sopenharmony_cistatic uint64_t virtio_mem_bb_id_to_phys(struct virtio_mem *vm,
34662306a36Sopenharmony_ci					 unsigned long bb_id)
34762306a36Sopenharmony_ci{
34862306a36Sopenharmony_ci	return bb_id * vm->bbm.bb_size;
34962306a36Sopenharmony_ci}
35062306a36Sopenharmony_ci
35162306a36Sopenharmony_ci/*
35262306a36Sopenharmony_ci * Calculate the subblock id of a given address.
35362306a36Sopenharmony_ci */
35462306a36Sopenharmony_cistatic unsigned long virtio_mem_phys_to_sb_id(struct virtio_mem *vm,
35562306a36Sopenharmony_ci					      unsigned long addr)
35662306a36Sopenharmony_ci{
35762306a36Sopenharmony_ci	const unsigned long mb_id = virtio_mem_phys_to_mb_id(addr);
35862306a36Sopenharmony_ci	const unsigned long mb_addr = virtio_mem_mb_id_to_phys(mb_id);
35962306a36Sopenharmony_ci
36062306a36Sopenharmony_ci	return (addr - mb_addr) / vm->sbm.sb_size;
36162306a36Sopenharmony_ci}
36262306a36Sopenharmony_ci
36362306a36Sopenharmony_ci/*
36462306a36Sopenharmony_ci * Set the state of a big block, taking care of the state counter.
36562306a36Sopenharmony_ci */
36662306a36Sopenharmony_cistatic void virtio_mem_bbm_set_bb_state(struct virtio_mem *vm,
36762306a36Sopenharmony_ci					unsigned long bb_id,
36862306a36Sopenharmony_ci					enum virtio_mem_bbm_bb_state state)
36962306a36Sopenharmony_ci{
37062306a36Sopenharmony_ci	const unsigned long idx = bb_id - vm->bbm.first_bb_id;
37162306a36Sopenharmony_ci	enum virtio_mem_bbm_bb_state old_state;
37262306a36Sopenharmony_ci
37362306a36Sopenharmony_ci	old_state = vm->bbm.bb_states[idx];
37462306a36Sopenharmony_ci	vm->bbm.bb_states[idx] = state;
37562306a36Sopenharmony_ci
37662306a36Sopenharmony_ci	BUG_ON(vm->bbm.bb_count[old_state] == 0);
37762306a36Sopenharmony_ci	vm->bbm.bb_count[old_state]--;
37862306a36Sopenharmony_ci	vm->bbm.bb_count[state]++;
37962306a36Sopenharmony_ci}
38062306a36Sopenharmony_ci
38162306a36Sopenharmony_ci/*
38262306a36Sopenharmony_ci * Get the state of a big block.
38362306a36Sopenharmony_ci */
38462306a36Sopenharmony_cistatic enum virtio_mem_bbm_bb_state virtio_mem_bbm_get_bb_state(struct virtio_mem *vm,
38562306a36Sopenharmony_ci								unsigned long bb_id)
38662306a36Sopenharmony_ci{
38762306a36Sopenharmony_ci	return vm->bbm.bb_states[bb_id - vm->bbm.first_bb_id];
38862306a36Sopenharmony_ci}
38962306a36Sopenharmony_ci
39062306a36Sopenharmony_ci/*
39162306a36Sopenharmony_ci * Prepare the big block state array for the next big block.
39262306a36Sopenharmony_ci */
39362306a36Sopenharmony_cistatic int virtio_mem_bbm_bb_states_prepare_next_bb(struct virtio_mem *vm)
39462306a36Sopenharmony_ci{
39562306a36Sopenharmony_ci	unsigned long old_bytes = vm->bbm.next_bb_id - vm->bbm.first_bb_id;
39662306a36Sopenharmony_ci	unsigned long new_bytes = old_bytes + 1;
39762306a36Sopenharmony_ci	int old_pages = PFN_UP(old_bytes);
39862306a36Sopenharmony_ci	int new_pages = PFN_UP(new_bytes);
39962306a36Sopenharmony_ci	uint8_t *new_array;
40062306a36Sopenharmony_ci
40162306a36Sopenharmony_ci	if (vm->bbm.bb_states && old_pages == new_pages)
40262306a36Sopenharmony_ci		return 0;
40362306a36Sopenharmony_ci
40462306a36Sopenharmony_ci	new_array = vzalloc(new_pages * PAGE_SIZE);
40562306a36Sopenharmony_ci	if (!new_array)
40662306a36Sopenharmony_ci		return -ENOMEM;
40762306a36Sopenharmony_ci
40862306a36Sopenharmony_ci	mutex_lock(&vm->hotplug_mutex);
40962306a36Sopenharmony_ci	if (vm->bbm.bb_states)
41062306a36Sopenharmony_ci		memcpy(new_array, vm->bbm.bb_states, old_pages * PAGE_SIZE);
41162306a36Sopenharmony_ci	vfree(vm->bbm.bb_states);
41262306a36Sopenharmony_ci	vm->bbm.bb_states = new_array;
41362306a36Sopenharmony_ci	mutex_unlock(&vm->hotplug_mutex);
41462306a36Sopenharmony_ci
41562306a36Sopenharmony_ci	return 0;
41662306a36Sopenharmony_ci}
41762306a36Sopenharmony_ci
/*
 * Iterate, in ascending id order, over all big blocks of @_vm that are in
 * state @_state. @_bb_id is the iteration variable (an unsigned long lvalue).
 *
 * The walk stops early once bb_count[@_state] is zero. The macro expands to
 * a "for (...) if (...)" pair, so it must be followed by exactly one
 * statement or a braced block.
 *
 * All accesses go through the @_vm argument; the macro does not depend on
 * the caller having a local variable named "vm".
 */
#define virtio_mem_bbm_for_each_bb(_vm, _bb_id, _state) \
	for (_bb_id = (_vm)->bbm.first_bb_id; \
	     _bb_id < (_vm)->bbm.next_bb_id && (_vm)->bbm.bb_count[_state]; \
	     _bb_id++) \
		if (virtio_mem_bbm_get_bb_state(_vm, _bb_id) == _state)
42362306a36Sopenharmony_ci
/*
 * Iterate, in descending id order, over all big blocks of @_vm that are in
 * state @_state. @_bb_id is the iteration variable (an unsigned long lvalue).
 *
 * The walk stops early once bb_count[@_state] is zero. The macro expands to
 * a "for (...) if (...)" pair, so it must be followed by exactly one
 * statement or a braced block.
 *
 * All accesses go through the @_vm argument; the macro does not depend on
 * the caller having a local variable named "vm".
 *
 * NOTE(review): the ">= first_bb_id" termination test on an unsigned id
 * presumably relies on first_bb_id never being 0 -- confirm against the
 * code that initializes bbm.first_bb_id.
 */
#define virtio_mem_bbm_for_each_bb_rev(_vm, _bb_id, _state) \
	for (_bb_id = (_vm)->bbm.next_bb_id - 1; \
	     _bb_id >= (_vm)->bbm.first_bb_id && (_vm)->bbm.bb_count[_state]; \
	     _bb_id--) \
		if (virtio_mem_bbm_get_bb_state(_vm, _bb_id) == _state)
42962306a36Sopenharmony_ci
43062306a36Sopenharmony_ci/*
43162306a36Sopenharmony_ci * Set the state of a memory block, taking care of the state counter.
43262306a36Sopenharmony_ci */
43362306a36Sopenharmony_cistatic void virtio_mem_sbm_set_mb_state(struct virtio_mem *vm,
43462306a36Sopenharmony_ci					unsigned long mb_id, uint8_t state)
43562306a36Sopenharmony_ci{
43662306a36Sopenharmony_ci	const unsigned long idx = mb_id - vm->sbm.first_mb_id;
43762306a36Sopenharmony_ci	uint8_t old_state;
43862306a36Sopenharmony_ci
43962306a36Sopenharmony_ci	old_state = vm->sbm.mb_states[idx];
44062306a36Sopenharmony_ci	vm->sbm.mb_states[idx] = state;
44162306a36Sopenharmony_ci
44262306a36Sopenharmony_ci	BUG_ON(vm->sbm.mb_count[old_state] == 0);
44362306a36Sopenharmony_ci	vm->sbm.mb_count[old_state]--;
44462306a36Sopenharmony_ci	vm->sbm.mb_count[state]++;
44562306a36Sopenharmony_ci}
44662306a36Sopenharmony_ci
44762306a36Sopenharmony_ci/*
44862306a36Sopenharmony_ci * Get the state of a memory block.
44962306a36Sopenharmony_ci */
45062306a36Sopenharmony_cistatic uint8_t virtio_mem_sbm_get_mb_state(struct virtio_mem *vm,
45162306a36Sopenharmony_ci					   unsigned long mb_id)
45262306a36Sopenharmony_ci{
45362306a36Sopenharmony_ci	const unsigned long idx = mb_id - vm->sbm.first_mb_id;
45462306a36Sopenharmony_ci
45562306a36Sopenharmony_ci	return vm->sbm.mb_states[idx];
45662306a36Sopenharmony_ci}
45762306a36Sopenharmony_ci
45862306a36Sopenharmony_ci/*
45962306a36Sopenharmony_ci * Prepare the state array for the next memory block.
46062306a36Sopenharmony_ci */
46162306a36Sopenharmony_cistatic int virtio_mem_sbm_mb_states_prepare_next_mb(struct virtio_mem *vm)
46262306a36Sopenharmony_ci{
46362306a36Sopenharmony_ci	int old_pages = PFN_UP(vm->sbm.next_mb_id - vm->sbm.first_mb_id);
46462306a36Sopenharmony_ci	int new_pages = PFN_UP(vm->sbm.next_mb_id - vm->sbm.first_mb_id + 1);
46562306a36Sopenharmony_ci	uint8_t *new_array;
46662306a36Sopenharmony_ci
46762306a36Sopenharmony_ci	if (vm->sbm.mb_states && old_pages == new_pages)
46862306a36Sopenharmony_ci		return 0;
46962306a36Sopenharmony_ci
47062306a36Sopenharmony_ci	new_array = vzalloc(new_pages * PAGE_SIZE);
47162306a36Sopenharmony_ci	if (!new_array)
47262306a36Sopenharmony_ci		return -ENOMEM;
47362306a36Sopenharmony_ci
47462306a36Sopenharmony_ci	mutex_lock(&vm->hotplug_mutex);
47562306a36Sopenharmony_ci	if (vm->sbm.mb_states)
47662306a36Sopenharmony_ci		memcpy(new_array, vm->sbm.mb_states, old_pages * PAGE_SIZE);
47762306a36Sopenharmony_ci	vfree(vm->sbm.mb_states);
47862306a36Sopenharmony_ci	vm->sbm.mb_states = new_array;
47962306a36Sopenharmony_ci	mutex_unlock(&vm->hotplug_mutex);
48062306a36Sopenharmony_ci
48162306a36Sopenharmony_ci	return 0;
48262306a36Sopenharmony_ci}
48362306a36Sopenharmony_ci
/*
 * Iterate, in ascending id order, over all memory blocks of @_vm that are in
 * state @_state, using @_mb_id as iteration variable. The walk stops early
 * once mb_count[@_state] is zero. Expands to a "for (...) if (...)" pair.
 */
#define virtio_mem_sbm_for_each_mb(_vm, _mb_id, _state)			\
	for (_mb_id = _vm->sbm.first_mb_id;				\
	     _mb_id < _vm->sbm.next_mb_id && _vm->sbm.mb_count[_state];	\
	     _mb_id++)							\
		if (virtio_mem_sbm_get_mb_state(_vm, _mb_id) == _state)
48962306a36Sopenharmony_ci
/*
 * Iterate, in descending id order, over all memory blocks of @_vm that are
 * in state @_state, using @_mb_id as iteration variable. The walk stops
 * early once mb_count[@_state] is zero. Expands to a "for (...) if (...)"
 * pair.
 */
#define virtio_mem_sbm_for_each_mb_rev(_vm, _mb_id, _state)		\
	for (_mb_id = _vm->sbm.next_mb_id - 1;				\
	     _mb_id >= _vm->sbm.first_mb_id && _vm->sbm.mb_count[_state]; \
	     _mb_id--)							\
		if (virtio_mem_sbm_get_mb_state(_vm, _mb_id) == _state)
49562306a36Sopenharmony_ci
49662306a36Sopenharmony_ci/*
49762306a36Sopenharmony_ci * Calculate the bit number in the subblock bitmap for the given subblock
49862306a36Sopenharmony_ci * inside the given memory block.
49962306a36Sopenharmony_ci */
50062306a36Sopenharmony_cistatic int virtio_mem_sbm_sb_state_bit_nr(struct virtio_mem *vm,
50162306a36Sopenharmony_ci					  unsigned long mb_id, int sb_id)
50262306a36Sopenharmony_ci{
50362306a36Sopenharmony_ci	return (mb_id - vm->sbm.first_mb_id) * vm->sbm.sbs_per_mb + sb_id;
50462306a36Sopenharmony_ci}
50562306a36Sopenharmony_ci
50662306a36Sopenharmony_ci/*
50762306a36Sopenharmony_ci * Mark all selected subblocks plugged.
50862306a36Sopenharmony_ci *
50962306a36Sopenharmony_ci * Will not modify the state of the memory block.
51062306a36Sopenharmony_ci */
51162306a36Sopenharmony_cistatic void virtio_mem_sbm_set_sb_plugged(struct virtio_mem *vm,
51262306a36Sopenharmony_ci					  unsigned long mb_id, int sb_id,
51362306a36Sopenharmony_ci					  int count)
51462306a36Sopenharmony_ci{
51562306a36Sopenharmony_ci	const int bit = virtio_mem_sbm_sb_state_bit_nr(vm, mb_id, sb_id);
51662306a36Sopenharmony_ci
51762306a36Sopenharmony_ci	__bitmap_set(vm->sbm.sb_states, bit, count);
51862306a36Sopenharmony_ci}
51962306a36Sopenharmony_ci
52062306a36Sopenharmony_ci/*
52162306a36Sopenharmony_ci * Mark all selected subblocks unplugged.
52262306a36Sopenharmony_ci *
52362306a36Sopenharmony_ci * Will not modify the state of the memory block.
52462306a36Sopenharmony_ci */
52562306a36Sopenharmony_cistatic void virtio_mem_sbm_set_sb_unplugged(struct virtio_mem *vm,
52662306a36Sopenharmony_ci					    unsigned long mb_id, int sb_id,
52762306a36Sopenharmony_ci					    int count)
52862306a36Sopenharmony_ci{
52962306a36Sopenharmony_ci	const int bit = virtio_mem_sbm_sb_state_bit_nr(vm, mb_id, sb_id);
53062306a36Sopenharmony_ci
53162306a36Sopenharmony_ci	__bitmap_clear(vm->sbm.sb_states, bit, count);
53262306a36Sopenharmony_ci}
53362306a36Sopenharmony_ci
53462306a36Sopenharmony_ci/*
53562306a36Sopenharmony_ci * Test if all selected subblocks are plugged.
53662306a36Sopenharmony_ci */
53762306a36Sopenharmony_cistatic bool virtio_mem_sbm_test_sb_plugged(struct virtio_mem *vm,
53862306a36Sopenharmony_ci					   unsigned long mb_id, int sb_id,
53962306a36Sopenharmony_ci					   int count)
54062306a36Sopenharmony_ci{
54162306a36Sopenharmony_ci	const int bit = virtio_mem_sbm_sb_state_bit_nr(vm, mb_id, sb_id);
54262306a36Sopenharmony_ci
54362306a36Sopenharmony_ci	if (count == 1)
54462306a36Sopenharmony_ci		return test_bit(bit, vm->sbm.sb_states);
54562306a36Sopenharmony_ci
54662306a36Sopenharmony_ci	/* TODO: Helper similar to bitmap_set() */
54762306a36Sopenharmony_ci	return find_next_zero_bit(vm->sbm.sb_states, bit + count, bit) >=
54862306a36Sopenharmony_ci	       bit + count;
54962306a36Sopenharmony_ci}
55062306a36Sopenharmony_ci
55162306a36Sopenharmony_ci/*
55262306a36Sopenharmony_ci * Test if all selected subblocks are unplugged.
55362306a36Sopenharmony_ci */
55462306a36Sopenharmony_cistatic bool virtio_mem_sbm_test_sb_unplugged(struct virtio_mem *vm,
55562306a36Sopenharmony_ci					     unsigned long mb_id, int sb_id,
55662306a36Sopenharmony_ci					     int count)
55762306a36Sopenharmony_ci{
55862306a36Sopenharmony_ci	const int bit = virtio_mem_sbm_sb_state_bit_nr(vm, mb_id, sb_id);
55962306a36Sopenharmony_ci
56062306a36Sopenharmony_ci	/* TODO: Helper similar to bitmap_set() */
56162306a36Sopenharmony_ci	return find_next_bit(vm->sbm.sb_states, bit + count, bit) >=
56262306a36Sopenharmony_ci	       bit + count;
56362306a36Sopenharmony_ci}
56462306a36Sopenharmony_ci
56562306a36Sopenharmony_ci/*
56662306a36Sopenharmony_ci * Find the first unplugged subblock. Returns vm->sbm.sbs_per_mb in case there is
56762306a36Sopenharmony_ci * none.
56862306a36Sopenharmony_ci */
56962306a36Sopenharmony_cistatic int virtio_mem_sbm_first_unplugged_sb(struct virtio_mem *vm,
57062306a36Sopenharmony_ci					    unsigned long mb_id)
57162306a36Sopenharmony_ci{
57262306a36Sopenharmony_ci	const int bit = virtio_mem_sbm_sb_state_bit_nr(vm, mb_id, 0);
57362306a36Sopenharmony_ci
57462306a36Sopenharmony_ci	return find_next_zero_bit(vm->sbm.sb_states,
57562306a36Sopenharmony_ci				  bit + vm->sbm.sbs_per_mb, bit) - bit;
57662306a36Sopenharmony_ci}
57762306a36Sopenharmony_ci
57862306a36Sopenharmony_ci/*
57962306a36Sopenharmony_ci * Prepare the subblock bitmap for the next memory block.
58062306a36Sopenharmony_ci */
58162306a36Sopenharmony_cistatic int virtio_mem_sbm_sb_states_prepare_next_mb(struct virtio_mem *vm)
58262306a36Sopenharmony_ci{
58362306a36Sopenharmony_ci	const unsigned long old_nb_mb = vm->sbm.next_mb_id - vm->sbm.first_mb_id;
58462306a36Sopenharmony_ci	const unsigned long old_nb_bits = old_nb_mb * vm->sbm.sbs_per_mb;
58562306a36Sopenharmony_ci	const unsigned long new_nb_bits = (old_nb_mb + 1) * vm->sbm.sbs_per_mb;
58662306a36Sopenharmony_ci	int old_pages = PFN_UP(BITS_TO_LONGS(old_nb_bits) * sizeof(long));
58762306a36Sopenharmony_ci	int new_pages = PFN_UP(BITS_TO_LONGS(new_nb_bits) * sizeof(long));
58862306a36Sopenharmony_ci	unsigned long *new_bitmap, *old_bitmap;
58962306a36Sopenharmony_ci
59062306a36Sopenharmony_ci	if (vm->sbm.sb_states && old_pages == new_pages)
59162306a36Sopenharmony_ci		return 0;
59262306a36Sopenharmony_ci
59362306a36Sopenharmony_ci	new_bitmap = vzalloc(new_pages * PAGE_SIZE);
59462306a36Sopenharmony_ci	if (!new_bitmap)
59562306a36Sopenharmony_ci		return -ENOMEM;
59662306a36Sopenharmony_ci
59762306a36Sopenharmony_ci	mutex_lock(&vm->hotplug_mutex);
59862306a36Sopenharmony_ci	if (vm->sbm.sb_states)
59962306a36Sopenharmony_ci		memcpy(new_bitmap, vm->sbm.sb_states, old_pages * PAGE_SIZE);
60062306a36Sopenharmony_ci
60162306a36Sopenharmony_ci	old_bitmap = vm->sbm.sb_states;
60262306a36Sopenharmony_ci	vm->sbm.sb_states = new_bitmap;
60362306a36Sopenharmony_ci	mutex_unlock(&vm->hotplug_mutex);
60462306a36Sopenharmony_ci
60562306a36Sopenharmony_ci	vfree(old_bitmap);
60662306a36Sopenharmony_ci	return 0;
60762306a36Sopenharmony_ci}
60862306a36Sopenharmony_ci
60962306a36Sopenharmony_ci/*
61062306a36Sopenharmony_ci * Test if we could add memory without creating too much offline memory -
61162306a36Sopenharmony_ci * to avoid running OOM if memory is getting onlined deferred.
61262306a36Sopenharmony_ci */
61362306a36Sopenharmony_cistatic bool virtio_mem_could_add_memory(struct virtio_mem *vm, uint64_t size)
61462306a36Sopenharmony_ci{
61562306a36Sopenharmony_ci	if (WARN_ON_ONCE(size > vm->offline_threshold))
61662306a36Sopenharmony_ci		return false;
61762306a36Sopenharmony_ci
61862306a36Sopenharmony_ci	return atomic64_read(&vm->offline_size) + size <= vm->offline_threshold;
61962306a36Sopenharmony_ci}
62062306a36Sopenharmony_ci
62162306a36Sopenharmony_ci/*
62262306a36Sopenharmony_ci * Try adding memory to Linux. Will usually only fail if out of memory.
62362306a36Sopenharmony_ci *
62462306a36Sopenharmony_ci * Must not be called with the vm->hotplug_mutex held (possible deadlock with
62562306a36Sopenharmony_ci * onlining code).
62662306a36Sopenharmony_ci *
62762306a36Sopenharmony_ci * Will not modify the state of memory blocks in virtio-mem.
62862306a36Sopenharmony_ci */
62962306a36Sopenharmony_cistatic int virtio_mem_add_memory(struct virtio_mem *vm, uint64_t addr,
63062306a36Sopenharmony_ci				 uint64_t size)
63162306a36Sopenharmony_ci{
63262306a36Sopenharmony_ci	int rc;
63362306a36Sopenharmony_ci
63462306a36Sopenharmony_ci	/*
63562306a36Sopenharmony_ci	 * When force-unloading the driver and we still have memory added to
63662306a36Sopenharmony_ci	 * Linux, the resource name has to stay.
63762306a36Sopenharmony_ci	 */
63862306a36Sopenharmony_ci	if (!vm->resource_name) {
63962306a36Sopenharmony_ci		vm->resource_name = kstrdup_const("System RAM (virtio_mem)",
64062306a36Sopenharmony_ci						  GFP_KERNEL);
64162306a36Sopenharmony_ci		if (!vm->resource_name)
64262306a36Sopenharmony_ci			return -ENOMEM;
64362306a36Sopenharmony_ci	}
64462306a36Sopenharmony_ci
64562306a36Sopenharmony_ci	dev_dbg(&vm->vdev->dev, "adding memory: 0x%llx - 0x%llx\n", addr,
64662306a36Sopenharmony_ci		addr + size - 1);
64762306a36Sopenharmony_ci	/* Memory might get onlined immediately. */
64862306a36Sopenharmony_ci	atomic64_add(size, &vm->offline_size);
64962306a36Sopenharmony_ci	rc = add_memory_driver_managed(vm->mgid, addr, size, vm->resource_name,
65062306a36Sopenharmony_ci				       MHP_MERGE_RESOURCE | MHP_NID_IS_MGID);
65162306a36Sopenharmony_ci	if (rc) {
65262306a36Sopenharmony_ci		atomic64_sub(size, &vm->offline_size);
65362306a36Sopenharmony_ci		dev_warn(&vm->vdev->dev, "adding memory failed: %d\n", rc);
65462306a36Sopenharmony_ci		/*
65562306a36Sopenharmony_ci		 * TODO: Linux MM does not properly clean up yet in all cases
65662306a36Sopenharmony_ci		 * where adding of memory failed - especially on -ENOMEM.
65762306a36Sopenharmony_ci		 */
65862306a36Sopenharmony_ci	}
65962306a36Sopenharmony_ci	return rc;
66062306a36Sopenharmony_ci}
66162306a36Sopenharmony_ci
/*
 * Try adding the single Linux memory block "mb_id"; the same rules as for
 * virtio_mem_add_memory() apply.
 */
static int virtio_mem_sbm_add_mb(struct virtio_mem *vm, unsigned long mb_id)
{
	return virtio_mem_add_memory(vm, virtio_mem_mb_id_to_phys(mb_id),
				     memory_block_size_bytes());
}
67262306a36Sopenharmony_ci
67362306a36Sopenharmony_ci/*
67462306a36Sopenharmony_ci * See virtio_mem_add_memory(): Try adding a big block.
67562306a36Sopenharmony_ci */
67662306a36Sopenharmony_cistatic int virtio_mem_bbm_add_bb(struct virtio_mem *vm, unsigned long bb_id)
67762306a36Sopenharmony_ci{
67862306a36Sopenharmony_ci	const uint64_t addr = virtio_mem_bb_id_to_phys(vm, bb_id);
67962306a36Sopenharmony_ci	const uint64_t size = vm->bbm.bb_size;
68062306a36Sopenharmony_ci
68162306a36Sopenharmony_ci	return virtio_mem_add_memory(vm, addr, size);
68262306a36Sopenharmony_ci}
68362306a36Sopenharmony_ci
68462306a36Sopenharmony_ci/*
68562306a36Sopenharmony_ci * Try removing memory from Linux. Will only fail if memory blocks aren't
68662306a36Sopenharmony_ci * offline.
68762306a36Sopenharmony_ci *
68862306a36Sopenharmony_ci * Must not be called with the vm->hotplug_mutex held (possible deadlock with
68962306a36Sopenharmony_ci * onlining code).
69062306a36Sopenharmony_ci *
69162306a36Sopenharmony_ci * Will not modify the state of memory blocks in virtio-mem.
69262306a36Sopenharmony_ci */
69362306a36Sopenharmony_cistatic int virtio_mem_remove_memory(struct virtio_mem *vm, uint64_t addr,
69462306a36Sopenharmony_ci				    uint64_t size)
69562306a36Sopenharmony_ci{
69662306a36Sopenharmony_ci	int rc;
69762306a36Sopenharmony_ci
69862306a36Sopenharmony_ci	dev_dbg(&vm->vdev->dev, "removing memory: 0x%llx - 0x%llx\n", addr,
69962306a36Sopenharmony_ci		addr + size - 1);
70062306a36Sopenharmony_ci	rc = remove_memory(addr, size);
70162306a36Sopenharmony_ci	if (!rc) {
70262306a36Sopenharmony_ci		atomic64_sub(size, &vm->offline_size);
70362306a36Sopenharmony_ci		/*
70462306a36Sopenharmony_ci		 * We might have freed up memory we can now unplug, retry
70562306a36Sopenharmony_ci		 * immediately instead of waiting.
70662306a36Sopenharmony_ci		 */
70762306a36Sopenharmony_ci		virtio_mem_retry(vm);
70862306a36Sopenharmony_ci	} else {
70962306a36Sopenharmony_ci		dev_dbg(&vm->vdev->dev, "removing memory failed: %d\n", rc);
71062306a36Sopenharmony_ci	}
71162306a36Sopenharmony_ci	return rc;
71262306a36Sopenharmony_ci}
71362306a36Sopenharmony_ci
/*
 * Try removing the single Linux memory block "mb_id"; the same rules as for
 * virtio_mem_remove_memory() apply.
 */
static int virtio_mem_sbm_remove_mb(struct virtio_mem *vm, unsigned long mb_id)
{
	return virtio_mem_remove_memory(vm, virtio_mem_mb_id_to_phys(mb_id),
					memory_block_size_bytes());
}
72462306a36Sopenharmony_ci
72562306a36Sopenharmony_ci/*
72662306a36Sopenharmony_ci * Try offlining and removing memory from Linux.
72762306a36Sopenharmony_ci *
72862306a36Sopenharmony_ci * Must not be called with the vm->hotplug_mutex held (possible deadlock with
72962306a36Sopenharmony_ci * onlining code).
73062306a36Sopenharmony_ci *
73162306a36Sopenharmony_ci * Will not modify the state of memory blocks in virtio-mem.
73262306a36Sopenharmony_ci */
73362306a36Sopenharmony_cistatic int virtio_mem_offline_and_remove_memory(struct virtio_mem *vm,
73462306a36Sopenharmony_ci						uint64_t addr,
73562306a36Sopenharmony_ci						uint64_t size)
73662306a36Sopenharmony_ci{
73762306a36Sopenharmony_ci	int rc;
73862306a36Sopenharmony_ci
73962306a36Sopenharmony_ci	dev_dbg(&vm->vdev->dev,
74062306a36Sopenharmony_ci		"offlining and removing memory: 0x%llx - 0x%llx\n", addr,
74162306a36Sopenharmony_ci		addr + size - 1);
74262306a36Sopenharmony_ci
74362306a36Sopenharmony_ci	rc = offline_and_remove_memory(addr, size);
74462306a36Sopenharmony_ci	if (!rc) {
74562306a36Sopenharmony_ci		atomic64_sub(size, &vm->offline_size);
74662306a36Sopenharmony_ci		/*
74762306a36Sopenharmony_ci		 * We might have freed up memory we can now unplug, retry
74862306a36Sopenharmony_ci		 * immediately instead of waiting.
74962306a36Sopenharmony_ci		 */
75062306a36Sopenharmony_ci		virtio_mem_retry(vm);
75162306a36Sopenharmony_ci		return 0;
75262306a36Sopenharmony_ci	}
75362306a36Sopenharmony_ci	dev_dbg(&vm->vdev->dev, "offlining and removing memory failed: %d\n", rc);
75462306a36Sopenharmony_ci	/*
75562306a36Sopenharmony_ci	 * We don't really expect this to fail, because we fake-offlined all
75662306a36Sopenharmony_ci	 * memory already. But it could fail in corner cases.
75762306a36Sopenharmony_ci	 */
75862306a36Sopenharmony_ci	WARN_ON_ONCE(rc != -ENOMEM && rc != -EBUSY);
75962306a36Sopenharmony_ci	return rc == -ENOMEM ? -ENOMEM : -EBUSY;
76062306a36Sopenharmony_ci}
76162306a36Sopenharmony_ci
/*
 * Try offlining and removing the single Linux memory block "mb_id"; the same
 * rules as for virtio_mem_offline_and_remove_memory() apply.
 */
static int virtio_mem_sbm_offline_and_remove_mb(struct virtio_mem *vm,
						unsigned long mb_id)
{
	return virtio_mem_offline_and_remove_memory(vm,
						    virtio_mem_mb_id_to_phys(mb_id),
						    memory_block_size_bytes());
}
77462306a36Sopenharmony_ci
77562306a36Sopenharmony_ci/*
77662306a36Sopenharmony_ci * Try (offlining and) removing memory from Linux in case all subblocks are
77762306a36Sopenharmony_ci * unplugged. Can be called on online and offline memory blocks.
77862306a36Sopenharmony_ci *
77962306a36Sopenharmony_ci * May modify the state of memory blocks in virtio-mem.
78062306a36Sopenharmony_ci */
78162306a36Sopenharmony_cistatic int virtio_mem_sbm_try_remove_unplugged_mb(struct virtio_mem *vm,
78262306a36Sopenharmony_ci						  unsigned long mb_id)
78362306a36Sopenharmony_ci{
78462306a36Sopenharmony_ci	int rc;
78562306a36Sopenharmony_ci
78662306a36Sopenharmony_ci	/*
78762306a36Sopenharmony_ci	 * Once all subblocks of a memory block were unplugged, offline and
78862306a36Sopenharmony_ci	 * remove it.
78962306a36Sopenharmony_ci	 */
79062306a36Sopenharmony_ci	if (!virtio_mem_sbm_test_sb_unplugged(vm, mb_id, 0, vm->sbm.sbs_per_mb))
79162306a36Sopenharmony_ci		return 0;
79262306a36Sopenharmony_ci
79362306a36Sopenharmony_ci	/* offline_and_remove_memory() works for online and offline memory. */
79462306a36Sopenharmony_ci	mutex_unlock(&vm->hotplug_mutex);
79562306a36Sopenharmony_ci	rc = virtio_mem_sbm_offline_and_remove_mb(vm, mb_id);
79662306a36Sopenharmony_ci	mutex_lock(&vm->hotplug_mutex);
79762306a36Sopenharmony_ci	if (!rc)
79862306a36Sopenharmony_ci		virtio_mem_sbm_set_mb_state(vm, mb_id,
79962306a36Sopenharmony_ci					    VIRTIO_MEM_SBM_MB_UNUSED);
80062306a36Sopenharmony_ci	return rc;
80162306a36Sopenharmony_ci}
80262306a36Sopenharmony_ci
80362306a36Sopenharmony_ci/*
80462306a36Sopenharmony_ci * See virtio_mem_offline_and_remove_memory(): Try to offline and remove a
80562306a36Sopenharmony_ci * all Linux memory blocks covered by the big block.
80662306a36Sopenharmony_ci */
80762306a36Sopenharmony_cistatic int virtio_mem_bbm_offline_and_remove_bb(struct virtio_mem *vm,
80862306a36Sopenharmony_ci						unsigned long bb_id)
80962306a36Sopenharmony_ci{
81062306a36Sopenharmony_ci	const uint64_t addr = virtio_mem_bb_id_to_phys(vm, bb_id);
81162306a36Sopenharmony_ci	const uint64_t size = vm->bbm.bb_size;
81262306a36Sopenharmony_ci
81362306a36Sopenharmony_ci	return virtio_mem_offline_and_remove_memory(vm, addr, size);
81462306a36Sopenharmony_ci}
81562306a36Sopenharmony_ci
81662306a36Sopenharmony_ci/*
81762306a36Sopenharmony_ci * Trigger the workqueue so the device can perform its magic.
81862306a36Sopenharmony_ci */
81962306a36Sopenharmony_cistatic void virtio_mem_retry(struct virtio_mem *vm)
82062306a36Sopenharmony_ci{
82162306a36Sopenharmony_ci	unsigned long flags;
82262306a36Sopenharmony_ci
82362306a36Sopenharmony_ci	spin_lock_irqsave(&vm->removal_lock, flags);
82462306a36Sopenharmony_ci	if (!vm->removing)
82562306a36Sopenharmony_ci		queue_work(system_freezable_wq, &vm->wq);
82662306a36Sopenharmony_ci	spin_unlock_irqrestore(&vm->removal_lock, flags);
82762306a36Sopenharmony_ci}
82862306a36Sopenharmony_ci
82962306a36Sopenharmony_cistatic int virtio_mem_translate_node_id(struct virtio_mem *vm, uint16_t node_id)
83062306a36Sopenharmony_ci{
83162306a36Sopenharmony_ci	int node = NUMA_NO_NODE;
83262306a36Sopenharmony_ci
83362306a36Sopenharmony_ci#if defined(CONFIG_ACPI_NUMA)
83462306a36Sopenharmony_ci	if (virtio_has_feature(vm->vdev, VIRTIO_MEM_F_ACPI_PXM))
83562306a36Sopenharmony_ci		node = pxm_to_node(node_id);
83662306a36Sopenharmony_ci#endif
83762306a36Sopenharmony_ci	return node;
83862306a36Sopenharmony_ci}
83962306a36Sopenharmony_ci
84062306a36Sopenharmony_ci/*
84162306a36Sopenharmony_ci * Test if a virtio-mem device overlaps with the given range. Can be called
84262306a36Sopenharmony_ci * from (notifier) callbacks lockless.
84362306a36Sopenharmony_ci */
84462306a36Sopenharmony_cistatic bool virtio_mem_overlaps_range(struct virtio_mem *vm, uint64_t start,
84562306a36Sopenharmony_ci				      uint64_t size)
84662306a36Sopenharmony_ci{
84762306a36Sopenharmony_ci	return start < vm->addr + vm->region_size && vm->addr < start + size;
84862306a36Sopenharmony_ci}
84962306a36Sopenharmony_ci
85062306a36Sopenharmony_ci/*
85162306a36Sopenharmony_ci * Test if a virtio-mem device contains a given range. Can be called from
85262306a36Sopenharmony_ci * (notifier) callbacks lockless.
85362306a36Sopenharmony_ci */
85462306a36Sopenharmony_cistatic bool virtio_mem_contains_range(struct virtio_mem *vm, uint64_t start,
85562306a36Sopenharmony_ci				      uint64_t size)
85662306a36Sopenharmony_ci{
85762306a36Sopenharmony_ci	return start >= vm->addr && start + size <= vm->addr + vm->region_size;
85862306a36Sopenharmony_ci}
85962306a36Sopenharmony_ci
86062306a36Sopenharmony_cistatic int virtio_mem_sbm_notify_going_online(struct virtio_mem *vm,
86162306a36Sopenharmony_ci					      unsigned long mb_id)
86262306a36Sopenharmony_ci{
86362306a36Sopenharmony_ci	switch (virtio_mem_sbm_get_mb_state(vm, mb_id)) {
86462306a36Sopenharmony_ci	case VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL:
86562306a36Sopenharmony_ci	case VIRTIO_MEM_SBM_MB_OFFLINE:
86662306a36Sopenharmony_ci		return NOTIFY_OK;
86762306a36Sopenharmony_ci	default:
86862306a36Sopenharmony_ci		break;
86962306a36Sopenharmony_ci	}
87062306a36Sopenharmony_ci	dev_warn_ratelimited(&vm->vdev->dev,
87162306a36Sopenharmony_ci			     "memory block onlining denied\n");
87262306a36Sopenharmony_ci	return NOTIFY_BAD;
87362306a36Sopenharmony_ci}
87462306a36Sopenharmony_ci
87562306a36Sopenharmony_cistatic void virtio_mem_sbm_notify_offline(struct virtio_mem *vm,
87662306a36Sopenharmony_ci					  unsigned long mb_id)
87762306a36Sopenharmony_ci{
87862306a36Sopenharmony_ci	switch (virtio_mem_sbm_get_mb_state(vm, mb_id)) {
87962306a36Sopenharmony_ci	case VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL:
88062306a36Sopenharmony_ci	case VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL:
88162306a36Sopenharmony_ci		virtio_mem_sbm_set_mb_state(vm, mb_id,
88262306a36Sopenharmony_ci					    VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL);
88362306a36Sopenharmony_ci		break;
88462306a36Sopenharmony_ci	case VIRTIO_MEM_SBM_MB_KERNEL:
88562306a36Sopenharmony_ci	case VIRTIO_MEM_SBM_MB_MOVABLE:
88662306a36Sopenharmony_ci		virtio_mem_sbm_set_mb_state(vm, mb_id,
88762306a36Sopenharmony_ci					    VIRTIO_MEM_SBM_MB_OFFLINE);
88862306a36Sopenharmony_ci		break;
88962306a36Sopenharmony_ci	default:
89062306a36Sopenharmony_ci		BUG();
89162306a36Sopenharmony_ci		break;
89262306a36Sopenharmony_ci	}
89362306a36Sopenharmony_ci}
89462306a36Sopenharmony_ci
89562306a36Sopenharmony_cistatic void virtio_mem_sbm_notify_online(struct virtio_mem *vm,
89662306a36Sopenharmony_ci					 unsigned long mb_id,
89762306a36Sopenharmony_ci					 unsigned long start_pfn)
89862306a36Sopenharmony_ci{
89962306a36Sopenharmony_ci	const bool is_movable = is_zone_movable_page(pfn_to_page(start_pfn));
90062306a36Sopenharmony_ci	int new_state;
90162306a36Sopenharmony_ci
90262306a36Sopenharmony_ci	switch (virtio_mem_sbm_get_mb_state(vm, mb_id)) {
90362306a36Sopenharmony_ci	case VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL:
90462306a36Sopenharmony_ci		new_state = VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL;
90562306a36Sopenharmony_ci		if (is_movable)
90662306a36Sopenharmony_ci			new_state = VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL;
90762306a36Sopenharmony_ci		break;
90862306a36Sopenharmony_ci	case VIRTIO_MEM_SBM_MB_OFFLINE:
90962306a36Sopenharmony_ci		new_state = VIRTIO_MEM_SBM_MB_KERNEL;
91062306a36Sopenharmony_ci		if (is_movable)
91162306a36Sopenharmony_ci			new_state = VIRTIO_MEM_SBM_MB_MOVABLE;
91262306a36Sopenharmony_ci		break;
91362306a36Sopenharmony_ci	default:
91462306a36Sopenharmony_ci		BUG();
91562306a36Sopenharmony_ci		break;
91662306a36Sopenharmony_ci	}
91762306a36Sopenharmony_ci	virtio_mem_sbm_set_mb_state(vm, mb_id, new_state);
91862306a36Sopenharmony_ci}
91962306a36Sopenharmony_ci
92062306a36Sopenharmony_cistatic void virtio_mem_sbm_notify_going_offline(struct virtio_mem *vm,
92162306a36Sopenharmony_ci						unsigned long mb_id)
92262306a36Sopenharmony_ci{
92362306a36Sopenharmony_ci	const unsigned long nr_pages = PFN_DOWN(vm->sbm.sb_size);
92462306a36Sopenharmony_ci	unsigned long pfn;
92562306a36Sopenharmony_ci	int sb_id;
92662306a36Sopenharmony_ci
92762306a36Sopenharmony_ci	for (sb_id = 0; sb_id < vm->sbm.sbs_per_mb; sb_id++) {
92862306a36Sopenharmony_ci		if (virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id, 1))
92962306a36Sopenharmony_ci			continue;
93062306a36Sopenharmony_ci		pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) +
93162306a36Sopenharmony_ci			       sb_id * vm->sbm.sb_size);
93262306a36Sopenharmony_ci		virtio_mem_fake_offline_going_offline(pfn, nr_pages);
93362306a36Sopenharmony_ci	}
93462306a36Sopenharmony_ci}
93562306a36Sopenharmony_ci
93662306a36Sopenharmony_cistatic void virtio_mem_sbm_notify_cancel_offline(struct virtio_mem *vm,
93762306a36Sopenharmony_ci						 unsigned long mb_id)
93862306a36Sopenharmony_ci{
93962306a36Sopenharmony_ci	const unsigned long nr_pages = PFN_DOWN(vm->sbm.sb_size);
94062306a36Sopenharmony_ci	unsigned long pfn;
94162306a36Sopenharmony_ci	int sb_id;
94262306a36Sopenharmony_ci
94362306a36Sopenharmony_ci	for (sb_id = 0; sb_id < vm->sbm.sbs_per_mb; sb_id++) {
94462306a36Sopenharmony_ci		if (virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id, 1))
94562306a36Sopenharmony_ci			continue;
94662306a36Sopenharmony_ci		pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) +
94762306a36Sopenharmony_ci			       sb_id * vm->sbm.sb_size);
94862306a36Sopenharmony_ci		virtio_mem_fake_offline_cancel_offline(pfn, nr_pages);
94962306a36Sopenharmony_ci	}
95062306a36Sopenharmony_ci}
95162306a36Sopenharmony_ci
95262306a36Sopenharmony_cistatic void virtio_mem_bbm_notify_going_offline(struct virtio_mem *vm,
95362306a36Sopenharmony_ci						unsigned long bb_id,
95462306a36Sopenharmony_ci						unsigned long pfn,
95562306a36Sopenharmony_ci						unsigned long nr_pages)
95662306a36Sopenharmony_ci{
95762306a36Sopenharmony_ci	/*
95862306a36Sopenharmony_ci	 * When marked as "fake-offline", all online memory of this device block
95962306a36Sopenharmony_ci	 * is allocated by us. Otherwise, we don't have any memory allocated.
96062306a36Sopenharmony_ci	 */
96162306a36Sopenharmony_ci	if (virtio_mem_bbm_get_bb_state(vm, bb_id) !=
96262306a36Sopenharmony_ci	    VIRTIO_MEM_BBM_BB_FAKE_OFFLINE)
96362306a36Sopenharmony_ci		return;
96462306a36Sopenharmony_ci	virtio_mem_fake_offline_going_offline(pfn, nr_pages);
96562306a36Sopenharmony_ci}
96662306a36Sopenharmony_ci
96762306a36Sopenharmony_cistatic void virtio_mem_bbm_notify_cancel_offline(struct virtio_mem *vm,
96862306a36Sopenharmony_ci						 unsigned long bb_id,
96962306a36Sopenharmony_ci						 unsigned long pfn,
97062306a36Sopenharmony_ci						 unsigned long nr_pages)
97162306a36Sopenharmony_ci{
97262306a36Sopenharmony_ci	if (virtio_mem_bbm_get_bb_state(vm, bb_id) !=
97362306a36Sopenharmony_ci	    VIRTIO_MEM_BBM_BB_FAKE_OFFLINE)
97462306a36Sopenharmony_ci		return;
97562306a36Sopenharmony_ci	virtio_mem_fake_offline_cancel_offline(pfn, nr_pages);
97662306a36Sopenharmony_ci}
97762306a36Sopenharmony_ci
97862306a36Sopenharmony_ci/*
97962306a36Sopenharmony_ci * This callback will either be called synchronously from add_memory() or
98062306a36Sopenharmony_ci * asynchronously (e.g., triggered via user space). We have to be careful
98162306a36Sopenharmony_ci * with locking when calling add_memory().
98262306a36Sopenharmony_ci */
98362306a36Sopenharmony_cistatic int virtio_mem_memory_notifier_cb(struct notifier_block *nb,
98462306a36Sopenharmony_ci					 unsigned long action, void *arg)
98562306a36Sopenharmony_ci{
98662306a36Sopenharmony_ci	struct virtio_mem *vm = container_of(nb, struct virtio_mem,
98762306a36Sopenharmony_ci					     memory_notifier);
98862306a36Sopenharmony_ci	struct memory_notify *mhp = arg;
98962306a36Sopenharmony_ci	const unsigned long start = PFN_PHYS(mhp->start_pfn);
99062306a36Sopenharmony_ci	const unsigned long size = PFN_PHYS(mhp->nr_pages);
99162306a36Sopenharmony_ci	int rc = NOTIFY_OK;
99262306a36Sopenharmony_ci	unsigned long id;
99362306a36Sopenharmony_ci
99462306a36Sopenharmony_ci	if (!virtio_mem_overlaps_range(vm, start, size))
99562306a36Sopenharmony_ci		return NOTIFY_DONE;
99662306a36Sopenharmony_ci
99762306a36Sopenharmony_ci	if (vm->in_sbm) {
99862306a36Sopenharmony_ci		id = virtio_mem_phys_to_mb_id(start);
99962306a36Sopenharmony_ci		/*
100062306a36Sopenharmony_ci		 * In SBM, we add memory in separate memory blocks - we expect
100162306a36Sopenharmony_ci		 * it to be onlined/offlined in the same granularity. Bail out
100262306a36Sopenharmony_ci		 * if this ever changes.
100362306a36Sopenharmony_ci		 */
100462306a36Sopenharmony_ci		if (WARN_ON_ONCE(size != memory_block_size_bytes() ||
100562306a36Sopenharmony_ci				 !IS_ALIGNED(start, memory_block_size_bytes())))
100662306a36Sopenharmony_ci			return NOTIFY_BAD;
100762306a36Sopenharmony_ci	} else {
100862306a36Sopenharmony_ci		id = virtio_mem_phys_to_bb_id(vm, start);
100962306a36Sopenharmony_ci		/*
101062306a36Sopenharmony_ci		 * In BBM, we only care about onlining/offlining happening
101162306a36Sopenharmony_ci		 * within a single big block, we don't care about the
101262306a36Sopenharmony_ci		 * actual granularity as we don't track individual Linux
101362306a36Sopenharmony_ci		 * memory blocks.
101462306a36Sopenharmony_ci		 */
101562306a36Sopenharmony_ci		if (WARN_ON_ONCE(id != virtio_mem_phys_to_bb_id(vm, start + size - 1)))
101662306a36Sopenharmony_ci			return NOTIFY_BAD;
101762306a36Sopenharmony_ci	}
101862306a36Sopenharmony_ci
101962306a36Sopenharmony_ci	/*
102062306a36Sopenharmony_ci	 * Avoid circular locking lockdep warnings. We lock the mutex
102162306a36Sopenharmony_ci	 * e.g., in MEM_GOING_ONLINE and unlock it in MEM_ONLINE. The
102262306a36Sopenharmony_ci	 * blocking_notifier_call_chain() has it's own lock, which gets unlocked
102362306a36Sopenharmony_ci	 * between both notifier calls and will bail out. False positive.
102462306a36Sopenharmony_ci	 */
102562306a36Sopenharmony_ci	lockdep_off();
102662306a36Sopenharmony_ci
102762306a36Sopenharmony_ci	switch (action) {
102862306a36Sopenharmony_ci	case MEM_GOING_OFFLINE:
102962306a36Sopenharmony_ci		mutex_lock(&vm->hotplug_mutex);
103062306a36Sopenharmony_ci		if (vm->removing) {
103162306a36Sopenharmony_ci			rc = notifier_from_errno(-EBUSY);
103262306a36Sopenharmony_ci			mutex_unlock(&vm->hotplug_mutex);
103362306a36Sopenharmony_ci			break;
103462306a36Sopenharmony_ci		}
103562306a36Sopenharmony_ci		vm->hotplug_active = true;
103662306a36Sopenharmony_ci		if (vm->in_sbm)
103762306a36Sopenharmony_ci			virtio_mem_sbm_notify_going_offline(vm, id);
103862306a36Sopenharmony_ci		else
103962306a36Sopenharmony_ci			virtio_mem_bbm_notify_going_offline(vm, id,
104062306a36Sopenharmony_ci							    mhp->start_pfn,
104162306a36Sopenharmony_ci							    mhp->nr_pages);
104262306a36Sopenharmony_ci		break;
104362306a36Sopenharmony_ci	case MEM_GOING_ONLINE:
104462306a36Sopenharmony_ci		mutex_lock(&vm->hotplug_mutex);
104562306a36Sopenharmony_ci		if (vm->removing) {
104662306a36Sopenharmony_ci			rc = notifier_from_errno(-EBUSY);
104762306a36Sopenharmony_ci			mutex_unlock(&vm->hotplug_mutex);
104862306a36Sopenharmony_ci			break;
104962306a36Sopenharmony_ci		}
105062306a36Sopenharmony_ci		vm->hotplug_active = true;
105162306a36Sopenharmony_ci		if (vm->in_sbm)
105262306a36Sopenharmony_ci			rc = virtio_mem_sbm_notify_going_online(vm, id);
105362306a36Sopenharmony_ci		break;
105462306a36Sopenharmony_ci	case MEM_OFFLINE:
105562306a36Sopenharmony_ci		if (vm->in_sbm)
105662306a36Sopenharmony_ci			virtio_mem_sbm_notify_offline(vm, id);
105762306a36Sopenharmony_ci
105862306a36Sopenharmony_ci		atomic64_add(size, &vm->offline_size);
105962306a36Sopenharmony_ci		/*
106062306a36Sopenharmony_ci		 * Trigger the workqueue. Now that we have some offline memory,
106162306a36Sopenharmony_ci		 * maybe we can handle pending unplug requests.
106262306a36Sopenharmony_ci		 */
106362306a36Sopenharmony_ci		if (!unplug_online)
106462306a36Sopenharmony_ci			virtio_mem_retry(vm);
106562306a36Sopenharmony_ci
106662306a36Sopenharmony_ci		vm->hotplug_active = false;
106762306a36Sopenharmony_ci		mutex_unlock(&vm->hotplug_mutex);
106862306a36Sopenharmony_ci		break;
106962306a36Sopenharmony_ci	case MEM_ONLINE:
107062306a36Sopenharmony_ci		if (vm->in_sbm)
107162306a36Sopenharmony_ci			virtio_mem_sbm_notify_online(vm, id, mhp->start_pfn);
107262306a36Sopenharmony_ci
107362306a36Sopenharmony_ci		atomic64_sub(size, &vm->offline_size);
107462306a36Sopenharmony_ci		/*
107562306a36Sopenharmony_ci		 * Start adding more memory once we onlined half of our
107662306a36Sopenharmony_ci		 * threshold. Don't trigger if it's possibly due to our actipn
107762306a36Sopenharmony_ci		 * (e.g., us adding memory which gets onlined immediately from
107862306a36Sopenharmony_ci		 * the core).
107962306a36Sopenharmony_ci		 */
108062306a36Sopenharmony_ci		if (!atomic_read(&vm->wq_active) &&
108162306a36Sopenharmony_ci		    virtio_mem_could_add_memory(vm, vm->offline_threshold / 2))
108262306a36Sopenharmony_ci			virtio_mem_retry(vm);
108362306a36Sopenharmony_ci
108462306a36Sopenharmony_ci		vm->hotplug_active = false;
108562306a36Sopenharmony_ci		mutex_unlock(&vm->hotplug_mutex);
108662306a36Sopenharmony_ci		break;
108762306a36Sopenharmony_ci	case MEM_CANCEL_OFFLINE:
108862306a36Sopenharmony_ci		if (!vm->hotplug_active)
108962306a36Sopenharmony_ci			break;
109062306a36Sopenharmony_ci		if (vm->in_sbm)
109162306a36Sopenharmony_ci			virtio_mem_sbm_notify_cancel_offline(vm, id);
109262306a36Sopenharmony_ci		else
109362306a36Sopenharmony_ci			virtio_mem_bbm_notify_cancel_offline(vm, id,
109462306a36Sopenharmony_ci							     mhp->start_pfn,
109562306a36Sopenharmony_ci							     mhp->nr_pages);
109662306a36Sopenharmony_ci		vm->hotplug_active = false;
109762306a36Sopenharmony_ci		mutex_unlock(&vm->hotplug_mutex);
109862306a36Sopenharmony_ci		break;
109962306a36Sopenharmony_ci	case MEM_CANCEL_ONLINE:
110062306a36Sopenharmony_ci		if (!vm->hotplug_active)
110162306a36Sopenharmony_ci			break;
110262306a36Sopenharmony_ci		vm->hotplug_active = false;
110362306a36Sopenharmony_ci		mutex_unlock(&vm->hotplug_mutex);
110462306a36Sopenharmony_ci		break;
110562306a36Sopenharmony_ci	default:
110662306a36Sopenharmony_ci		break;
110762306a36Sopenharmony_ci	}
110862306a36Sopenharmony_ci
110962306a36Sopenharmony_ci	lockdep_on();
111062306a36Sopenharmony_ci
111162306a36Sopenharmony_ci	return rc;
111262306a36Sopenharmony_ci}
111362306a36Sopenharmony_ci
111462306a36Sopenharmony_ci/*
111562306a36Sopenharmony_ci * Set a range of pages PG_offline. Remember pages that were never onlined
111662306a36Sopenharmony_ci * (via generic_online_page()) using PageDirty().
111762306a36Sopenharmony_ci */
111862306a36Sopenharmony_cistatic void virtio_mem_set_fake_offline(unsigned long pfn,
111962306a36Sopenharmony_ci					unsigned long nr_pages, bool onlined)
112062306a36Sopenharmony_ci{
112162306a36Sopenharmony_ci	page_offline_begin();
112262306a36Sopenharmony_ci	for (; nr_pages--; pfn++) {
112362306a36Sopenharmony_ci		struct page *page = pfn_to_page(pfn);
112462306a36Sopenharmony_ci
112562306a36Sopenharmony_ci		__SetPageOffline(page);
112662306a36Sopenharmony_ci		if (!onlined) {
112762306a36Sopenharmony_ci			SetPageDirty(page);
112862306a36Sopenharmony_ci			/* FIXME: remove after cleanups */
112962306a36Sopenharmony_ci			ClearPageReserved(page);
113062306a36Sopenharmony_ci		}
113162306a36Sopenharmony_ci	}
113262306a36Sopenharmony_ci	page_offline_end();
113362306a36Sopenharmony_ci}
113462306a36Sopenharmony_ci
113562306a36Sopenharmony_ci/*
113662306a36Sopenharmony_ci * Clear PG_offline from a range of pages. If the pages were never onlined,
113762306a36Sopenharmony_ci * (via generic_online_page()), clear PageDirty().
113862306a36Sopenharmony_ci */
113962306a36Sopenharmony_cistatic void virtio_mem_clear_fake_offline(unsigned long pfn,
114062306a36Sopenharmony_ci					  unsigned long nr_pages, bool onlined)
114162306a36Sopenharmony_ci{
114262306a36Sopenharmony_ci	for (; nr_pages--; pfn++) {
114362306a36Sopenharmony_ci		struct page *page = pfn_to_page(pfn);
114462306a36Sopenharmony_ci
114562306a36Sopenharmony_ci		__ClearPageOffline(page);
114662306a36Sopenharmony_ci		if (!onlined)
114762306a36Sopenharmony_ci			ClearPageDirty(page);
114862306a36Sopenharmony_ci	}
114962306a36Sopenharmony_ci}
115062306a36Sopenharmony_ci
115162306a36Sopenharmony_ci/*
115262306a36Sopenharmony_ci * Release a range of fake-offline pages to the buddy, effectively
115362306a36Sopenharmony_ci * fake-onlining them.
115462306a36Sopenharmony_ci */
115562306a36Sopenharmony_cistatic void virtio_mem_fake_online(unsigned long pfn, unsigned long nr_pages)
115662306a36Sopenharmony_ci{
115762306a36Sopenharmony_ci	unsigned long order = MAX_ORDER;
115862306a36Sopenharmony_ci	unsigned long i;
115962306a36Sopenharmony_ci
116062306a36Sopenharmony_ci	/*
116162306a36Sopenharmony_ci	 * We might get called for ranges that don't cover properly aligned
116262306a36Sopenharmony_ci	 * MAX_ORDER pages; however, we can only online properly aligned
116362306a36Sopenharmony_ci	 * pages with an order of MAX_ORDER at maximum.
116462306a36Sopenharmony_ci	 */
116562306a36Sopenharmony_ci	while (!IS_ALIGNED(pfn | nr_pages, 1 << order))
116662306a36Sopenharmony_ci		order--;
116762306a36Sopenharmony_ci
116862306a36Sopenharmony_ci	for (i = 0; i < nr_pages; i += 1 << order) {
116962306a36Sopenharmony_ci		struct page *page = pfn_to_page(pfn + i);
117062306a36Sopenharmony_ci
117162306a36Sopenharmony_ci		/*
117262306a36Sopenharmony_ci		 * If the page is PageDirty(), it was kept fake-offline when
117362306a36Sopenharmony_ci		 * onlining the memory block. Otherwise, it was allocated
117462306a36Sopenharmony_ci		 * using alloc_contig_range(). All pages in a subblock are
117562306a36Sopenharmony_ci		 * alike.
117662306a36Sopenharmony_ci		 */
117762306a36Sopenharmony_ci		if (PageDirty(page)) {
117862306a36Sopenharmony_ci			virtio_mem_clear_fake_offline(pfn + i, 1 << order, false);
117962306a36Sopenharmony_ci			generic_online_page(page, order);
118062306a36Sopenharmony_ci		} else {
118162306a36Sopenharmony_ci			virtio_mem_clear_fake_offline(pfn + i, 1 << order, true);
118262306a36Sopenharmony_ci			free_contig_range(pfn + i, 1 << order);
118362306a36Sopenharmony_ci			adjust_managed_page_count(page, 1 << order);
118462306a36Sopenharmony_ci		}
118562306a36Sopenharmony_ci	}
118662306a36Sopenharmony_ci}
118762306a36Sopenharmony_ci
118862306a36Sopenharmony_ci/*
118962306a36Sopenharmony_ci * Try to allocate a range, marking pages fake-offline, effectively
119062306a36Sopenharmony_ci * fake-offlining them.
119162306a36Sopenharmony_ci */
119262306a36Sopenharmony_cistatic int virtio_mem_fake_offline(struct virtio_mem *vm, unsigned long pfn,
119362306a36Sopenharmony_ci				   unsigned long nr_pages)
119462306a36Sopenharmony_ci{
119562306a36Sopenharmony_ci	const bool is_movable = is_zone_movable_page(pfn_to_page(pfn));
119662306a36Sopenharmony_ci	int rc, retry_count;
119762306a36Sopenharmony_ci
119862306a36Sopenharmony_ci	/*
119962306a36Sopenharmony_ci	 * TODO: We want an alloc_contig_range() mode that tries to allocate
120062306a36Sopenharmony_ci	 * harder (e.g., dealing with temporarily pinned pages, PCP), especially
120162306a36Sopenharmony_ci	 * with ZONE_MOVABLE. So for now, retry a couple of times with
120262306a36Sopenharmony_ci	 * ZONE_MOVABLE before giving up - because that zone is supposed to give
120362306a36Sopenharmony_ci	 * some guarantees.
120462306a36Sopenharmony_ci	 */
120562306a36Sopenharmony_ci	for (retry_count = 0; retry_count < 5; retry_count++) {
120662306a36Sopenharmony_ci		/*
120762306a36Sopenharmony_ci		 * If the config changed, stop immediately and go back to the
120862306a36Sopenharmony_ci		 * main loop: avoid trying to keep unplugging if the device
120962306a36Sopenharmony_ci		 * might have decided to not remove any more memory.
121062306a36Sopenharmony_ci		 */
121162306a36Sopenharmony_ci		if (atomic_read(&vm->config_changed))
121262306a36Sopenharmony_ci			return -EAGAIN;
121362306a36Sopenharmony_ci
121462306a36Sopenharmony_ci		rc = alloc_contig_range(pfn, pfn + nr_pages, MIGRATE_MOVABLE,
121562306a36Sopenharmony_ci					GFP_KERNEL);
121662306a36Sopenharmony_ci		if (rc == -ENOMEM)
121762306a36Sopenharmony_ci			/* whoops, out of memory */
121862306a36Sopenharmony_ci			return rc;
121962306a36Sopenharmony_ci		else if (rc && !is_movable)
122062306a36Sopenharmony_ci			break;
122162306a36Sopenharmony_ci		else if (rc)
122262306a36Sopenharmony_ci			continue;
122362306a36Sopenharmony_ci
122462306a36Sopenharmony_ci		virtio_mem_set_fake_offline(pfn, nr_pages, true);
122562306a36Sopenharmony_ci		adjust_managed_page_count(pfn_to_page(pfn), -nr_pages);
122662306a36Sopenharmony_ci		return 0;
122762306a36Sopenharmony_ci	}
122862306a36Sopenharmony_ci
122962306a36Sopenharmony_ci	return -EBUSY;
123062306a36Sopenharmony_ci}
123162306a36Sopenharmony_ci
/*
 * Prepare fake-offline pages for memory offlining, such that mm-core can
 * skip them when offlining the memory.
 */
static void virtio_mem_fake_offline_going_offline(unsigned long pfn,
						  unsigned long nr_pages)
{
	unsigned long idx;

	/*
	 * Add the unplugged pages to the managed page counters again, so the
	 * offlining code can correctly subtract them.
	 */
	adjust_managed_page_count(pfn_to_page(pfn), nr_pages);

	/*
	 * Drop our reference to each page so the memory can get offlined.
	 * Warn and dump the page if the reference count does not hit zero.
	 */
	for (idx = 0; idx < nr_pages; idx++) {
		struct page *page = pfn_to_page(pfn + idx);

		if (WARN_ON(!page_ref_dec_and_test(page)))
			dump_page(page, "fake-offline page referenced");
	}
}
125562306a36Sopenharmony_ci
/*
 * Undo virtio_mem_fake_offline_going_offline() for fake-offline pages after
 * memory offlining got canceled.
 */
static void virtio_mem_fake_offline_cancel_offline(unsigned long pfn,
						   unsigned long nr_pages)
{
	unsigned long idx = 0;

	/* Subtract the unplugged pages from the managed page counters again. */
	adjust_managed_page_count(pfn_to_page(pfn), -nr_pages);

	/* Re-take the per-page reference that was dropped when going offline. */
	while (idx < nr_pages) {
		page_ref_inc(pfn_to_page(pfn + idx));
		idx++;
	}
}
127362306a36Sopenharmony_ci
127462306a36Sopenharmony_cistatic void virtio_mem_online_page(struct virtio_mem *vm,
127562306a36Sopenharmony_ci				   struct page *page, unsigned int order)
127662306a36Sopenharmony_ci{
127762306a36Sopenharmony_ci	const unsigned long start = page_to_phys(page);
127862306a36Sopenharmony_ci	const unsigned long end = start + PFN_PHYS(1 << order);
127962306a36Sopenharmony_ci	unsigned long addr, next, id, sb_id, count;
128062306a36Sopenharmony_ci	bool do_online;
128162306a36Sopenharmony_ci
128262306a36Sopenharmony_ci	/*
128362306a36Sopenharmony_ci	 * We can get called with any order up to MAX_ORDER. If our subblock
128462306a36Sopenharmony_ci	 * size is smaller than that and we have a mixture of plugged and
128562306a36Sopenharmony_ci	 * unplugged subblocks within such a page, we have to process in
128662306a36Sopenharmony_ci	 * smaller granularity. In that case we'll adjust the order exactly once
128762306a36Sopenharmony_ci	 * within the loop.
128862306a36Sopenharmony_ci	 */
128962306a36Sopenharmony_ci	for (addr = start; addr < end; ) {
129062306a36Sopenharmony_ci		next = addr + PFN_PHYS(1 << order);
129162306a36Sopenharmony_ci
129262306a36Sopenharmony_ci		if (vm->in_sbm) {
129362306a36Sopenharmony_ci			id = virtio_mem_phys_to_mb_id(addr);
129462306a36Sopenharmony_ci			sb_id = virtio_mem_phys_to_sb_id(vm, addr);
129562306a36Sopenharmony_ci			count = virtio_mem_phys_to_sb_id(vm, next - 1) - sb_id + 1;
129662306a36Sopenharmony_ci
129762306a36Sopenharmony_ci			if (virtio_mem_sbm_test_sb_plugged(vm, id, sb_id, count)) {
129862306a36Sopenharmony_ci				/* Fully plugged. */
129962306a36Sopenharmony_ci				do_online = true;
130062306a36Sopenharmony_ci			} else if (count == 1 ||
130162306a36Sopenharmony_ci				   virtio_mem_sbm_test_sb_unplugged(vm, id, sb_id, count)) {
130262306a36Sopenharmony_ci				/* Fully unplugged. */
130362306a36Sopenharmony_ci				do_online = false;
130462306a36Sopenharmony_ci			} else {
130562306a36Sopenharmony_ci				/*
130662306a36Sopenharmony_ci				 * Mixture, process sub-blocks instead. This
130762306a36Sopenharmony_ci				 * will be at least the size of a pageblock.
130862306a36Sopenharmony_ci				 * We'll run into this case exactly once.
130962306a36Sopenharmony_ci				 */
131062306a36Sopenharmony_ci				order = ilog2(vm->sbm.sb_size) - PAGE_SHIFT;
131162306a36Sopenharmony_ci				do_online = virtio_mem_sbm_test_sb_plugged(vm, id, sb_id, 1);
131262306a36Sopenharmony_ci				continue;
131362306a36Sopenharmony_ci			}
131462306a36Sopenharmony_ci		} else {
131562306a36Sopenharmony_ci			/*
131662306a36Sopenharmony_ci			 * If the whole block is marked fake offline, keep
131762306a36Sopenharmony_ci			 * everything that way.
131862306a36Sopenharmony_ci			 */
131962306a36Sopenharmony_ci			id = virtio_mem_phys_to_bb_id(vm, addr);
132062306a36Sopenharmony_ci			do_online = virtio_mem_bbm_get_bb_state(vm, id) !=
132162306a36Sopenharmony_ci				    VIRTIO_MEM_BBM_BB_FAKE_OFFLINE;
132262306a36Sopenharmony_ci		}
132362306a36Sopenharmony_ci
132462306a36Sopenharmony_ci		if (do_online)
132562306a36Sopenharmony_ci			generic_online_page(pfn_to_page(PFN_DOWN(addr)), order);
132662306a36Sopenharmony_ci		else
132762306a36Sopenharmony_ci			virtio_mem_set_fake_offline(PFN_DOWN(addr), 1 << order,
132862306a36Sopenharmony_ci						    false);
132962306a36Sopenharmony_ci		addr = next;
133062306a36Sopenharmony_ci	}
133162306a36Sopenharmony_ci}
133262306a36Sopenharmony_ci
133362306a36Sopenharmony_cistatic void virtio_mem_online_page_cb(struct page *page, unsigned int order)
133462306a36Sopenharmony_ci{
133562306a36Sopenharmony_ci	const unsigned long addr = page_to_phys(page);
133662306a36Sopenharmony_ci	struct virtio_mem *vm;
133762306a36Sopenharmony_ci
133862306a36Sopenharmony_ci	rcu_read_lock();
133962306a36Sopenharmony_ci	list_for_each_entry_rcu(vm, &virtio_mem_devices, next) {
134062306a36Sopenharmony_ci		/*
134162306a36Sopenharmony_ci		 * Pages we're onlining will never cross memory blocks and,
134262306a36Sopenharmony_ci		 * therefore, not virtio-mem devices.
134362306a36Sopenharmony_ci		 */
134462306a36Sopenharmony_ci		if (!virtio_mem_contains_range(vm, addr, PFN_PHYS(1 << order)))
134562306a36Sopenharmony_ci			continue;
134662306a36Sopenharmony_ci
134762306a36Sopenharmony_ci		/*
134862306a36Sopenharmony_ci		 * virtio_mem_set_fake_offline() might sleep. We can safely
134962306a36Sopenharmony_ci		 * drop the RCU lock at this point because the device
135062306a36Sopenharmony_ci		 * cannot go away. See virtio_mem_remove() how races
135162306a36Sopenharmony_ci		 * between memory onlining and device removal are handled.
135262306a36Sopenharmony_ci		 */
135362306a36Sopenharmony_ci		rcu_read_unlock();
135462306a36Sopenharmony_ci
135562306a36Sopenharmony_ci		virtio_mem_online_page(vm, page, order);
135662306a36Sopenharmony_ci		return;
135762306a36Sopenharmony_ci	}
135862306a36Sopenharmony_ci	rcu_read_unlock();
135962306a36Sopenharmony_ci
136062306a36Sopenharmony_ci	/* not virtio-mem memory, but e.g., a DIMM. online it */
136162306a36Sopenharmony_ci	generic_online_page(page, order);
136262306a36Sopenharmony_ci}
136362306a36Sopenharmony_ci
136462306a36Sopenharmony_cistatic uint64_t virtio_mem_send_request(struct virtio_mem *vm,
136562306a36Sopenharmony_ci					const struct virtio_mem_req *req)
136662306a36Sopenharmony_ci{
136762306a36Sopenharmony_ci	struct scatterlist *sgs[2], sg_req, sg_resp;
136862306a36Sopenharmony_ci	unsigned int len;
136962306a36Sopenharmony_ci	int rc;
137062306a36Sopenharmony_ci
137162306a36Sopenharmony_ci	/* don't use the request residing on the stack (vaddr) */
137262306a36Sopenharmony_ci	vm->req = *req;
137362306a36Sopenharmony_ci
137462306a36Sopenharmony_ci	/* out: buffer for request */
137562306a36Sopenharmony_ci	sg_init_one(&sg_req, &vm->req, sizeof(vm->req));
137662306a36Sopenharmony_ci	sgs[0] = &sg_req;
137762306a36Sopenharmony_ci
137862306a36Sopenharmony_ci	/* in: buffer for response */
137962306a36Sopenharmony_ci	sg_init_one(&sg_resp, &vm->resp, sizeof(vm->resp));
138062306a36Sopenharmony_ci	sgs[1] = &sg_resp;
138162306a36Sopenharmony_ci
138262306a36Sopenharmony_ci	rc = virtqueue_add_sgs(vm->vq, sgs, 1, 1, vm, GFP_KERNEL);
138362306a36Sopenharmony_ci	if (rc < 0)
138462306a36Sopenharmony_ci		return rc;
138562306a36Sopenharmony_ci
138662306a36Sopenharmony_ci	virtqueue_kick(vm->vq);
138762306a36Sopenharmony_ci
138862306a36Sopenharmony_ci	/* wait for a response */
138962306a36Sopenharmony_ci	wait_event(vm->host_resp, virtqueue_get_buf(vm->vq, &len));
139062306a36Sopenharmony_ci
139162306a36Sopenharmony_ci	return virtio16_to_cpu(vm->vdev, vm->resp.type);
139262306a36Sopenharmony_ci}
139362306a36Sopenharmony_ci
139462306a36Sopenharmony_cistatic int virtio_mem_send_plug_request(struct virtio_mem *vm, uint64_t addr,
139562306a36Sopenharmony_ci					uint64_t size)
139662306a36Sopenharmony_ci{
139762306a36Sopenharmony_ci	const uint64_t nb_vm_blocks = size / vm->device_block_size;
139862306a36Sopenharmony_ci	const struct virtio_mem_req req = {
139962306a36Sopenharmony_ci		.type = cpu_to_virtio16(vm->vdev, VIRTIO_MEM_REQ_PLUG),
140062306a36Sopenharmony_ci		.u.plug.addr = cpu_to_virtio64(vm->vdev, addr),
140162306a36Sopenharmony_ci		.u.plug.nb_blocks = cpu_to_virtio16(vm->vdev, nb_vm_blocks),
140262306a36Sopenharmony_ci	};
140362306a36Sopenharmony_ci	int rc = -ENOMEM;
140462306a36Sopenharmony_ci
140562306a36Sopenharmony_ci	if (atomic_read(&vm->config_changed))
140662306a36Sopenharmony_ci		return -EAGAIN;
140762306a36Sopenharmony_ci
140862306a36Sopenharmony_ci	dev_dbg(&vm->vdev->dev, "plugging memory: 0x%llx - 0x%llx\n", addr,
140962306a36Sopenharmony_ci		addr + size - 1);
141062306a36Sopenharmony_ci
141162306a36Sopenharmony_ci	switch (virtio_mem_send_request(vm, &req)) {
141262306a36Sopenharmony_ci	case VIRTIO_MEM_RESP_ACK:
141362306a36Sopenharmony_ci		vm->plugged_size += size;
141462306a36Sopenharmony_ci		return 0;
141562306a36Sopenharmony_ci	case VIRTIO_MEM_RESP_NACK:
141662306a36Sopenharmony_ci		rc = -EAGAIN;
141762306a36Sopenharmony_ci		break;
141862306a36Sopenharmony_ci	case VIRTIO_MEM_RESP_BUSY:
141962306a36Sopenharmony_ci		rc = -ETXTBSY;
142062306a36Sopenharmony_ci		break;
142162306a36Sopenharmony_ci	case VIRTIO_MEM_RESP_ERROR:
142262306a36Sopenharmony_ci		rc = -EINVAL;
142362306a36Sopenharmony_ci		break;
142462306a36Sopenharmony_ci	default:
142562306a36Sopenharmony_ci		break;
142662306a36Sopenharmony_ci	}
142762306a36Sopenharmony_ci
142862306a36Sopenharmony_ci	dev_dbg(&vm->vdev->dev, "plugging memory failed: %d\n", rc);
142962306a36Sopenharmony_ci	return rc;
143062306a36Sopenharmony_ci}
143162306a36Sopenharmony_ci
143262306a36Sopenharmony_cistatic int virtio_mem_send_unplug_request(struct virtio_mem *vm, uint64_t addr,
143362306a36Sopenharmony_ci					  uint64_t size)
143462306a36Sopenharmony_ci{
143562306a36Sopenharmony_ci	const uint64_t nb_vm_blocks = size / vm->device_block_size;
143662306a36Sopenharmony_ci	const struct virtio_mem_req req = {
143762306a36Sopenharmony_ci		.type = cpu_to_virtio16(vm->vdev, VIRTIO_MEM_REQ_UNPLUG),
143862306a36Sopenharmony_ci		.u.unplug.addr = cpu_to_virtio64(vm->vdev, addr),
143962306a36Sopenharmony_ci		.u.unplug.nb_blocks = cpu_to_virtio16(vm->vdev, nb_vm_blocks),
144062306a36Sopenharmony_ci	};
144162306a36Sopenharmony_ci	int rc = -ENOMEM;
144262306a36Sopenharmony_ci
144362306a36Sopenharmony_ci	if (atomic_read(&vm->config_changed))
144462306a36Sopenharmony_ci		return -EAGAIN;
144562306a36Sopenharmony_ci
144662306a36Sopenharmony_ci	dev_dbg(&vm->vdev->dev, "unplugging memory: 0x%llx - 0x%llx\n", addr,
144762306a36Sopenharmony_ci		addr + size - 1);
144862306a36Sopenharmony_ci
144962306a36Sopenharmony_ci	switch (virtio_mem_send_request(vm, &req)) {
145062306a36Sopenharmony_ci	case VIRTIO_MEM_RESP_ACK:
145162306a36Sopenharmony_ci		vm->plugged_size -= size;
145262306a36Sopenharmony_ci		return 0;
145362306a36Sopenharmony_ci	case VIRTIO_MEM_RESP_BUSY:
145462306a36Sopenharmony_ci		rc = -ETXTBSY;
145562306a36Sopenharmony_ci		break;
145662306a36Sopenharmony_ci	case VIRTIO_MEM_RESP_ERROR:
145762306a36Sopenharmony_ci		rc = -EINVAL;
145862306a36Sopenharmony_ci		break;
145962306a36Sopenharmony_ci	default:
146062306a36Sopenharmony_ci		break;
146162306a36Sopenharmony_ci	}
146262306a36Sopenharmony_ci
146362306a36Sopenharmony_ci	dev_dbg(&vm->vdev->dev, "unplugging memory failed: %d\n", rc);
146462306a36Sopenharmony_ci	return rc;
146562306a36Sopenharmony_ci}
146662306a36Sopenharmony_ci
146762306a36Sopenharmony_cistatic int virtio_mem_send_unplug_all_request(struct virtio_mem *vm)
146862306a36Sopenharmony_ci{
146962306a36Sopenharmony_ci	const struct virtio_mem_req req = {
147062306a36Sopenharmony_ci		.type = cpu_to_virtio16(vm->vdev, VIRTIO_MEM_REQ_UNPLUG_ALL),
147162306a36Sopenharmony_ci	};
147262306a36Sopenharmony_ci	int rc = -ENOMEM;
147362306a36Sopenharmony_ci
147462306a36Sopenharmony_ci	dev_dbg(&vm->vdev->dev, "unplugging all memory");
147562306a36Sopenharmony_ci
147662306a36Sopenharmony_ci	switch (virtio_mem_send_request(vm, &req)) {
147762306a36Sopenharmony_ci	case VIRTIO_MEM_RESP_ACK:
147862306a36Sopenharmony_ci		vm->unplug_all_required = false;
147962306a36Sopenharmony_ci		vm->plugged_size = 0;
148062306a36Sopenharmony_ci		/* usable region might have shrunk */
148162306a36Sopenharmony_ci		atomic_set(&vm->config_changed, 1);
148262306a36Sopenharmony_ci		return 0;
148362306a36Sopenharmony_ci	case VIRTIO_MEM_RESP_BUSY:
148462306a36Sopenharmony_ci		rc = -ETXTBSY;
148562306a36Sopenharmony_ci		break;
148662306a36Sopenharmony_ci	default:
148762306a36Sopenharmony_ci		break;
148862306a36Sopenharmony_ci	}
148962306a36Sopenharmony_ci
149062306a36Sopenharmony_ci	dev_dbg(&vm->vdev->dev, "unplugging all memory failed: %d\n", rc);
149162306a36Sopenharmony_ci	return rc;
149262306a36Sopenharmony_ci}
149362306a36Sopenharmony_ci
149462306a36Sopenharmony_ci/*
149562306a36Sopenharmony_ci * Plug selected subblocks. Updates the plugged state, but not the state
149662306a36Sopenharmony_ci * of the memory block.
149762306a36Sopenharmony_ci */
149862306a36Sopenharmony_cistatic int virtio_mem_sbm_plug_sb(struct virtio_mem *vm, unsigned long mb_id,
149962306a36Sopenharmony_ci				  int sb_id, int count)
150062306a36Sopenharmony_ci{
150162306a36Sopenharmony_ci	const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id) +
150262306a36Sopenharmony_ci			      sb_id * vm->sbm.sb_size;
150362306a36Sopenharmony_ci	const uint64_t size = count * vm->sbm.sb_size;
150462306a36Sopenharmony_ci	int rc;
150562306a36Sopenharmony_ci
150662306a36Sopenharmony_ci	rc = virtio_mem_send_plug_request(vm, addr, size);
150762306a36Sopenharmony_ci	if (!rc)
150862306a36Sopenharmony_ci		virtio_mem_sbm_set_sb_plugged(vm, mb_id, sb_id, count);
150962306a36Sopenharmony_ci	return rc;
151062306a36Sopenharmony_ci}
151162306a36Sopenharmony_ci
151262306a36Sopenharmony_ci/*
151362306a36Sopenharmony_ci * Unplug selected subblocks. Updates the plugged state, but not the state
151462306a36Sopenharmony_ci * of the memory block.
151562306a36Sopenharmony_ci */
151662306a36Sopenharmony_cistatic int virtio_mem_sbm_unplug_sb(struct virtio_mem *vm, unsigned long mb_id,
151762306a36Sopenharmony_ci				    int sb_id, int count)
151862306a36Sopenharmony_ci{
151962306a36Sopenharmony_ci	const uint64_t addr = virtio_mem_mb_id_to_phys(mb_id) +
152062306a36Sopenharmony_ci			      sb_id * vm->sbm.sb_size;
152162306a36Sopenharmony_ci	const uint64_t size = count * vm->sbm.sb_size;
152262306a36Sopenharmony_ci	int rc;
152362306a36Sopenharmony_ci
152462306a36Sopenharmony_ci	rc = virtio_mem_send_unplug_request(vm, addr, size);
152562306a36Sopenharmony_ci	if (!rc)
152662306a36Sopenharmony_ci		virtio_mem_sbm_set_sb_unplugged(vm, mb_id, sb_id, count);
152762306a36Sopenharmony_ci	return rc;
152862306a36Sopenharmony_ci}
152962306a36Sopenharmony_ci
153062306a36Sopenharmony_ci/*
153162306a36Sopenharmony_ci * Request to unplug a big block.
153262306a36Sopenharmony_ci *
153362306a36Sopenharmony_ci * Will not modify the state of the big block.
153462306a36Sopenharmony_ci */
153562306a36Sopenharmony_cistatic int virtio_mem_bbm_unplug_bb(struct virtio_mem *vm, unsigned long bb_id)
153662306a36Sopenharmony_ci{
153762306a36Sopenharmony_ci	const uint64_t addr = virtio_mem_bb_id_to_phys(vm, bb_id);
153862306a36Sopenharmony_ci	const uint64_t size = vm->bbm.bb_size;
153962306a36Sopenharmony_ci
154062306a36Sopenharmony_ci	return virtio_mem_send_unplug_request(vm, addr, size);
154162306a36Sopenharmony_ci}
154262306a36Sopenharmony_ci
154362306a36Sopenharmony_ci/*
154462306a36Sopenharmony_ci * Request to plug a big block.
154562306a36Sopenharmony_ci *
154662306a36Sopenharmony_ci * Will not modify the state of the big block.
154762306a36Sopenharmony_ci */
154862306a36Sopenharmony_cistatic int virtio_mem_bbm_plug_bb(struct virtio_mem *vm, unsigned long bb_id)
154962306a36Sopenharmony_ci{
155062306a36Sopenharmony_ci	const uint64_t addr = virtio_mem_bb_id_to_phys(vm, bb_id);
155162306a36Sopenharmony_ci	const uint64_t size = vm->bbm.bb_size;
155262306a36Sopenharmony_ci
155362306a36Sopenharmony_ci	return virtio_mem_send_plug_request(vm, addr, size);
155462306a36Sopenharmony_ci}
155562306a36Sopenharmony_ci
155662306a36Sopenharmony_ci/*
155762306a36Sopenharmony_ci * Unplug the desired number of plugged subblocks of a offline or not-added
155862306a36Sopenharmony_ci * memory block. Will fail if any subblock cannot get unplugged (instead of
155962306a36Sopenharmony_ci * skipping it).
156062306a36Sopenharmony_ci *
156162306a36Sopenharmony_ci * Will not modify the state of the memory block.
156262306a36Sopenharmony_ci *
156362306a36Sopenharmony_ci * Note: can fail after some subblocks were unplugged.
156462306a36Sopenharmony_ci */
156562306a36Sopenharmony_cistatic int virtio_mem_sbm_unplug_any_sb_raw(struct virtio_mem *vm,
156662306a36Sopenharmony_ci					    unsigned long mb_id, uint64_t *nb_sb)
156762306a36Sopenharmony_ci{
156862306a36Sopenharmony_ci	int sb_id, count;
156962306a36Sopenharmony_ci	int rc;
157062306a36Sopenharmony_ci
157162306a36Sopenharmony_ci	sb_id = vm->sbm.sbs_per_mb - 1;
157262306a36Sopenharmony_ci	while (*nb_sb) {
157362306a36Sopenharmony_ci		/* Find the next candidate subblock */
157462306a36Sopenharmony_ci		while (sb_id >= 0 &&
157562306a36Sopenharmony_ci		       virtio_mem_sbm_test_sb_unplugged(vm, mb_id, sb_id, 1))
157662306a36Sopenharmony_ci			sb_id--;
157762306a36Sopenharmony_ci		if (sb_id < 0)
157862306a36Sopenharmony_ci			break;
157962306a36Sopenharmony_ci		/* Try to unplug multiple subblocks at a time */
158062306a36Sopenharmony_ci		count = 1;
158162306a36Sopenharmony_ci		while (count < *nb_sb && sb_id > 0 &&
158262306a36Sopenharmony_ci		       virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id - 1, 1)) {
158362306a36Sopenharmony_ci			count++;
158462306a36Sopenharmony_ci			sb_id--;
158562306a36Sopenharmony_ci		}
158662306a36Sopenharmony_ci
158762306a36Sopenharmony_ci		rc = virtio_mem_sbm_unplug_sb(vm, mb_id, sb_id, count);
158862306a36Sopenharmony_ci		if (rc)
158962306a36Sopenharmony_ci			return rc;
159062306a36Sopenharmony_ci		*nb_sb -= count;
159162306a36Sopenharmony_ci		sb_id--;
159262306a36Sopenharmony_ci	}
159362306a36Sopenharmony_ci
159462306a36Sopenharmony_ci	return 0;
159562306a36Sopenharmony_ci}
159662306a36Sopenharmony_ci
159762306a36Sopenharmony_ci/*
159862306a36Sopenharmony_ci * Unplug all plugged subblocks of an offline or not-added memory block.
159962306a36Sopenharmony_ci *
160062306a36Sopenharmony_ci * Will not modify the state of the memory block.
160162306a36Sopenharmony_ci *
160262306a36Sopenharmony_ci * Note: can fail after some subblocks were unplugged.
160362306a36Sopenharmony_ci */
160462306a36Sopenharmony_cistatic int virtio_mem_sbm_unplug_mb(struct virtio_mem *vm, unsigned long mb_id)
160562306a36Sopenharmony_ci{
160662306a36Sopenharmony_ci	uint64_t nb_sb = vm->sbm.sbs_per_mb;
160762306a36Sopenharmony_ci
160862306a36Sopenharmony_ci	return virtio_mem_sbm_unplug_any_sb_raw(vm, mb_id, &nb_sb);
160962306a36Sopenharmony_ci}
161062306a36Sopenharmony_ci
161162306a36Sopenharmony_ci/*
161262306a36Sopenharmony_ci * Prepare tracking data for the next memory block.
161362306a36Sopenharmony_ci */
161462306a36Sopenharmony_cistatic int virtio_mem_sbm_prepare_next_mb(struct virtio_mem *vm,
161562306a36Sopenharmony_ci					  unsigned long *mb_id)
161662306a36Sopenharmony_ci{
161762306a36Sopenharmony_ci	int rc;
161862306a36Sopenharmony_ci
161962306a36Sopenharmony_ci	if (vm->sbm.next_mb_id > vm->sbm.last_usable_mb_id)
162062306a36Sopenharmony_ci		return -ENOSPC;
162162306a36Sopenharmony_ci
162262306a36Sopenharmony_ci	/* Resize the state array if required. */
162362306a36Sopenharmony_ci	rc = virtio_mem_sbm_mb_states_prepare_next_mb(vm);
162462306a36Sopenharmony_ci	if (rc)
162562306a36Sopenharmony_ci		return rc;
162662306a36Sopenharmony_ci
162762306a36Sopenharmony_ci	/* Resize the subblock bitmap if required. */
162862306a36Sopenharmony_ci	rc = virtio_mem_sbm_sb_states_prepare_next_mb(vm);
162962306a36Sopenharmony_ci	if (rc)
163062306a36Sopenharmony_ci		return rc;
163162306a36Sopenharmony_ci
163262306a36Sopenharmony_ci	vm->sbm.mb_count[VIRTIO_MEM_SBM_MB_UNUSED]++;
163362306a36Sopenharmony_ci	*mb_id = vm->sbm.next_mb_id++;
163462306a36Sopenharmony_ci	return 0;
163562306a36Sopenharmony_ci}
163662306a36Sopenharmony_ci
163762306a36Sopenharmony_ci/*
163862306a36Sopenharmony_ci * Try to plug the desired number of subblocks and add the memory block
163962306a36Sopenharmony_ci * to Linux.
164062306a36Sopenharmony_ci *
164162306a36Sopenharmony_ci * Will modify the state of the memory block.
164262306a36Sopenharmony_ci */
164362306a36Sopenharmony_cistatic int virtio_mem_sbm_plug_and_add_mb(struct virtio_mem *vm,
164462306a36Sopenharmony_ci					  unsigned long mb_id, uint64_t *nb_sb)
164562306a36Sopenharmony_ci{
164662306a36Sopenharmony_ci	const int count = min_t(int, *nb_sb, vm->sbm.sbs_per_mb);
164762306a36Sopenharmony_ci	int rc;
164862306a36Sopenharmony_ci
164962306a36Sopenharmony_ci	if (WARN_ON_ONCE(!count))
165062306a36Sopenharmony_ci		return -EINVAL;
165162306a36Sopenharmony_ci
165262306a36Sopenharmony_ci	/*
165362306a36Sopenharmony_ci	 * Plug the requested number of subblocks before adding it to linux,
165462306a36Sopenharmony_ci	 * so that onlining will directly online all plugged subblocks.
165562306a36Sopenharmony_ci	 */
165662306a36Sopenharmony_ci	rc = virtio_mem_sbm_plug_sb(vm, mb_id, 0, count);
165762306a36Sopenharmony_ci	if (rc)
165862306a36Sopenharmony_ci		return rc;
165962306a36Sopenharmony_ci
166062306a36Sopenharmony_ci	/*
166162306a36Sopenharmony_ci	 * Mark the block properly offline before adding it to Linux,
166262306a36Sopenharmony_ci	 * so the memory notifiers will find the block in the right state.
166362306a36Sopenharmony_ci	 */
166462306a36Sopenharmony_ci	if (count == vm->sbm.sbs_per_mb)
166562306a36Sopenharmony_ci		virtio_mem_sbm_set_mb_state(vm, mb_id,
166662306a36Sopenharmony_ci					    VIRTIO_MEM_SBM_MB_OFFLINE);
166762306a36Sopenharmony_ci	else
166862306a36Sopenharmony_ci		virtio_mem_sbm_set_mb_state(vm, mb_id,
166962306a36Sopenharmony_ci					    VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL);
167062306a36Sopenharmony_ci
167162306a36Sopenharmony_ci	/* Add the memory block to linux - if that fails, try to unplug. */
167262306a36Sopenharmony_ci	rc = virtio_mem_sbm_add_mb(vm, mb_id);
167362306a36Sopenharmony_ci	if (rc) {
167462306a36Sopenharmony_ci		int new_state = VIRTIO_MEM_SBM_MB_UNUSED;
167562306a36Sopenharmony_ci
167662306a36Sopenharmony_ci		if (virtio_mem_sbm_unplug_sb(vm, mb_id, 0, count))
167762306a36Sopenharmony_ci			new_state = VIRTIO_MEM_SBM_MB_PLUGGED;
167862306a36Sopenharmony_ci		virtio_mem_sbm_set_mb_state(vm, mb_id, new_state);
167962306a36Sopenharmony_ci		return rc;
168062306a36Sopenharmony_ci	}
168162306a36Sopenharmony_ci
168262306a36Sopenharmony_ci	*nb_sb -= count;
168362306a36Sopenharmony_ci	return 0;
168462306a36Sopenharmony_ci}
168562306a36Sopenharmony_ci
168662306a36Sopenharmony_ci/*
168762306a36Sopenharmony_ci * Try to plug the desired number of subblocks of a memory block that
168862306a36Sopenharmony_ci * is already added to Linux.
168962306a36Sopenharmony_ci *
169062306a36Sopenharmony_ci * Will modify the state of the memory block.
169162306a36Sopenharmony_ci *
169262306a36Sopenharmony_ci * Note: Can fail after some subblocks were successfully plugged.
169362306a36Sopenharmony_ci */
169462306a36Sopenharmony_cistatic int virtio_mem_sbm_plug_any_sb(struct virtio_mem *vm,
169562306a36Sopenharmony_ci				      unsigned long mb_id, uint64_t *nb_sb)
169662306a36Sopenharmony_ci{
169762306a36Sopenharmony_ci	const int old_state = virtio_mem_sbm_get_mb_state(vm, mb_id);
169862306a36Sopenharmony_ci	unsigned long pfn, nr_pages;
169962306a36Sopenharmony_ci	int sb_id, count;
170062306a36Sopenharmony_ci	int rc;
170162306a36Sopenharmony_ci
170262306a36Sopenharmony_ci	if (WARN_ON_ONCE(!*nb_sb))
170362306a36Sopenharmony_ci		return -EINVAL;
170462306a36Sopenharmony_ci
170562306a36Sopenharmony_ci	while (*nb_sb) {
170662306a36Sopenharmony_ci		sb_id = virtio_mem_sbm_first_unplugged_sb(vm, mb_id);
170762306a36Sopenharmony_ci		if (sb_id >= vm->sbm.sbs_per_mb)
170862306a36Sopenharmony_ci			break;
170962306a36Sopenharmony_ci		count = 1;
171062306a36Sopenharmony_ci		while (count < *nb_sb &&
171162306a36Sopenharmony_ci		       sb_id + count < vm->sbm.sbs_per_mb &&
171262306a36Sopenharmony_ci		       !virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id + count, 1))
171362306a36Sopenharmony_ci			count++;
171462306a36Sopenharmony_ci
171562306a36Sopenharmony_ci		rc = virtio_mem_sbm_plug_sb(vm, mb_id, sb_id, count);
171662306a36Sopenharmony_ci		if (rc)
171762306a36Sopenharmony_ci			return rc;
171862306a36Sopenharmony_ci		*nb_sb -= count;
171962306a36Sopenharmony_ci		if (old_state == VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL)
172062306a36Sopenharmony_ci			continue;
172162306a36Sopenharmony_ci
172262306a36Sopenharmony_ci		/* fake-online the pages if the memory block is online */
172362306a36Sopenharmony_ci		pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) +
172462306a36Sopenharmony_ci			       sb_id * vm->sbm.sb_size);
172562306a36Sopenharmony_ci		nr_pages = PFN_DOWN(count * vm->sbm.sb_size);
172662306a36Sopenharmony_ci		virtio_mem_fake_online(pfn, nr_pages);
172762306a36Sopenharmony_ci	}
172862306a36Sopenharmony_ci
172962306a36Sopenharmony_ci	if (virtio_mem_sbm_test_sb_plugged(vm, mb_id, 0, vm->sbm.sbs_per_mb))
173062306a36Sopenharmony_ci		virtio_mem_sbm_set_mb_state(vm, mb_id, old_state - 1);
173162306a36Sopenharmony_ci
173262306a36Sopenharmony_ci	return 0;
173362306a36Sopenharmony_ci}
173462306a36Sopenharmony_ci
173562306a36Sopenharmony_cistatic int virtio_mem_sbm_plug_request(struct virtio_mem *vm, uint64_t diff)
173662306a36Sopenharmony_ci{
173762306a36Sopenharmony_ci	const int mb_states[] = {
173862306a36Sopenharmony_ci		VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL,
173962306a36Sopenharmony_ci		VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL,
174062306a36Sopenharmony_ci		VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL,
174162306a36Sopenharmony_ci	};
174262306a36Sopenharmony_ci	uint64_t nb_sb = diff / vm->sbm.sb_size;
174362306a36Sopenharmony_ci	unsigned long mb_id;
174462306a36Sopenharmony_ci	int rc, i;
174562306a36Sopenharmony_ci
174662306a36Sopenharmony_ci	if (!nb_sb)
174762306a36Sopenharmony_ci		return 0;
174862306a36Sopenharmony_ci
174962306a36Sopenharmony_ci	/* Don't race with onlining/offlining */
175062306a36Sopenharmony_ci	mutex_lock(&vm->hotplug_mutex);
175162306a36Sopenharmony_ci
175262306a36Sopenharmony_ci	for (i = 0; i < ARRAY_SIZE(mb_states); i++) {
175362306a36Sopenharmony_ci		virtio_mem_sbm_for_each_mb(vm, mb_id, mb_states[i]) {
175462306a36Sopenharmony_ci			rc = virtio_mem_sbm_plug_any_sb(vm, mb_id, &nb_sb);
175562306a36Sopenharmony_ci			if (rc || !nb_sb)
175662306a36Sopenharmony_ci				goto out_unlock;
175762306a36Sopenharmony_ci			cond_resched();
175862306a36Sopenharmony_ci		}
175962306a36Sopenharmony_ci	}
176062306a36Sopenharmony_ci
176162306a36Sopenharmony_ci	/*
176262306a36Sopenharmony_ci	 * We won't be working on online/offline memory blocks from this point,
176362306a36Sopenharmony_ci	 * so we can't race with memory onlining/offlining. Drop the mutex.
176462306a36Sopenharmony_ci	 */
176562306a36Sopenharmony_ci	mutex_unlock(&vm->hotplug_mutex);
176662306a36Sopenharmony_ci
176762306a36Sopenharmony_ci	/* Try to plug and add unused blocks */
176862306a36Sopenharmony_ci	virtio_mem_sbm_for_each_mb(vm, mb_id, VIRTIO_MEM_SBM_MB_UNUSED) {
176962306a36Sopenharmony_ci		if (!virtio_mem_could_add_memory(vm, memory_block_size_bytes()))
177062306a36Sopenharmony_ci			return -ENOSPC;
177162306a36Sopenharmony_ci
177262306a36Sopenharmony_ci		rc = virtio_mem_sbm_plug_and_add_mb(vm, mb_id, &nb_sb);
177362306a36Sopenharmony_ci		if (rc || !nb_sb)
177462306a36Sopenharmony_ci			return rc;
177562306a36Sopenharmony_ci		cond_resched();
177662306a36Sopenharmony_ci	}
177762306a36Sopenharmony_ci
177862306a36Sopenharmony_ci	/* Try to prepare, plug and add new blocks */
177962306a36Sopenharmony_ci	while (nb_sb) {
178062306a36Sopenharmony_ci		if (!virtio_mem_could_add_memory(vm, memory_block_size_bytes()))
178162306a36Sopenharmony_ci			return -ENOSPC;
178262306a36Sopenharmony_ci
178362306a36Sopenharmony_ci		rc = virtio_mem_sbm_prepare_next_mb(vm, &mb_id);
178462306a36Sopenharmony_ci		if (rc)
178562306a36Sopenharmony_ci			return rc;
178662306a36Sopenharmony_ci		rc = virtio_mem_sbm_plug_and_add_mb(vm, mb_id, &nb_sb);
178762306a36Sopenharmony_ci		if (rc)
178862306a36Sopenharmony_ci			return rc;
178962306a36Sopenharmony_ci		cond_resched();
179062306a36Sopenharmony_ci	}
179162306a36Sopenharmony_ci
179262306a36Sopenharmony_ci	return 0;
179362306a36Sopenharmony_ciout_unlock:
179462306a36Sopenharmony_ci	mutex_unlock(&vm->hotplug_mutex);
179562306a36Sopenharmony_ci	return rc;
179662306a36Sopenharmony_ci}
179762306a36Sopenharmony_ci
179862306a36Sopenharmony_ci/*
179962306a36Sopenharmony_ci * Plug a big block and add it to Linux.
180062306a36Sopenharmony_ci *
180162306a36Sopenharmony_ci * Will modify the state of the big block.
180262306a36Sopenharmony_ci */
180362306a36Sopenharmony_cistatic int virtio_mem_bbm_plug_and_add_bb(struct virtio_mem *vm,
180462306a36Sopenharmony_ci					  unsigned long bb_id)
180562306a36Sopenharmony_ci{
180662306a36Sopenharmony_ci	int rc;
180762306a36Sopenharmony_ci
180862306a36Sopenharmony_ci	if (WARN_ON_ONCE(virtio_mem_bbm_get_bb_state(vm, bb_id) !=
180962306a36Sopenharmony_ci			 VIRTIO_MEM_BBM_BB_UNUSED))
181062306a36Sopenharmony_ci		return -EINVAL;
181162306a36Sopenharmony_ci
181262306a36Sopenharmony_ci	rc = virtio_mem_bbm_plug_bb(vm, bb_id);
181362306a36Sopenharmony_ci	if (rc)
181462306a36Sopenharmony_ci		return rc;
181562306a36Sopenharmony_ci	virtio_mem_bbm_set_bb_state(vm, bb_id, VIRTIO_MEM_BBM_BB_ADDED);
181662306a36Sopenharmony_ci
181762306a36Sopenharmony_ci	rc = virtio_mem_bbm_add_bb(vm, bb_id);
181862306a36Sopenharmony_ci	if (rc) {
181962306a36Sopenharmony_ci		if (!virtio_mem_bbm_unplug_bb(vm, bb_id))
182062306a36Sopenharmony_ci			virtio_mem_bbm_set_bb_state(vm, bb_id,
182162306a36Sopenharmony_ci						    VIRTIO_MEM_BBM_BB_UNUSED);
182262306a36Sopenharmony_ci		else
182362306a36Sopenharmony_ci			/* Retry from the main loop. */
182462306a36Sopenharmony_ci			virtio_mem_bbm_set_bb_state(vm, bb_id,
182562306a36Sopenharmony_ci						    VIRTIO_MEM_BBM_BB_PLUGGED);
182662306a36Sopenharmony_ci		return rc;
182762306a36Sopenharmony_ci	}
182862306a36Sopenharmony_ci	return 0;
182962306a36Sopenharmony_ci}
183062306a36Sopenharmony_ci
183162306a36Sopenharmony_ci/*
183262306a36Sopenharmony_ci * Prepare tracking data for the next big block.
183362306a36Sopenharmony_ci */
183462306a36Sopenharmony_cistatic int virtio_mem_bbm_prepare_next_bb(struct virtio_mem *vm,
183562306a36Sopenharmony_ci					  unsigned long *bb_id)
183662306a36Sopenharmony_ci{
183762306a36Sopenharmony_ci	int rc;
183862306a36Sopenharmony_ci
183962306a36Sopenharmony_ci	if (vm->bbm.next_bb_id > vm->bbm.last_usable_bb_id)
184062306a36Sopenharmony_ci		return -ENOSPC;
184162306a36Sopenharmony_ci
184262306a36Sopenharmony_ci	/* Resize the big block state array if required. */
184362306a36Sopenharmony_ci	rc = virtio_mem_bbm_bb_states_prepare_next_bb(vm);
184462306a36Sopenharmony_ci	if (rc)
184562306a36Sopenharmony_ci		return rc;
184662306a36Sopenharmony_ci
184762306a36Sopenharmony_ci	vm->bbm.bb_count[VIRTIO_MEM_BBM_BB_UNUSED]++;
184862306a36Sopenharmony_ci	*bb_id = vm->bbm.next_bb_id;
184962306a36Sopenharmony_ci	vm->bbm.next_bb_id++;
185062306a36Sopenharmony_ci	return 0;
185162306a36Sopenharmony_ci}
185262306a36Sopenharmony_ci
185362306a36Sopenharmony_cistatic int virtio_mem_bbm_plug_request(struct virtio_mem *vm, uint64_t diff)
185462306a36Sopenharmony_ci{
185562306a36Sopenharmony_ci	uint64_t nb_bb = diff / vm->bbm.bb_size;
185662306a36Sopenharmony_ci	unsigned long bb_id;
185762306a36Sopenharmony_ci	int rc;
185862306a36Sopenharmony_ci
185962306a36Sopenharmony_ci	if (!nb_bb)
186062306a36Sopenharmony_ci		return 0;
186162306a36Sopenharmony_ci
186262306a36Sopenharmony_ci	/* Try to plug and add unused big blocks */
186362306a36Sopenharmony_ci	virtio_mem_bbm_for_each_bb(vm, bb_id, VIRTIO_MEM_BBM_BB_UNUSED) {
186462306a36Sopenharmony_ci		if (!virtio_mem_could_add_memory(vm, vm->bbm.bb_size))
186562306a36Sopenharmony_ci			return -ENOSPC;
186662306a36Sopenharmony_ci
186762306a36Sopenharmony_ci		rc = virtio_mem_bbm_plug_and_add_bb(vm, bb_id);
186862306a36Sopenharmony_ci		if (!rc)
186962306a36Sopenharmony_ci			nb_bb--;
187062306a36Sopenharmony_ci		if (rc || !nb_bb)
187162306a36Sopenharmony_ci			return rc;
187262306a36Sopenharmony_ci		cond_resched();
187362306a36Sopenharmony_ci	}
187462306a36Sopenharmony_ci
187562306a36Sopenharmony_ci	/* Try to prepare, plug and add new big blocks */
187662306a36Sopenharmony_ci	while (nb_bb) {
187762306a36Sopenharmony_ci		if (!virtio_mem_could_add_memory(vm, vm->bbm.bb_size))
187862306a36Sopenharmony_ci			return -ENOSPC;
187962306a36Sopenharmony_ci
188062306a36Sopenharmony_ci		rc = virtio_mem_bbm_prepare_next_bb(vm, &bb_id);
188162306a36Sopenharmony_ci		if (rc)
188262306a36Sopenharmony_ci			return rc;
188362306a36Sopenharmony_ci		rc = virtio_mem_bbm_plug_and_add_bb(vm, bb_id);
188462306a36Sopenharmony_ci		if (!rc)
188562306a36Sopenharmony_ci			nb_bb--;
188662306a36Sopenharmony_ci		if (rc)
188762306a36Sopenharmony_ci			return rc;
188862306a36Sopenharmony_ci		cond_resched();
188962306a36Sopenharmony_ci	}
189062306a36Sopenharmony_ci
189162306a36Sopenharmony_ci	return 0;
189262306a36Sopenharmony_ci}
189362306a36Sopenharmony_ci
189462306a36Sopenharmony_ci/*
189562306a36Sopenharmony_ci * Try to plug the requested amount of memory.
189662306a36Sopenharmony_ci */
189762306a36Sopenharmony_cistatic int virtio_mem_plug_request(struct virtio_mem *vm, uint64_t diff)
189862306a36Sopenharmony_ci{
189962306a36Sopenharmony_ci	if (vm->in_sbm)
190062306a36Sopenharmony_ci		return virtio_mem_sbm_plug_request(vm, diff);
190162306a36Sopenharmony_ci	return virtio_mem_bbm_plug_request(vm, diff);
190262306a36Sopenharmony_ci}
190362306a36Sopenharmony_ci
190462306a36Sopenharmony_ci/*
190562306a36Sopenharmony_ci * Unplug the desired number of plugged subblocks of an offline memory block.
190662306a36Sopenharmony_ci * Will fail if any subblock cannot get unplugged (instead of skipping it).
190762306a36Sopenharmony_ci *
190862306a36Sopenharmony_ci * Will modify the state of the memory block. Might temporarily drop the
190962306a36Sopenharmony_ci * hotplug_mutex.
191062306a36Sopenharmony_ci *
191162306a36Sopenharmony_ci * Note: Can fail after some subblocks were successfully unplugged.
191262306a36Sopenharmony_ci */
191362306a36Sopenharmony_cistatic int virtio_mem_sbm_unplug_any_sb_offline(struct virtio_mem *vm,
191462306a36Sopenharmony_ci						unsigned long mb_id,
191562306a36Sopenharmony_ci						uint64_t *nb_sb)
191662306a36Sopenharmony_ci{
191762306a36Sopenharmony_ci	int rc;
191862306a36Sopenharmony_ci
191962306a36Sopenharmony_ci	rc = virtio_mem_sbm_unplug_any_sb_raw(vm, mb_id, nb_sb);
192062306a36Sopenharmony_ci
192162306a36Sopenharmony_ci	/* some subblocks might have been unplugged even on failure */
192262306a36Sopenharmony_ci	if (!virtio_mem_sbm_test_sb_plugged(vm, mb_id, 0, vm->sbm.sbs_per_mb))
192362306a36Sopenharmony_ci		virtio_mem_sbm_set_mb_state(vm, mb_id,
192462306a36Sopenharmony_ci					    VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL);
192562306a36Sopenharmony_ci	if (rc)
192662306a36Sopenharmony_ci		return rc;
192762306a36Sopenharmony_ci
192862306a36Sopenharmony_ci	if (virtio_mem_sbm_test_sb_unplugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) {
192962306a36Sopenharmony_ci		/*
193062306a36Sopenharmony_ci		 * Remove the block from Linux - this should never fail.
193162306a36Sopenharmony_ci		 * Hinder the block from getting onlined by marking it
193262306a36Sopenharmony_ci		 * unplugged. Temporarily drop the mutex, so
193362306a36Sopenharmony_ci		 * any pending GOING_ONLINE requests can be serviced/rejected.
193462306a36Sopenharmony_ci		 */
193562306a36Sopenharmony_ci		virtio_mem_sbm_set_mb_state(vm, mb_id,
193662306a36Sopenharmony_ci					    VIRTIO_MEM_SBM_MB_UNUSED);
193762306a36Sopenharmony_ci
193862306a36Sopenharmony_ci		mutex_unlock(&vm->hotplug_mutex);
193962306a36Sopenharmony_ci		rc = virtio_mem_sbm_remove_mb(vm, mb_id);
194062306a36Sopenharmony_ci		BUG_ON(rc);
194162306a36Sopenharmony_ci		mutex_lock(&vm->hotplug_mutex);
194262306a36Sopenharmony_ci	}
194362306a36Sopenharmony_ci	return 0;
194462306a36Sopenharmony_ci}
194562306a36Sopenharmony_ci
194662306a36Sopenharmony_ci/*
194762306a36Sopenharmony_ci * Unplug the given plugged subblocks of an online memory block.
194862306a36Sopenharmony_ci *
194962306a36Sopenharmony_ci * Will modify the state of the memory block.
195062306a36Sopenharmony_ci */
195162306a36Sopenharmony_cistatic int virtio_mem_sbm_unplug_sb_online(struct virtio_mem *vm,
195262306a36Sopenharmony_ci					   unsigned long mb_id, int sb_id,
195362306a36Sopenharmony_ci					   int count)
195462306a36Sopenharmony_ci{
195562306a36Sopenharmony_ci	const unsigned long nr_pages = PFN_DOWN(vm->sbm.sb_size) * count;
195662306a36Sopenharmony_ci	const int old_state = virtio_mem_sbm_get_mb_state(vm, mb_id);
195762306a36Sopenharmony_ci	unsigned long start_pfn;
195862306a36Sopenharmony_ci	int rc;
195962306a36Sopenharmony_ci
196062306a36Sopenharmony_ci	start_pfn = PFN_DOWN(virtio_mem_mb_id_to_phys(mb_id) +
196162306a36Sopenharmony_ci			     sb_id * vm->sbm.sb_size);
196262306a36Sopenharmony_ci
196362306a36Sopenharmony_ci	rc = virtio_mem_fake_offline(vm, start_pfn, nr_pages);
196462306a36Sopenharmony_ci	if (rc)
196562306a36Sopenharmony_ci		return rc;
196662306a36Sopenharmony_ci
196762306a36Sopenharmony_ci	/* Try to unplug the allocated memory */
196862306a36Sopenharmony_ci	rc = virtio_mem_sbm_unplug_sb(vm, mb_id, sb_id, count);
196962306a36Sopenharmony_ci	if (rc) {
197062306a36Sopenharmony_ci		/* Return the memory to the buddy. */
197162306a36Sopenharmony_ci		virtio_mem_fake_online(start_pfn, nr_pages);
197262306a36Sopenharmony_ci		return rc;
197362306a36Sopenharmony_ci	}
197462306a36Sopenharmony_ci
197562306a36Sopenharmony_ci	switch (old_state) {
197662306a36Sopenharmony_ci	case VIRTIO_MEM_SBM_MB_KERNEL:
197762306a36Sopenharmony_ci		virtio_mem_sbm_set_mb_state(vm, mb_id,
197862306a36Sopenharmony_ci					    VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL);
197962306a36Sopenharmony_ci		break;
198062306a36Sopenharmony_ci	case VIRTIO_MEM_SBM_MB_MOVABLE:
198162306a36Sopenharmony_ci		virtio_mem_sbm_set_mb_state(vm, mb_id,
198262306a36Sopenharmony_ci					    VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL);
198362306a36Sopenharmony_ci		break;
198462306a36Sopenharmony_ci	}
198562306a36Sopenharmony_ci
198662306a36Sopenharmony_ci	return 0;
198762306a36Sopenharmony_ci}
198862306a36Sopenharmony_ci
198962306a36Sopenharmony_ci/*
199062306a36Sopenharmony_ci * Unplug the desired number of plugged subblocks of an online memory block.
199162306a36Sopenharmony_ci * Will skip subblock that are busy.
199262306a36Sopenharmony_ci *
199362306a36Sopenharmony_ci * Will modify the state of the memory block. Might temporarily drop the
199462306a36Sopenharmony_ci * hotplug_mutex.
199562306a36Sopenharmony_ci *
199662306a36Sopenharmony_ci * Note: Can fail after some subblocks were successfully unplugged. Can
199762306a36Sopenharmony_ci *       return 0 even if subblocks were busy and could not get unplugged.
199862306a36Sopenharmony_ci */
199962306a36Sopenharmony_cistatic int virtio_mem_sbm_unplug_any_sb_online(struct virtio_mem *vm,
200062306a36Sopenharmony_ci					       unsigned long mb_id,
200162306a36Sopenharmony_ci					       uint64_t *nb_sb)
200262306a36Sopenharmony_ci{
200362306a36Sopenharmony_ci	int rc, sb_id;
200462306a36Sopenharmony_ci
200562306a36Sopenharmony_ci	/* If possible, try to unplug the complete block in one shot. */
200662306a36Sopenharmony_ci	if (*nb_sb >= vm->sbm.sbs_per_mb &&
200762306a36Sopenharmony_ci	    virtio_mem_sbm_test_sb_plugged(vm, mb_id, 0, vm->sbm.sbs_per_mb)) {
200862306a36Sopenharmony_ci		rc = virtio_mem_sbm_unplug_sb_online(vm, mb_id, 0,
200962306a36Sopenharmony_ci						     vm->sbm.sbs_per_mb);
201062306a36Sopenharmony_ci		if (!rc) {
201162306a36Sopenharmony_ci			*nb_sb -= vm->sbm.sbs_per_mb;
201262306a36Sopenharmony_ci			goto unplugged;
201362306a36Sopenharmony_ci		} else if (rc != -EBUSY)
201462306a36Sopenharmony_ci			return rc;
201562306a36Sopenharmony_ci	}
201662306a36Sopenharmony_ci
201762306a36Sopenharmony_ci	/* Fallback to single subblocks. */
201862306a36Sopenharmony_ci	for (sb_id = vm->sbm.sbs_per_mb - 1; sb_id >= 0 && *nb_sb; sb_id--) {
201962306a36Sopenharmony_ci		/* Find the next candidate subblock */
202062306a36Sopenharmony_ci		while (sb_id >= 0 &&
202162306a36Sopenharmony_ci		       !virtio_mem_sbm_test_sb_plugged(vm, mb_id, sb_id, 1))
202262306a36Sopenharmony_ci			sb_id--;
202362306a36Sopenharmony_ci		if (sb_id < 0)
202462306a36Sopenharmony_ci			break;
202562306a36Sopenharmony_ci
202662306a36Sopenharmony_ci		rc = virtio_mem_sbm_unplug_sb_online(vm, mb_id, sb_id, 1);
202762306a36Sopenharmony_ci		if (rc == -EBUSY)
202862306a36Sopenharmony_ci			continue;
202962306a36Sopenharmony_ci		else if (rc)
203062306a36Sopenharmony_ci			return rc;
203162306a36Sopenharmony_ci		*nb_sb -= 1;
203262306a36Sopenharmony_ci	}
203362306a36Sopenharmony_ci
203462306a36Sopenharmony_ciunplugged:
203562306a36Sopenharmony_ci	rc = virtio_mem_sbm_try_remove_unplugged_mb(vm, mb_id);
203662306a36Sopenharmony_ci	if (rc)
203762306a36Sopenharmony_ci		vm->sbm.have_unplugged_mb = 1;
203862306a36Sopenharmony_ci	/* Ignore errors, this is not critical. We'll retry later. */
203962306a36Sopenharmony_ci	return 0;
204062306a36Sopenharmony_ci}
204162306a36Sopenharmony_ci
204262306a36Sopenharmony_ci/*
204362306a36Sopenharmony_ci * Unplug the desired number of plugged subblocks of a memory block that is
204462306a36Sopenharmony_ci * already added to Linux. Will skip subblock of online memory blocks that are
204562306a36Sopenharmony_ci * busy (by the OS). Will fail if any subblock that's not busy cannot get
204662306a36Sopenharmony_ci * unplugged.
204762306a36Sopenharmony_ci *
204862306a36Sopenharmony_ci * Will modify the state of the memory block. Might temporarily drop the
204962306a36Sopenharmony_ci * hotplug_mutex.
205062306a36Sopenharmony_ci *
205162306a36Sopenharmony_ci * Note: Can fail after some subblocks were successfully unplugged. Can
205262306a36Sopenharmony_ci *       return 0 even if subblocks were busy and could not get unplugged.
205362306a36Sopenharmony_ci */
205462306a36Sopenharmony_cistatic int virtio_mem_sbm_unplug_any_sb(struct virtio_mem *vm,
205562306a36Sopenharmony_ci					unsigned long mb_id,
205662306a36Sopenharmony_ci					uint64_t *nb_sb)
205762306a36Sopenharmony_ci{
205862306a36Sopenharmony_ci	const int old_state = virtio_mem_sbm_get_mb_state(vm, mb_id);
205962306a36Sopenharmony_ci
206062306a36Sopenharmony_ci	switch (old_state) {
206162306a36Sopenharmony_ci	case VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL:
206262306a36Sopenharmony_ci	case VIRTIO_MEM_SBM_MB_KERNEL:
206362306a36Sopenharmony_ci	case VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL:
206462306a36Sopenharmony_ci	case VIRTIO_MEM_SBM_MB_MOVABLE:
206562306a36Sopenharmony_ci		return virtio_mem_sbm_unplug_any_sb_online(vm, mb_id, nb_sb);
206662306a36Sopenharmony_ci	case VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL:
206762306a36Sopenharmony_ci	case VIRTIO_MEM_SBM_MB_OFFLINE:
206862306a36Sopenharmony_ci		return virtio_mem_sbm_unplug_any_sb_offline(vm, mb_id, nb_sb);
206962306a36Sopenharmony_ci	}
207062306a36Sopenharmony_ci	return -EINVAL;
207162306a36Sopenharmony_ci}
207262306a36Sopenharmony_ci
207362306a36Sopenharmony_cistatic int virtio_mem_sbm_unplug_request(struct virtio_mem *vm, uint64_t diff)
207462306a36Sopenharmony_ci{
207562306a36Sopenharmony_ci	const int mb_states[] = {
207662306a36Sopenharmony_ci		VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL,
207762306a36Sopenharmony_ci		VIRTIO_MEM_SBM_MB_OFFLINE,
207862306a36Sopenharmony_ci		VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL,
207962306a36Sopenharmony_ci		VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL,
208062306a36Sopenharmony_ci		VIRTIO_MEM_SBM_MB_MOVABLE,
208162306a36Sopenharmony_ci		VIRTIO_MEM_SBM_MB_KERNEL,
208262306a36Sopenharmony_ci	};
208362306a36Sopenharmony_ci	uint64_t nb_sb = diff / vm->sbm.sb_size;
208462306a36Sopenharmony_ci	unsigned long mb_id;
208562306a36Sopenharmony_ci	int rc, i;
208662306a36Sopenharmony_ci
208762306a36Sopenharmony_ci	if (!nb_sb)
208862306a36Sopenharmony_ci		return 0;
208962306a36Sopenharmony_ci
209062306a36Sopenharmony_ci	/*
209162306a36Sopenharmony_ci	 * We'll drop the mutex a couple of times when it is safe to do so.
209262306a36Sopenharmony_ci	 * This might result in some blocks switching the state (online/offline)
209362306a36Sopenharmony_ci	 * and we could miss them in this run - we will retry again later.
209462306a36Sopenharmony_ci	 */
209562306a36Sopenharmony_ci	mutex_lock(&vm->hotplug_mutex);
209662306a36Sopenharmony_ci
209762306a36Sopenharmony_ci	/*
209862306a36Sopenharmony_ci	 * We try unplug from partially plugged blocks first, to try removing
209962306a36Sopenharmony_ci	 * whole memory blocks along with metadata. We prioritize ZONE_MOVABLE
210062306a36Sopenharmony_ci	 * as it's more reliable to unplug memory and remove whole memory
210162306a36Sopenharmony_ci	 * blocks, and we don't want to trigger a zone imbalances by
210262306a36Sopenharmony_ci	 * accidentially removing too much kernel memory.
210362306a36Sopenharmony_ci	 */
210462306a36Sopenharmony_ci	for (i = 0; i < ARRAY_SIZE(mb_states); i++) {
210562306a36Sopenharmony_ci		virtio_mem_sbm_for_each_mb_rev(vm, mb_id, mb_states[i]) {
210662306a36Sopenharmony_ci			rc = virtio_mem_sbm_unplug_any_sb(vm, mb_id, &nb_sb);
210762306a36Sopenharmony_ci			if (rc || !nb_sb)
210862306a36Sopenharmony_ci				goto out_unlock;
210962306a36Sopenharmony_ci			mutex_unlock(&vm->hotplug_mutex);
211062306a36Sopenharmony_ci			cond_resched();
211162306a36Sopenharmony_ci			mutex_lock(&vm->hotplug_mutex);
211262306a36Sopenharmony_ci		}
211362306a36Sopenharmony_ci		if (!unplug_online && i == 1) {
211462306a36Sopenharmony_ci			mutex_unlock(&vm->hotplug_mutex);
211562306a36Sopenharmony_ci			return 0;
211662306a36Sopenharmony_ci		}
211762306a36Sopenharmony_ci	}
211862306a36Sopenharmony_ci
211962306a36Sopenharmony_ci	mutex_unlock(&vm->hotplug_mutex);
212062306a36Sopenharmony_ci	return nb_sb ? -EBUSY : 0;
212162306a36Sopenharmony_ciout_unlock:
212262306a36Sopenharmony_ci	mutex_unlock(&vm->hotplug_mutex);
212362306a36Sopenharmony_ci	return rc;
212462306a36Sopenharmony_ci}
212562306a36Sopenharmony_ci
212662306a36Sopenharmony_ci/*
212762306a36Sopenharmony_ci * Try to offline and remove a big block from Linux and unplug it. Will fail
212862306a36Sopenharmony_ci * with -EBUSY if some memory is busy and cannot get unplugged.
212962306a36Sopenharmony_ci *
213062306a36Sopenharmony_ci * Will modify the state of the memory block. Might temporarily drop the
213162306a36Sopenharmony_ci * hotplug_mutex.
213262306a36Sopenharmony_ci */
213362306a36Sopenharmony_cistatic int virtio_mem_bbm_offline_remove_and_unplug_bb(struct virtio_mem *vm,
213462306a36Sopenharmony_ci						       unsigned long bb_id)
213562306a36Sopenharmony_ci{
213662306a36Sopenharmony_ci	const unsigned long start_pfn = PFN_DOWN(virtio_mem_bb_id_to_phys(vm, bb_id));
213762306a36Sopenharmony_ci	const unsigned long nr_pages = PFN_DOWN(vm->bbm.bb_size);
213862306a36Sopenharmony_ci	unsigned long end_pfn = start_pfn + nr_pages;
213962306a36Sopenharmony_ci	unsigned long pfn;
214062306a36Sopenharmony_ci	struct page *page;
214162306a36Sopenharmony_ci	int rc;
214262306a36Sopenharmony_ci
214362306a36Sopenharmony_ci	if (WARN_ON_ONCE(virtio_mem_bbm_get_bb_state(vm, bb_id) !=
214462306a36Sopenharmony_ci			 VIRTIO_MEM_BBM_BB_ADDED))
214562306a36Sopenharmony_ci		return -EINVAL;
214662306a36Sopenharmony_ci
214762306a36Sopenharmony_ci	/*
214862306a36Sopenharmony_ci	 * Start by fake-offlining all memory. Once we marked the device
214962306a36Sopenharmony_ci	 * block as fake-offline, all newly onlined memory will
215062306a36Sopenharmony_ci	 * automatically be kept fake-offline. Protect from concurrent
215162306a36Sopenharmony_ci	 * onlining/offlining until we have a consistent state.
215262306a36Sopenharmony_ci	 */
215362306a36Sopenharmony_ci	mutex_lock(&vm->hotplug_mutex);
215462306a36Sopenharmony_ci	virtio_mem_bbm_set_bb_state(vm, bb_id, VIRTIO_MEM_BBM_BB_FAKE_OFFLINE);
215562306a36Sopenharmony_ci
215662306a36Sopenharmony_ci	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
215762306a36Sopenharmony_ci		page = pfn_to_online_page(pfn);
215862306a36Sopenharmony_ci		if (!page)
215962306a36Sopenharmony_ci			continue;
216062306a36Sopenharmony_ci
216162306a36Sopenharmony_ci		rc = virtio_mem_fake_offline(vm, pfn, PAGES_PER_SECTION);
216262306a36Sopenharmony_ci		if (rc) {
216362306a36Sopenharmony_ci			end_pfn = pfn;
216462306a36Sopenharmony_ci			goto rollback;
216562306a36Sopenharmony_ci		}
216662306a36Sopenharmony_ci	}
216762306a36Sopenharmony_ci	mutex_unlock(&vm->hotplug_mutex);
216862306a36Sopenharmony_ci
216962306a36Sopenharmony_ci	rc = virtio_mem_bbm_offline_and_remove_bb(vm, bb_id);
217062306a36Sopenharmony_ci	if (rc) {
217162306a36Sopenharmony_ci		mutex_lock(&vm->hotplug_mutex);
217262306a36Sopenharmony_ci		goto rollback;
217362306a36Sopenharmony_ci	}
217462306a36Sopenharmony_ci
217562306a36Sopenharmony_ci	rc = virtio_mem_bbm_unplug_bb(vm, bb_id);
217662306a36Sopenharmony_ci	if (rc)
217762306a36Sopenharmony_ci		virtio_mem_bbm_set_bb_state(vm, bb_id,
217862306a36Sopenharmony_ci					    VIRTIO_MEM_BBM_BB_PLUGGED);
217962306a36Sopenharmony_ci	else
218062306a36Sopenharmony_ci		virtio_mem_bbm_set_bb_state(vm, bb_id,
218162306a36Sopenharmony_ci					    VIRTIO_MEM_BBM_BB_UNUSED);
218262306a36Sopenharmony_ci	return rc;
218362306a36Sopenharmony_ci
218462306a36Sopenharmony_cirollback:
218562306a36Sopenharmony_ci	for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
218662306a36Sopenharmony_ci		page = pfn_to_online_page(pfn);
218762306a36Sopenharmony_ci		if (!page)
218862306a36Sopenharmony_ci			continue;
218962306a36Sopenharmony_ci		virtio_mem_fake_online(pfn, PAGES_PER_SECTION);
219062306a36Sopenharmony_ci	}
219162306a36Sopenharmony_ci	virtio_mem_bbm_set_bb_state(vm, bb_id, VIRTIO_MEM_BBM_BB_ADDED);
219262306a36Sopenharmony_ci	mutex_unlock(&vm->hotplug_mutex);
219362306a36Sopenharmony_ci	return rc;
219462306a36Sopenharmony_ci}
219562306a36Sopenharmony_ci
219662306a36Sopenharmony_ci/*
219762306a36Sopenharmony_ci * Test if a big block is completely offline.
219862306a36Sopenharmony_ci */
219962306a36Sopenharmony_cistatic bool virtio_mem_bbm_bb_is_offline(struct virtio_mem *vm,
220062306a36Sopenharmony_ci					 unsigned long bb_id)
220162306a36Sopenharmony_ci{
220262306a36Sopenharmony_ci	const unsigned long start_pfn = PFN_DOWN(virtio_mem_bb_id_to_phys(vm, bb_id));
220362306a36Sopenharmony_ci	const unsigned long nr_pages = PFN_DOWN(vm->bbm.bb_size);
220462306a36Sopenharmony_ci	unsigned long pfn;
220562306a36Sopenharmony_ci
220662306a36Sopenharmony_ci	for (pfn = start_pfn; pfn < start_pfn + nr_pages;
220762306a36Sopenharmony_ci	     pfn += PAGES_PER_SECTION) {
220862306a36Sopenharmony_ci		if (pfn_to_online_page(pfn))
220962306a36Sopenharmony_ci			return false;
221062306a36Sopenharmony_ci	}
221162306a36Sopenharmony_ci
221262306a36Sopenharmony_ci	return true;
221362306a36Sopenharmony_ci}
221462306a36Sopenharmony_ci
221562306a36Sopenharmony_ci/*
221662306a36Sopenharmony_ci * Test if a big block is completely onlined to ZONE_MOVABLE (or offline).
221762306a36Sopenharmony_ci */
221862306a36Sopenharmony_cistatic bool virtio_mem_bbm_bb_is_movable(struct virtio_mem *vm,
221962306a36Sopenharmony_ci					 unsigned long bb_id)
222062306a36Sopenharmony_ci{
222162306a36Sopenharmony_ci	const unsigned long start_pfn = PFN_DOWN(virtio_mem_bb_id_to_phys(vm, bb_id));
222262306a36Sopenharmony_ci	const unsigned long nr_pages = PFN_DOWN(vm->bbm.bb_size);
222362306a36Sopenharmony_ci	struct page *page;
222462306a36Sopenharmony_ci	unsigned long pfn;
222562306a36Sopenharmony_ci
222662306a36Sopenharmony_ci	for (pfn = start_pfn; pfn < start_pfn + nr_pages;
222762306a36Sopenharmony_ci	     pfn += PAGES_PER_SECTION) {
222862306a36Sopenharmony_ci		page = pfn_to_online_page(pfn);
222962306a36Sopenharmony_ci		if (!page)
223062306a36Sopenharmony_ci			continue;
223162306a36Sopenharmony_ci		if (page_zonenum(page) != ZONE_MOVABLE)
223262306a36Sopenharmony_ci			return false;
223362306a36Sopenharmony_ci	}
223462306a36Sopenharmony_ci
223562306a36Sopenharmony_ci	return true;
223662306a36Sopenharmony_ci}
223762306a36Sopenharmony_ci
223862306a36Sopenharmony_cistatic int virtio_mem_bbm_unplug_request(struct virtio_mem *vm, uint64_t diff)
223962306a36Sopenharmony_ci{
224062306a36Sopenharmony_ci	uint64_t nb_bb = diff / vm->bbm.bb_size;
224162306a36Sopenharmony_ci	uint64_t bb_id;
224262306a36Sopenharmony_ci	int rc, i;
224362306a36Sopenharmony_ci
224462306a36Sopenharmony_ci	if (!nb_bb)
224562306a36Sopenharmony_ci		return 0;
224662306a36Sopenharmony_ci
224762306a36Sopenharmony_ci	/*
224862306a36Sopenharmony_ci	 * Try to unplug big blocks. Similar to SBM, start with offline
224962306a36Sopenharmony_ci	 * big blocks.
225062306a36Sopenharmony_ci	 */
225162306a36Sopenharmony_ci	for (i = 0; i < 3; i++) {
225262306a36Sopenharmony_ci		virtio_mem_bbm_for_each_bb_rev(vm, bb_id, VIRTIO_MEM_BBM_BB_ADDED) {
225362306a36Sopenharmony_ci			cond_resched();
225462306a36Sopenharmony_ci
225562306a36Sopenharmony_ci			/*
225662306a36Sopenharmony_ci			 * As we're holding no locks, these checks are racy,
225762306a36Sopenharmony_ci			 * but we don't care.
225862306a36Sopenharmony_ci			 */
225962306a36Sopenharmony_ci			if (i == 0 && !virtio_mem_bbm_bb_is_offline(vm, bb_id))
226062306a36Sopenharmony_ci				continue;
226162306a36Sopenharmony_ci			if (i == 1 && !virtio_mem_bbm_bb_is_movable(vm, bb_id))
226262306a36Sopenharmony_ci				continue;
226362306a36Sopenharmony_ci			rc = virtio_mem_bbm_offline_remove_and_unplug_bb(vm, bb_id);
226462306a36Sopenharmony_ci			if (rc == -EBUSY)
226562306a36Sopenharmony_ci				continue;
226662306a36Sopenharmony_ci			if (!rc)
226762306a36Sopenharmony_ci				nb_bb--;
226862306a36Sopenharmony_ci			if (rc || !nb_bb)
226962306a36Sopenharmony_ci				return rc;
227062306a36Sopenharmony_ci		}
227162306a36Sopenharmony_ci		if (i == 0 && !unplug_online)
227262306a36Sopenharmony_ci			return 0;
227362306a36Sopenharmony_ci	}
227462306a36Sopenharmony_ci
227562306a36Sopenharmony_ci	return nb_bb ? -EBUSY : 0;
227662306a36Sopenharmony_ci}
227762306a36Sopenharmony_ci
227862306a36Sopenharmony_ci/*
227962306a36Sopenharmony_ci * Try to unplug the requested amount of memory.
228062306a36Sopenharmony_ci */
228162306a36Sopenharmony_cistatic int virtio_mem_unplug_request(struct virtio_mem *vm, uint64_t diff)
228262306a36Sopenharmony_ci{
228362306a36Sopenharmony_ci	if (vm->in_sbm)
228462306a36Sopenharmony_ci		return virtio_mem_sbm_unplug_request(vm, diff);
228562306a36Sopenharmony_ci	return virtio_mem_bbm_unplug_request(vm, diff);
228662306a36Sopenharmony_ci}
228762306a36Sopenharmony_ci
228862306a36Sopenharmony_ci/*
228962306a36Sopenharmony_ci * Try to unplug all blocks that couldn't be unplugged before, for example,
229062306a36Sopenharmony_ci * because the hypervisor was busy. Further, offline and remove any memory
229162306a36Sopenharmony_ci * blocks where we previously failed.
229262306a36Sopenharmony_ci */
229362306a36Sopenharmony_cistatic int virtio_mem_cleanup_pending_mb(struct virtio_mem *vm)
229462306a36Sopenharmony_ci{
229562306a36Sopenharmony_ci	unsigned long id;
229662306a36Sopenharmony_ci	int rc = 0;
229762306a36Sopenharmony_ci
229862306a36Sopenharmony_ci	if (!vm->in_sbm) {
229962306a36Sopenharmony_ci		virtio_mem_bbm_for_each_bb(vm, id,
230062306a36Sopenharmony_ci					   VIRTIO_MEM_BBM_BB_PLUGGED) {
230162306a36Sopenharmony_ci			rc = virtio_mem_bbm_unplug_bb(vm, id);
230262306a36Sopenharmony_ci			if (rc)
230362306a36Sopenharmony_ci				return rc;
230462306a36Sopenharmony_ci			virtio_mem_bbm_set_bb_state(vm, id,
230562306a36Sopenharmony_ci						    VIRTIO_MEM_BBM_BB_UNUSED);
230662306a36Sopenharmony_ci		}
230762306a36Sopenharmony_ci		return 0;
230862306a36Sopenharmony_ci	}
230962306a36Sopenharmony_ci
231062306a36Sopenharmony_ci	virtio_mem_sbm_for_each_mb(vm, id, VIRTIO_MEM_SBM_MB_PLUGGED) {
231162306a36Sopenharmony_ci		rc = virtio_mem_sbm_unplug_mb(vm, id);
231262306a36Sopenharmony_ci		if (rc)
231362306a36Sopenharmony_ci			return rc;
231462306a36Sopenharmony_ci		virtio_mem_sbm_set_mb_state(vm, id,
231562306a36Sopenharmony_ci					    VIRTIO_MEM_SBM_MB_UNUSED);
231662306a36Sopenharmony_ci	}
231762306a36Sopenharmony_ci
231862306a36Sopenharmony_ci	if (!vm->sbm.have_unplugged_mb)
231962306a36Sopenharmony_ci		return 0;
232062306a36Sopenharmony_ci
232162306a36Sopenharmony_ci	/*
232262306a36Sopenharmony_ci	 * Let's retry (offlining and) removing completely unplugged Linux
232362306a36Sopenharmony_ci	 * memory blocks.
232462306a36Sopenharmony_ci	 */
232562306a36Sopenharmony_ci	vm->sbm.have_unplugged_mb = false;
232662306a36Sopenharmony_ci
232762306a36Sopenharmony_ci	mutex_lock(&vm->hotplug_mutex);
232862306a36Sopenharmony_ci	virtio_mem_sbm_for_each_mb(vm, id, VIRTIO_MEM_SBM_MB_MOVABLE_PARTIAL)
232962306a36Sopenharmony_ci		rc |= virtio_mem_sbm_try_remove_unplugged_mb(vm, id);
233062306a36Sopenharmony_ci	virtio_mem_sbm_for_each_mb(vm, id, VIRTIO_MEM_SBM_MB_KERNEL_PARTIAL)
233162306a36Sopenharmony_ci		rc |= virtio_mem_sbm_try_remove_unplugged_mb(vm, id);
233262306a36Sopenharmony_ci	virtio_mem_sbm_for_each_mb(vm, id, VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL)
233362306a36Sopenharmony_ci		rc |= virtio_mem_sbm_try_remove_unplugged_mb(vm, id);
233462306a36Sopenharmony_ci	mutex_unlock(&vm->hotplug_mutex);
233562306a36Sopenharmony_ci
233662306a36Sopenharmony_ci	if (rc)
233762306a36Sopenharmony_ci		vm->sbm.have_unplugged_mb = true;
233862306a36Sopenharmony_ci	/* Ignore errors, this is not critical. We'll retry later. */
233962306a36Sopenharmony_ci	return 0;
234062306a36Sopenharmony_ci}
234162306a36Sopenharmony_ci
234262306a36Sopenharmony_ci/*
234362306a36Sopenharmony_ci * Update all parts of the config that could have changed.
234462306a36Sopenharmony_ci */
234562306a36Sopenharmony_cistatic void virtio_mem_refresh_config(struct virtio_mem *vm)
234662306a36Sopenharmony_ci{
234762306a36Sopenharmony_ci	const struct range pluggable_range = mhp_get_pluggable_range(true);
234862306a36Sopenharmony_ci	uint64_t new_plugged_size, usable_region_size, end_addr;
234962306a36Sopenharmony_ci
235062306a36Sopenharmony_ci	/* the plugged_size is just a reflection of what _we_ did previously */
235162306a36Sopenharmony_ci	virtio_cread_le(vm->vdev, struct virtio_mem_config, plugged_size,
235262306a36Sopenharmony_ci			&new_plugged_size);
235362306a36Sopenharmony_ci	if (WARN_ON_ONCE(new_plugged_size != vm->plugged_size))
235462306a36Sopenharmony_ci		vm->plugged_size = new_plugged_size;
235562306a36Sopenharmony_ci
235662306a36Sopenharmony_ci	/* calculate the last usable memory block id */
235762306a36Sopenharmony_ci	virtio_cread_le(vm->vdev, struct virtio_mem_config,
235862306a36Sopenharmony_ci			usable_region_size, &usable_region_size);
235962306a36Sopenharmony_ci	end_addr = min(vm->addr + usable_region_size - 1,
236062306a36Sopenharmony_ci		       pluggable_range.end);
236162306a36Sopenharmony_ci
236262306a36Sopenharmony_ci	if (vm->in_sbm) {
236362306a36Sopenharmony_ci		vm->sbm.last_usable_mb_id = virtio_mem_phys_to_mb_id(end_addr);
236462306a36Sopenharmony_ci		if (!IS_ALIGNED(end_addr + 1, memory_block_size_bytes()))
236562306a36Sopenharmony_ci			vm->sbm.last_usable_mb_id--;
236662306a36Sopenharmony_ci	} else {
236762306a36Sopenharmony_ci		vm->bbm.last_usable_bb_id = virtio_mem_phys_to_bb_id(vm,
236862306a36Sopenharmony_ci								     end_addr);
236962306a36Sopenharmony_ci		if (!IS_ALIGNED(end_addr + 1, vm->bbm.bb_size))
237062306a36Sopenharmony_ci			vm->bbm.last_usable_bb_id--;
237162306a36Sopenharmony_ci	}
237262306a36Sopenharmony_ci	/*
237362306a36Sopenharmony_ci	 * If we cannot plug any of our device memory (e.g., nothing in the
237462306a36Sopenharmony_ci	 * usable region is addressable), the last usable memory block id will
237562306a36Sopenharmony_ci	 * be smaller than the first usable memory block id. We'll stop
237662306a36Sopenharmony_ci	 * attempting to add memory with -ENOSPC from our main loop.
237762306a36Sopenharmony_ci	 */
237862306a36Sopenharmony_ci
237962306a36Sopenharmony_ci	/* see if there is a request to change the size */
238062306a36Sopenharmony_ci	virtio_cread_le(vm->vdev, struct virtio_mem_config, requested_size,
238162306a36Sopenharmony_ci			&vm->requested_size);
238262306a36Sopenharmony_ci
238362306a36Sopenharmony_ci	dev_info(&vm->vdev->dev, "plugged size: 0x%llx", vm->plugged_size);
238462306a36Sopenharmony_ci	dev_info(&vm->vdev->dev, "requested size: 0x%llx", vm->requested_size);
238562306a36Sopenharmony_ci}
238662306a36Sopenharmony_ci
238762306a36Sopenharmony_ci/*
238862306a36Sopenharmony_ci * Workqueue function for handling plug/unplug requests and config updates.
238962306a36Sopenharmony_ci */
239062306a36Sopenharmony_cistatic void virtio_mem_run_wq(struct work_struct *work)
239162306a36Sopenharmony_ci{
239262306a36Sopenharmony_ci	struct virtio_mem *vm = container_of(work, struct virtio_mem, wq);
239362306a36Sopenharmony_ci	uint64_t diff;
239462306a36Sopenharmony_ci	int rc;
239562306a36Sopenharmony_ci
239662306a36Sopenharmony_ci	if (unlikely(vm->in_kdump)) {
239762306a36Sopenharmony_ci		dev_warn_once(&vm->vdev->dev,
239862306a36Sopenharmony_ci			     "unexpected workqueue run in kdump kernel\n");
239962306a36Sopenharmony_ci		return;
240062306a36Sopenharmony_ci	}
240162306a36Sopenharmony_ci
240262306a36Sopenharmony_ci	hrtimer_cancel(&vm->retry_timer);
240362306a36Sopenharmony_ci
240462306a36Sopenharmony_ci	if (vm->broken)
240562306a36Sopenharmony_ci		return;
240662306a36Sopenharmony_ci
240762306a36Sopenharmony_ci	atomic_set(&vm->wq_active, 1);
240862306a36Sopenharmony_ciretry:
240962306a36Sopenharmony_ci	rc = 0;
241062306a36Sopenharmony_ci
241162306a36Sopenharmony_ci	/* Make sure we start with a clean state if there are leftovers. */
241262306a36Sopenharmony_ci	if (unlikely(vm->unplug_all_required))
241362306a36Sopenharmony_ci		rc = virtio_mem_send_unplug_all_request(vm);
241462306a36Sopenharmony_ci
241562306a36Sopenharmony_ci	if (atomic_read(&vm->config_changed)) {
241662306a36Sopenharmony_ci		atomic_set(&vm->config_changed, 0);
241762306a36Sopenharmony_ci		virtio_mem_refresh_config(vm);
241862306a36Sopenharmony_ci	}
241962306a36Sopenharmony_ci
242062306a36Sopenharmony_ci	/* Cleanup any leftovers from previous runs */
242162306a36Sopenharmony_ci	if (!rc)
242262306a36Sopenharmony_ci		rc = virtio_mem_cleanup_pending_mb(vm);
242362306a36Sopenharmony_ci
242462306a36Sopenharmony_ci	if (!rc && vm->requested_size != vm->plugged_size) {
242562306a36Sopenharmony_ci		if (vm->requested_size > vm->plugged_size) {
242662306a36Sopenharmony_ci			diff = vm->requested_size - vm->plugged_size;
242762306a36Sopenharmony_ci			rc = virtio_mem_plug_request(vm, diff);
242862306a36Sopenharmony_ci		} else {
242962306a36Sopenharmony_ci			diff = vm->plugged_size - vm->requested_size;
243062306a36Sopenharmony_ci			rc = virtio_mem_unplug_request(vm, diff);
243162306a36Sopenharmony_ci		}
243262306a36Sopenharmony_ci	}
243362306a36Sopenharmony_ci
243462306a36Sopenharmony_ci	/*
243562306a36Sopenharmony_ci	 * Keep retrying to offline and remove completely unplugged Linux
243662306a36Sopenharmony_ci	 * memory blocks.
243762306a36Sopenharmony_ci	 */
243862306a36Sopenharmony_ci	if (!rc && vm->in_sbm && vm->sbm.have_unplugged_mb)
243962306a36Sopenharmony_ci		rc = -EBUSY;
244062306a36Sopenharmony_ci
244162306a36Sopenharmony_ci	switch (rc) {
244262306a36Sopenharmony_ci	case 0:
244362306a36Sopenharmony_ci		vm->retry_timer_ms = VIRTIO_MEM_RETRY_TIMER_MIN_MS;
244462306a36Sopenharmony_ci		break;
244562306a36Sopenharmony_ci	case -ENOSPC:
244662306a36Sopenharmony_ci		/*
244762306a36Sopenharmony_ci		 * We cannot add any more memory (alignment, physical limit)
244862306a36Sopenharmony_ci		 * or we have too many offline memory blocks.
244962306a36Sopenharmony_ci		 */
245062306a36Sopenharmony_ci		break;
245162306a36Sopenharmony_ci	case -ETXTBSY:
245262306a36Sopenharmony_ci		/*
245362306a36Sopenharmony_ci		 * The hypervisor cannot process our request right now
245462306a36Sopenharmony_ci		 * (e.g., out of memory, migrating);
245562306a36Sopenharmony_ci		 */
245662306a36Sopenharmony_ci	case -EBUSY:
245762306a36Sopenharmony_ci		/*
245862306a36Sopenharmony_ci		 * We cannot free up any memory to unplug it (all plugged memory
245962306a36Sopenharmony_ci		 * is busy).
246062306a36Sopenharmony_ci		 */
246162306a36Sopenharmony_ci	case -ENOMEM:
246262306a36Sopenharmony_ci		/* Out of memory, try again later. */
246362306a36Sopenharmony_ci		hrtimer_start(&vm->retry_timer, ms_to_ktime(vm->retry_timer_ms),
246462306a36Sopenharmony_ci			      HRTIMER_MODE_REL);
246562306a36Sopenharmony_ci		break;
246662306a36Sopenharmony_ci	case -EAGAIN:
246762306a36Sopenharmony_ci		/* Retry immediately (e.g., the config changed). */
246862306a36Sopenharmony_ci		goto retry;
246962306a36Sopenharmony_ci	default:
247062306a36Sopenharmony_ci		/* Unknown error, mark as broken */
247162306a36Sopenharmony_ci		dev_err(&vm->vdev->dev,
247262306a36Sopenharmony_ci			"unknown error, marking device broken: %d\n", rc);
247362306a36Sopenharmony_ci		vm->broken = true;
247462306a36Sopenharmony_ci	}
247562306a36Sopenharmony_ci
247662306a36Sopenharmony_ci	atomic_set(&vm->wq_active, 0);
247762306a36Sopenharmony_ci}
247862306a36Sopenharmony_ci
247962306a36Sopenharmony_cistatic enum hrtimer_restart virtio_mem_timer_expired(struct hrtimer *timer)
248062306a36Sopenharmony_ci{
248162306a36Sopenharmony_ci	struct virtio_mem *vm = container_of(timer, struct virtio_mem,
248262306a36Sopenharmony_ci					     retry_timer);
248362306a36Sopenharmony_ci
248462306a36Sopenharmony_ci	virtio_mem_retry(vm);
248562306a36Sopenharmony_ci	vm->retry_timer_ms = min_t(unsigned int, vm->retry_timer_ms * 2,
248662306a36Sopenharmony_ci				   VIRTIO_MEM_RETRY_TIMER_MAX_MS);
248762306a36Sopenharmony_ci	return HRTIMER_NORESTART;
248862306a36Sopenharmony_ci}
248962306a36Sopenharmony_ci
249062306a36Sopenharmony_cistatic void virtio_mem_handle_response(struct virtqueue *vq)
249162306a36Sopenharmony_ci{
249262306a36Sopenharmony_ci	struct virtio_mem *vm = vq->vdev->priv;
249362306a36Sopenharmony_ci
249462306a36Sopenharmony_ci	wake_up(&vm->host_resp);
249562306a36Sopenharmony_ci}
249662306a36Sopenharmony_ci
249762306a36Sopenharmony_cistatic int virtio_mem_init_vq(struct virtio_mem *vm)
249862306a36Sopenharmony_ci{
249962306a36Sopenharmony_ci	struct virtqueue *vq;
250062306a36Sopenharmony_ci
250162306a36Sopenharmony_ci	vq = virtio_find_single_vq(vm->vdev, virtio_mem_handle_response,
250262306a36Sopenharmony_ci				   "guest-request");
250362306a36Sopenharmony_ci	if (IS_ERR(vq))
250462306a36Sopenharmony_ci		return PTR_ERR(vq);
250562306a36Sopenharmony_ci	vm->vq = vq;
250662306a36Sopenharmony_ci
250762306a36Sopenharmony_ci	return 0;
250862306a36Sopenharmony_ci}
250962306a36Sopenharmony_ci
251062306a36Sopenharmony_cistatic int virtio_mem_init_hotplug(struct virtio_mem *vm)
251162306a36Sopenharmony_ci{
251262306a36Sopenharmony_ci	const struct range pluggable_range = mhp_get_pluggable_range(true);
251362306a36Sopenharmony_ci	uint64_t unit_pages, sb_size, addr;
251462306a36Sopenharmony_ci	int rc;
251562306a36Sopenharmony_ci
251662306a36Sopenharmony_ci	/* bad device setup - warn only */
251762306a36Sopenharmony_ci	if (!IS_ALIGNED(vm->addr, memory_block_size_bytes()))
251862306a36Sopenharmony_ci		dev_warn(&vm->vdev->dev,
251962306a36Sopenharmony_ci			 "The alignment of the physical start address can make some memory unusable.\n");
252062306a36Sopenharmony_ci	if (!IS_ALIGNED(vm->addr + vm->region_size, memory_block_size_bytes()))
252162306a36Sopenharmony_ci		dev_warn(&vm->vdev->dev,
252262306a36Sopenharmony_ci			 "The alignment of the physical end address can make some memory unusable.\n");
252362306a36Sopenharmony_ci	if (vm->addr < pluggable_range.start ||
252462306a36Sopenharmony_ci	    vm->addr + vm->region_size - 1 > pluggable_range.end)
252562306a36Sopenharmony_ci		dev_warn(&vm->vdev->dev,
252662306a36Sopenharmony_ci			 "Some device memory is not addressable/pluggable. This can make some memory unusable.\n");
252762306a36Sopenharmony_ci
252862306a36Sopenharmony_ci	/* Prepare the offline threshold - make sure we can add two blocks. */
252962306a36Sopenharmony_ci	vm->offline_threshold = max_t(uint64_t, 2 * memory_block_size_bytes(),
253062306a36Sopenharmony_ci				      VIRTIO_MEM_DEFAULT_OFFLINE_THRESHOLD);
253162306a36Sopenharmony_ci
253262306a36Sopenharmony_ci	/*
253362306a36Sopenharmony_ci	 * alloc_contig_range() works reliably with pageblock
253462306a36Sopenharmony_ci	 * granularity on ZONE_NORMAL, use pageblock_nr_pages.
253562306a36Sopenharmony_ci	 */
253662306a36Sopenharmony_ci	sb_size = PAGE_SIZE * pageblock_nr_pages;
253762306a36Sopenharmony_ci	sb_size = max_t(uint64_t, vm->device_block_size, sb_size);
253862306a36Sopenharmony_ci
253962306a36Sopenharmony_ci	if (sb_size < memory_block_size_bytes() && !force_bbm) {
254062306a36Sopenharmony_ci		/* SBM: At least two subblocks per Linux memory block. */
254162306a36Sopenharmony_ci		vm->in_sbm = true;
254262306a36Sopenharmony_ci		vm->sbm.sb_size = sb_size;
254362306a36Sopenharmony_ci		vm->sbm.sbs_per_mb = memory_block_size_bytes() /
254462306a36Sopenharmony_ci				     vm->sbm.sb_size;
254562306a36Sopenharmony_ci
254662306a36Sopenharmony_ci		/* Round up to the next full memory block */
254762306a36Sopenharmony_ci		addr = max_t(uint64_t, vm->addr, pluggable_range.start) +
254862306a36Sopenharmony_ci		       memory_block_size_bytes() - 1;
254962306a36Sopenharmony_ci		vm->sbm.first_mb_id = virtio_mem_phys_to_mb_id(addr);
255062306a36Sopenharmony_ci		vm->sbm.next_mb_id = vm->sbm.first_mb_id;
255162306a36Sopenharmony_ci	} else {
255262306a36Sopenharmony_ci		/* BBM: At least one Linux memory block. */
255362306a36Sopenharmony_ci		vm->bbm.bb_size = max_t(uint64_t, vm->device_block_size,
255462306a36Sopenharmony_ci					memory_block_size_bytes());
255562306a36Sopenharmony_ci
255662306a36Sopenharmony_ci		if (bbm_block_size) {
255762306a36Sopenharmony_ci			if (!is_power_of_2(bbm_block_size)) {
255862306a36Sopenharmony_ci				dev_warn(&vm->vdev->dev,
255962306a36Sopenharmony_ci					 "bbm_block_size is not a power of 2");
256062306a36Sopenharmony_ci			} else if (bbm_block_size < vm->bbm.bb_size) {
256162306a36Sopenharmony_ci				dev_warn(&vm->vdev->dev,
256262306a36Sopenharmony_ci					 "bbm_block_size is too small");
256362306a36Sopenharmony_ci			} else {
256462306a36Sopenharmony_ci				vm->bbm.bb_size = bbm_block_size;
256562306a36Sopenharmony_ci			}
256662306a36Sopenharmony_ci		}
256762306a36Sopenharmony_ci
256862306a36Sopenharmony_ci		/* Round up to the next aligned big block */
256962306a36Sopenharmony_ci		addr = max_t(uint64_t, vm->addr, pluggable_range.start) +
257062306a36Sopenharmony_ci		       vm->bbm.bb_size - 1;
257162306a36Sopenharmony_ci		vm->bbm.first_bb_id = virtio_mem_phys_to_bb_id(vm, addr);
257262306a36Sopenharmony_ci		vm->bbm.next_bb_id = vm->bbm.first_bb_id;
257362306a36Sopenharmony_ci
257462306a36Sopenharmony_ci		/* Make sure we can add two big blocks. */
257562306a36Sopenharmony_ci		vm->offline_threshold = max_t(uint64_t, 2 * vm->bbm.bb_size,
257662306a36Sopenharmony_ci					      vm->offline_threshold);
257762306a36Sopenharmony_ci	}
257862306a36Sopenharmony_ci
257962306a36Sopenharmony_ci	dev_info(&vm->vdev->dev, "memory block size: 0x%lx",
258062306a36Sopenharmony_ci		 memory_block_size_bytes());
258162306a36Sopenharmony_ci	if (vm->in_sbm)
258262306a36Sopenharmony_ci		dev_info(&vm->vdev->dev, "subblock size: 0x%llx",
258362306a36Sopenharmony_ci			 (unsigned long long)vm->sbm.sb_size);
258462306a36Sopenharmony_ci	else
258562306a36Sopenharmony_ci		dev_info(&vm->vdev->dev, "big block size: 0x%llx",
258662306a36Sopenharmony_ci			 (unsigned long long)vm->bbm.bb_size);
258762306a36Sopenharmony_ci
258862306a36Sopenharmony_ci	/* create the parent resource for all memory */
258962306a36Sopenharmony_ci	rc = virtio_mem_create_resource(vm);
259062306a36Sopenharmony_ci	if (rc)
259162306a36Sopenharmony_ci		return rc;
259262306a36Sopenharmony_ci
259362306a36Sopenharmony_ci	/* use a single dynamic memory group to cover the whole memory device */
259462306a36Sopenharmony_ci	if (vm->in_sbm)
259562306a36Sopenharmony_ci		unit_pages = PHYS_PFN(memory_block_size_bytes());
259662306a36Sopenharmony_ci	else
259762306a36Sopenharmony_ci		unit_pages = PHYS_PFN(vm->bbm.bb_size);
259862306a36Sopenharmony_ci	rc = memory_group_register_dynamic(vm->nid, unit_pages);
259962306a36Sopenharmony_ci	if (rc < 0)
260062306a36Sopenharmony_ci		goto out_del_resource;
260162306a36Sopenharmony_ci	vm->mgid = rc;
260262306a36Sopenharmony_ci
260362306a36Sopenharmony_ci	/*
260462306a36Sopenharmony_ci	 * If we still have memory plugged, we have to unplug all memory first.
260562306a36Sopenharmony_ci	 * Registering our parent resource makes sure that this memory isn't
260662306a36Sopenharmony_ci	 * actually in use (e.g., trying to reload the driver).
260762306a36Sopenharmony_ci	 */
260862306a36Sopenharmony_ci	if (vm->plugged_size) {
260962306a36Sopenharmony_ci		vm->unplug_all_required = true;
261062306a36Sopenharmony_ci		dev_info(&vm->vdev->dev, "unplugging all memory is required\n");
261162306a36Sopenharmony_ci	}
261262306a36Sopenharmony_ci
261362306a36Sopenharmony_ci	/* register callbacks */
261462306a36Sopenharmony_ci	vm->memory_notifier.notifier_call = virtio_mem_memory_notifier_cb;
261562306a36Sopenharmony_ci	rc = register_memory_notifier(&vm->memory_notifier);
261662306a36Sopenharmony_ci	if (rc)
261762306a36Sopenharmony_ci		goto out_unreg_group;
261862306a36Sopenharmony_ci	rc = register_virtio_mem_device(vm);
261962306a36Sopenharmony_ci	if (rc)
262062306a36Sopenharmony_ci		goto out_unreg_mem;
262162306a36Sopenharmony_ci
262262306a36Sopenharmony_ci	return 0;
262362306a36Sopenharmony_ciout_unreg_mem:
262462306a36Sopenharmony_ci	unregister_memory_notifier(&vm->memory_notifier);
262562306a36Sopenharmony_ciout_unreg_group:
262662306a36Sopenharmony_ci	memory_group_unregister(vm->mgid);
262762306a36Sopenharmony_ciout_del_resource:
262862306a36Sopenharmony_ci	virtio_mem_delete_resource(vm);
262962306a36Sopenharmony_ci	return rc;
263062306a36Sopenharmony_ci}
263162306a36Sopenharmony_ci
263262306a36Sopenharmony_ci#ifdef CONFIG_PROC_VMCORE
263362306a36Sopenharmony_cistatic int virtio_mem_send_state_request(struct virtio_mem *vm, uint64_t addr,
263462306a36Sopenharmony_ci					 uint64_t size)
263562306a36Sopenharmony_ci{
263662306a36Sopenharmony_ci	const uint64_t nb_vm_blocks = size / vm->device_block_size;
263762306a36Sopenharmony_ci	const struct virtio_mem_req req = {
263862306a36Sopenharmony_ci		.type = cpu_to_virtio16(vm->vdev, VIRTIO_MEM_REQ_STATE),
263962306a36Sopenharmony_ci		.u.state.addr = cpu_to_virtio64(vm->vdev, addr),
264062306a36Sopenharmony_ci		.u.state.nb_blocks = cpu_to_virtio16(vm->vdev, nb_vm_blocks),
264162306a36Sopenharmony_ci	};
264262306a36Sopenharmony_ci	int rc = -ENOMEM;
264362306a36Sopenharmony_ci
264462306a36Sopenharmony_ci	dev_dbg(&vm->vdev->dev, "requesting state: 0x%llx - 0x%llx\n", addr,
264562306a36Sopenharmony_ci		addr + size - 1);
264662306a36Sopenharmony_ci
264762306a36Sopenharmony_ci	switch (virtio_mem_send_request(vm, &req)) {
264862306a36Sopenharmony_ci	case VIRTIO_MEM_RESP_ACK:
264962306a36Sopenharmony_ci		return virtio16_to_cpu(vm->vdev, vm->resp.u.state.state);
265062306a36Sopenharmony_ci	case VIRTIO_MEM_RESP_ERROR:
265162306a36Sopenharmony_ci		rc = -EINVAL;
265262306a36Sopenharmony_ci		break;
265362306a36Sopenharmony_ci	default:
265462306a36Sopenharmony_ci		break;
265562306a36Sopenharmony_ci	}
265662306a36Sopenharmony_ci
265762306a36Sopenharmony_ci	dev_dbg(&vm->vdev->dev, "requesting state failed: %d\n", rc);
265862306a36Sopenharmony_ci	return rc;
265962306a36Sopenharmony_ci}
266062306a36Sopenharmony_ci
266162306a36Sopenharmony_cistatic bool virtio_mem_vmcore_pfn_is_ram(struct vmcore_cb *cb,
266262306a36Sopenharmony_ci					 unsigned long pfn)
266362306a36Sopenharmony_ci{
266462306a36Sopenharmony_ci	struct virtio_mem *vm = container_of(cb, struct virtio_mem,
266562306a36Sopenharmony_ci					     vmcore_cb);
266662306a36Sopenharmony_ci	uint64_t addr = PFN_PHYS(pfn);
266762306a36Sopenharmony_ci	bool is_ram;
266862306a36Sopenharmony_ci	int rc;
266962306a36Sopenharmony_ci
267062306a36Sopenharmony_ci	if (!virtio_mem_contains_range(vm, addr, PAGE_SIZE))
267162306a36Sopenharmony_ci		return true;
267262306a36Sopenharmony_ci	if (!vm->plugged_size)
267362306a36Sopenharmony_ci		return false;
267462306a36Sopenharmony_ci
267562306a36Sopenharmony_ci	/*
267662306a36Sopenharmony_ci	 * We have to serialize device requests and access to the information
267762306a36Sopenharmony_ci	 * about the block queried last.
267862306a36Sopenharmony_ci	 */
267962306a36Sopenharmony_ci	mutex_lock(&vm->hotplug_mutex);
268062306a36Sopenharmony_ci
268162306a36Sopenharmony_ci	addr = ALIGN_DOWN(addr, vm->device_block_size);
268262306a36Sopenharmony_ci	if (addr != vm->last_block_addr) {
268362306a36Sopenharmony_ci		rc = virtio_mem_send_state_request(vm, addr,
268462306a36Sopenharmony_ci						   vm->device_block_size);
268562306a36Sopenharmony_ci		/* On any kind of error, we're going to signal !ram. */
268662306a36Sopenharmony_ci		if (rc == VIRTIO_MEM_STATE_PLUGGED)
268762306a36Sopenharmony_ci			vm->last_block_plugged = true;
268862306a36Sopenharmony_ci		else
268962306a36Sopenharmony_ci			vm->last_block_plugged = false;
269062306a36Sopenharmony_ci		vm->last_block_addr = addr;
269162306a36Sopenharmony_ci	}
269262306a36Sopenharmony_ci
269362306a36Sopenharmony_ci	is_ram = vm->last_block_plugged;
269462306a36Sopenharmony_ci	mutex_unlock(&vm->hotplug_mutex);
269562306a36Sopenharmony_ci	return is_ram;
269662306a36Sopenharmony_ci}
269762306a36Sopenharmony_ci#endif /* CONFIG_PROC_VMCORE */
269862306a36Sopenharmony_ci
269962306a36Sopenharmony_cistatic int virtio_mem_init_kdump(struct virtio_mem *vm)
270062306a36Sopenharmony_ci{
270162306a36Sopenharmony_ci#ifdef CONFIG_PROC_VMCORE
270262306a36Sopenharmony_ci	dev_info(&vm->vdev->dev, "memory hot(un)plug disabled in kdump kernel\n");
270362306a36Sopenharmony_ci	vm->vmcore_cb.pfn_is_ram = virtio_mem_vmcore_pfn_is_ram;
270462306a36Sopenharmony_ci	register_vmcore_cb(&vm->vmcore_cb);
270562306a36Sopenharmony_ci	return 0;
270662306a36Sopenharmony_ci#else /* CONFIG_PROC_VMCORE */
270762306a36Sopenharmony_ci	dev_warn(&vm->vdev->dev, "disabled in kdump kernel without vmcore\n");
270862306a36Sopenharmony_ci	return -EBUSY;
270962306a36Sopenharmony_ci#endif /* CONFIG_PROC_VMCORE */
271062306a36Sopenharmony_ci}
271162306a36Sopenharmony_ci
271262306a36Sopenharmony_cistatic int virtio_mem_init(struct virtio_mem *vm)
271362306a36Sopenharmony_ci{
271462306a36Sopenharmony_ci	uint16_t node_id;
271562306a36Sopenharmony_ci
271662306a36Sopenharmony_ci	if (!vm->vdev->config->get) {
271762306a36Sopenharmony_ci		dev_err(&vm->vdev->dev, "config access disabled\n");
271862306a36Sopenharmony_ci		return -EINVAL;
271962306a36Sopenharmony_ci	}
272062306a36Sopenharmony_ci
272162306a36Sopenharmony_ci	/* Fetch all properties that can't change. */
272262306a36Sopenharmony_ci	virtio_cread_le(vm->vdev, struct virtio_mem_config, plugged_size,
272362306a36Sopenharmony_ci			&vm->plugged_size);
272462306a36Sopenharmony_ci	virtio_cread_le(vm->vdev, struct virtio_mem_config, block_size,
272562306a36Sopenharmony_ci			&vm->device_block_size);
272662306a36Sopenharmony_ci	virtio_cread_le(vm->vdev, struct virtio_mem_config, node_id,
272762306a36Sopenharmony_ci			&node_id);
272862306a36Sopenharmony_ci	vm->nid = virtio_mem_translate_node_id(vm, node_id);
272962306a36Sopenharmony_ci	virtio_cread_le(vm->vdev, struct virtio_mem_config, addr, &vm->addr);
273062306a36Sopenharmony_ci	virtio_cread_le(vm->vdev, struct virtio_mem_config, region_size,
273162306a36Sopenharmony_ci			&vm->region_size);
273262306a36Sopenharmony_ci
273362306a36Sopenharmony_ci	/* Determine the nid for the device based on the lowest address. */
273462306a36Sopenharmony_ci	if (vm->nid == NUMA_NO_NODE)
273562306a36Sopenharmony_ci		vm->nid = memory_add_physaddr_to_nid(vm->addr);
273662306a36Sopenharmony_ci
273762306a36Sopenharmony_ci	dev_info(&vm->vdev->dev, "start address: 0x%llx", vm->addr);
273862306a36Sopenharmony_ci	dev_info(&vm->vdev->dev, "region size: 0x%llx", vm->region_size);
273962306a36Sopenharmony_ci	dev_info(&vm->vdev->dev, "device block size: 0x%llx",
274062306a36Sopenharmony_ci		 (unsigned long long)vm->device_block_size);
274162306a36Sopenharmony_ci	if (vm->nid != NUMA_NO_NODE && IS_ENABLED(CONFIG_NUMA))
274262306a36Sopenharmony_ci		dev_info(&vm->vdev->dev, "nid: %d", vm->nid);
274362306a36Sopenharmony_ci
274462306a36Sopenharmony_ci	/*
274562306a36Sopenharmony_ci	 * We don't want to (un)plug or reuse any memory when in kdump. The
274662306a36Sopenharmony_ci	 * memory is still accessible (but not exposed to Linux).
274762306a36Sopenharmony_ci	 */
274862306a36Sopenharmony_ci	if (vm->in_kdump)
274962306a36Sopenharmony_ci		return virtio_mem_init_kdump(vm);
275062306a36Sopenharmony_ci	return virtio_mem_init_hotplug(vm);
275162306a36Sopenharmony_ci}
275262306a36Sopenharmony_ci
275362306a36Sopenharmony_cistatic int virtio_mem_create_resource(struct virtio_mem *vm)
275462306a36Sopenharmony_ci{
275562306a36Sopenharmony_ci	/*
275662306a36Sopenharmony_ci	 * When force-unloading the driver and removing the device, we
275762306a36Sopenharmony_ci	 * could have a garbage pointer. Duplicate the string.
275862306a36Sopenharmony_ci	 */
275962306a36Sopenharmony_ci	const char *name = kstrdup(dev_name(&vm->vdev->dev), GFP_KERNEL);
276062306a36Sopenharmony_ci
276162306a36Sopenharmony_ci	if (!name)
276262306a36Sopenharmony_ci		return -ENOMEM;
276362306a36Sopenharmony_ci
276462306a36Sopenharmony_ci	/* Disallow mapping device memory via /dev/mem completely. */
276562306a36Sopenharmony_ci	vm->parent_resource = __request_mem_region(vm->addr, vm->region_size,
276662306a36Sopenharmony_ci						   name, IORESOURCE_SYSTEM_RAM |
276762306a36Sopenharmony_ci						   IORESOURCE_EXCLUSIVE);
276862306a36Sopenharmony_ci	if (!vm->parent_resource) {
276962306a36Sopenharmony_ci		kfree(name);
277062306a36Sopenharmony_ci		dev_warn(&vm->vdev->dev, "could not reserve device region\n");
277162306a36Sopenharmony_ci		dev_info(&vm->vdev->dev,
277262306a36Sopenharmony_ci			 "reloading the driver is not supported\n");
277362306a36Sopenharmony_ci		return -EBUSY;
277462306a36Sopenharmony_ci	}
277562306a36Sopenharmony_ci
277662306a36Sopenharmony_ci	/* The memory is not actually busy - make add_memory() work. */
277762306a36Sopenharmony_ci	vm->parent_resource->flags &= ~IORESOURCE_BUSY;
277862306a36Sopenharmony_ci	return 0;
277962306a36Sopenharmony_ci}
278062306a36Sopenharmony_ci
278162306a36Sopenharmony_cistatic void virtio_mem_delete_resource(struct virtio_mem *vm)
278262306a36Sopenharmony_ci{
278362306a36Sopenharmony_ci	const char *name;
278462306a36Sopenharmony_ci
278562306a36Sopenharmony_ci	if (!vm->parent_resource)
278662306a36Sopenharmony_ci		return;
278762306a36Sopenharmony_ci
278862306a36Sopenharmony_ci	name = vm->parent_resource->name;
278962306a36Sopenharmony_ci	release_resource(vm->parent_resource);
279062306a36Sopenharmony_ci	kfree(vm->parent_resource);
279162306a36Sopenharmony_ci	kfree(name);
279262306a36Sopenharmony_ci	vm->parent_resource = NULL;
279362306a36Sopenharmony_ci}
279462306a36Sopenharmony_ci
/*
 * Resource walk callback used by virtio_mem_has_memory_added(): being called
 * at all means a matching resource exists, so unconditionally return 1.
 */
static int virtio_mem_range_has_system_ram(struct resource *res, void *arg)
{
	return 1;
}
279962306a36Sopenharmony_ci
280062306a36Sopenharmony_cistatic bool virtio_mem_has_memory_added(struct virtio_mem *vm)
280162306a36Sopenharmony_ci{
280262306a36Sopenharmony_ci	const unsigned long flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
280362306a36Sopenharmony_ci
280462306a36Sopenharmony_ci	return walk_iomem_res_desc(IORES_DESC_NONE, flags, vm->addr,
280562306a36Sopenharmony_ci				   vm->addr + vm->region_size, NULL,
280662306a36Sopenharmony_ci				   virtio_mem_range_has_system_ram) == 1;
280762306a36Sopenharmony_ci}
280862306a36Sopenharmony_ci
280962306a36Sopenharmony_cistatic int virtio_mem_probe(struct virtio_device *vdev)
281062306a36Sopenharmony_ci{
281162306a36Sopenharmony_ci	struct virtio_mem *vm;
281262306a36Sopenharmony_ci	int rc;
281362306a36Sopenharmony_ci
281462306a36Sopenharmony_ci	BUILD_BUG_ON(sizeof(struct virtio_mem_req) != 24);
281562306a36Sopenharmony_ci	BUILD_BUG_ON(sizeof(struct virtio_mem_resp) != 10);
281662306a36Sopenharmony_ci
281762306a36Sopenharmony_ci	vdev->priv = vm = kzalloc(sizeof(*vm), GFP_KERNEL);
281862306a36Sopenharmony_ci	if (!vm)
281962306a36Sopenharmony_ci		return -ENOMEM;
282062306a36Sopenharmony_ci
282162306a36Sopenharmony_ci	init_waitqueue_head(&vm->host_resp);
282262306a36Sopenharmony_ci	vm->vdev = vdev;
282362306a36Sopenharmony_ci	INIT_WORK(&vm->wq, virtio_mem_run_wq);
282462306a36Sopenharmony_ci	mutex_init(&vm->hotplug_mutex);
282562306a36Sopenharmony_ci	INIT_LIST_HEAD(&vm->next);
282662306a36Sopenharmony_ci	spin_lock_init(&vm->removal_lock);
282762306a36Sopenharmony_ci	hrtimer_init(&vm->retry_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
282862306a36Sopenharmony_ci	vm->retry_timer.function = virtio_mem_timer_expired;
282962306a36Sopenharmony_ci	vm->retry_timer_ms = VIRTIO_MEM_RETRY_TIMER_MIN_MS;
283062306a36Sopenharmony_ci	vm->in_kdump = is_kdump_kernel();
283162306a36Sopenharmony_ci
283262306a36Sopenharmony_ci	/* register the virtqueue */
283362306a36Sopenharmony_ci	rc = virtio_mem_init_vq(vm);
283462306a36Sopenharmony_ci	if (rc)
283562306a36Sopenharmony_ci		goto out_free_vm;
283662306a36Sopenharmony_ci
283762306a36Sopenharmony_ci	/* initialize the device by querying the config */
283862306a36Sopenharmony_ci	rc = virtio_mem_init(vm);
283962306a36Sopenharmony_ci	if (rc)
284062306a36Sopenharmony_ci		goto out_del_vq;
284162306a36Sopenharmony_ci
284262306a36Sopenharmony_ci	virtio_device_ready(vdev);
284362306a36Sopenharmony_ci
284462306a36Sopenharmony_ci	/* trigger a config update to start processing the requested_size */
284562306a36Sopenharmony_ci	if (!vm->in_kdump) {
284662306a36Sopenharmony_ci		atomic_set(&vm->config_changed, 1);
284762306a36Sopenharmony_ci		queue_work(system_freezable_wq, &vm->wq);
284862306a36Sopenharmony_ci	}
284962306a36Sopenharmony_ci
285062306a36Sopenharmony_ci	return 0;
285162306a36Sopenharmony_ciout_del_vq:
285262306a36Sopenharmony_ci	vdev->config->del_vqs(vdev);
285362306a36Sopenharmony_ciout_free_vm:
285462306a36Sopenharmony_ci	kfree(vm);
285562306a36Sopenharmony_ci	vdev->priv = NULL;
285662306a36Sopenharmony_ci
285762306a36Sopenharmony_ci	return rc;
285862306a36Sopenharmony_ci}
285962306a36Sopenharmony_ci
286062306a36Sopenharmony_cistatic void virtio_mem_deinit_hotplug(struct virtio_mem *vm)
286162306a36Sopenharmony_ci{
286262306a36Sopenharmony_ci	unsigned long mb_id;
286362306a36Sopenharmony_ci	int rc;
286462306a36Sopenharmony_ci
286562306a36Sopenharmony_ci	/*
286662306a36Sopenharmony_ci	 * Make sure the workqueue won't be triggered anymore and no memory
286762306a36Sopenharmony_ci	 * blocks can be onlined/offlined until we're finished here.
286862306a36Sopenharmony_ci	 */
286962306a36Sopenharmony_ci	mutex_lock(&vm->hotplug_mutex);
287062306a36Sopenharmony_ci	spin_lock_irq(&vm->removal_lock);
287162306a36Sopenharmony_ci	vm->removing = true;
287262306a36Sopenharmony_ci	spin_unlock_irq(&vm->removal_lock);
287362306a36Sopenharmony_ci	mutex_unlock(&vm->hotplug_mutex);
287462306a36Sopenharmony_ci
287562306a36Sopenharmony_ci	/* wait until the workqueue stopped */
287662306a36Sopenharmony_ci	cancel_work_sync(&vm->wq);
287762306a36Sopenharmony_ci	hrtimer_cancel(&vm->retry_timer);
287862306a36Sopenharmony_ci
287962306a36Sopenharmony_ci	if (vm->in_sbm) {
288062306a36Sopenharmony_ci		/*
288162306a36Sopenharmony_ci		 * After we unregistered our callbacks, user space can online
288262306a36Sopenharmony_ci		 * partially plugged offline blocks. Make sure to remove them.
288362306a36Sopenharmony_ci		 */
288462306a36Sopenharmony_ci		virtio_mem_sbm_for_each_mb(vm, mb_id,
288562306a36Sopenharmony_ci					   VIRTIO_MEM_SBM_MB_OFFLINE_PARTIAL) {
288662306a36Sopenharmony_ci			rc = virtio_mem_sbm_remove_mb(vm, mb_id);
288762306a36Sopenharmony_ci			BUG_ON(rc);
288862306a36Sopenharmony_ci			virtio_mem_sbm_set_mb_state(vm, mb_id,
288962306a36Sopenharmony_ci						    VIRTIO_MEM_SBM_MB_UNUSED);
289062306a36Sopenharmony_ci		}
289162306a36Sopenharmony_ci		/*
289262306a36Sopenharmony_ci		 * After we unregistered our callbacks, user space can no longer
289362306a36Sopenharmony_ci		 * offline partially plugged online memory blocks. No need to
289462306a36Sopenharmony_ci		 * worry about them.
289562306a36Sopenharmony_ci		 */
289662306a36Sopenharmony_ci	}
289762306a36Sopenharmony_ci
289862306a36Sopenharmony_ci	/* unregister callbacks */
289962306a36Sopenharmony_ci	unregister_virtio_mem_device(vm);
290062306a36Sopenharmony_ci	unregister_memory_notifier(&vm->memory_notifier);
290162306a36Sopenharmony_ci
290262306a36Sopenharmony_ci	/*
290362306a36Sopenharmony_ci	 * There is no way we could reliably remove all memory we have added to
290462306a36Sopenharmony_ci	 * the system. And there is no way to stop the driver/device from going
290562306a36Sopenharmony_ci	 * away. Warn at least.
290662306a36Sopenharmony_ci	 */
290762306a36Sopenharmony_ci	if (virtio_mem_has_memory_added(vm)) {
290862306a36Sopenharmony_ci		dev_warn(&vm->vdev->dev,
290962306a36Sopenharmony_ci			 "device still has system memory added\n");
291062306a36Sopenharmony_ci	} else {
291162306a36Sopenharmony_ci		virtio_mem_delete_resource(vm);
291262306a36Sopenharmony_ci		kfree_const(vm->resource_name);
291362306a36Sopenharmony_ci		memory_group_unregister(vm->mgid);
291462306a36Sopenharmony_ci	}
291562306a36Sopenharmony_ci
291662306a36Sopenharmony_ci	/* remove all tracking data - no locking needed */
291762306a36Sopenharmony_ci	if (vm->in_sbm) {
291862306a36Sopenharmony_ci		vfree(vm->sbm.mb_states);
291962306a36Sopenharmony_ci		vfree(vm->sbm.sb_states);
292062306a36Sopenharmony_ci	} else {
292162306a36Sopenharmony_ci		vfree(vm->bbm.bb_states);
292262306a36Sopenharmony_ci	}
292362306a36Sopenharmony_ci}
292462306a36Sopenharmony_ci
/*
 * Counterpart of virtio_mem_init_kdump(): drop the vmcore callback again.
 * Compiles to an empty function without CONFIG_PROC_VMCORE.
 */
static void virtio_mem_deinit_kdump(struct virtio_mem *vm)
{
#ifdef CONFIG_PROC_VMCORE
	unregister_vmcore_cb(&vm->vmcore_cb);
#endif /* CONFIG_PROC_VMCORE */
}
293162306a36Sopenharmony_ci
293262306a36Sopenharmony_cistatic void virtio_mem_remove(struct virtio_device *vdev)
293362306a36Sopenharmony_ci{
293462306a36Sopenharmony_ci	struct virtio_mem *vm = vdev->priv;
293562306a36Sopenharmony_ci
293662306a36Sopenharmony_ci	if (vm->in_kdump)
293762306a36Sopenharmony_ci		virtio_mem_deinit_kdump(vm);
293862306a36Sopenharmony_ci	else
293962306a36Sopenharmony_ci		virtio_mem_deinit_hotplug(vm);
294062306a36Sopenharmony_ci
294162306a36Sopenharmony_ci	/* reset the device and cleanup the queues */
294262306a36Sopenharmony_ci	virtio_reset_device(vdev);
294362306a36Sopenharmony_ci	vdev->config->del_vqs(vdev);
294462306a36Sopenharmony_ci
294562306a36Sopenharmony_ci	kfree(vm);
294662306a36Sopenharmony_ci	vdev->priv = NULL;
294762306a36Sopenharmony_ci}
294862306a36Sopenharmony_ci
294962306a36Sopenharmony_cistatic void virtio_mem_config_changed(struct virtio_device *vdev)
295062306a36Sopenharmony_ci{
295162306a36Sopenharmony_ci	struct virtio_mem *vm = vdev->priv;
295262306a36Sopenharmony_ci
295362306a36Sopenharmony_ci	if (unlikely(vm->in_kdump))
295462306a36Sopenharmony_ci		return;
295562306a36Sopenharmony_ci
295662306a36Sopenharmony_ci	atomic_set(&vm->config_changed, 1);
295762306a36Sopenharmony_ci	virtio_mem_retry(vm);
295862306a36Sopenharmony_ci}
295962306a36Sopenharmony_ci
296062306a36Sopenharmony_ci#ifdef CONFIG_PM_SLEEP
296162306a36Sopenharmony_cistatic int virtio_mem_freeze(struct virtio_device *vdev)
296262306a36Sopenharmony_ci{
296362306a36Sopenharmony_ci	/*
296462306a36Sopenharmony_ci	 * When restarting the VM, all memory is usually unplugged. Don't
296562306a36Sopenharmony_ci	 * allow to suspend/hibernate.
296662306a36Sopenharmony_ci	 */
296762306a36Sopenharmony_ci	dev_err(&vdev->dev, "save/restore not supported.\n");
296862306a36Sopenharmony_ci	return -EPERM;
296962306a36Sopenharmony_ci}
297062306a36Sopenharmony_ci
297162306a36Sopenharmony_cistatic int virtio_mem_restore(struct virtio_device *vdev)
297262306a36Sopenharmony_ci{
297362306a36Sopenharmony_ci	return -EPERM;
297462306a36Sopenharmony_ci}
297562306a36Sopenharmony_ci#endif
297662306a36Sopenharmony_ci
297762306a36Sopenharmony_cistatic unsigned int virtio_mem_features[] = {
297862306a36Sopenharmony_ci#if defined(CONFIG_NUMA) && defined(CONFIG_ACPI_NUMA)
297962306a36Sopenharmony_ci	VIRTIO_MEM_F_ACPI_PXM,
298062306a36Sopenharmony_ci#endif
298162306a36Sopenharmony_ci	VIRTIO_MEM_F_UNPLUGGED_INACCESSIBLE,
298262306a36Sopenharmony_ci};
298362306a36Sopenharmony_ci
298462306a36Sopenharmony_cistatic const struct virtio_device_id virtio_mem_id_table[] = {
298562306a36Sopenharmony_ci	{ VIRTIO_ID_MEM, VIRTIO_DEV_ANY_ID },
298662306a36Sopenharmony_ci	{ 0 },
298762306a36Sopenharmony_ci};
298862306a36Sopenharmony_ci
298962306a36Sopenharmony_cistatic struct virtio_driver virtio_mem_driver = {
299062306a36Sopenharmony_ci	.feature_table = virtio_mem_features,
299162306a36Sopenharmony_ci	.feature_table_size = ARRAY_SIZE(virtio_mem_features),
299262306a36Sopenharmony_ci	.driver.name = KBUILD_MODNAME,
299362306a36Sopenharmony_ci	.driver.owner = THIS_MODULE,
299462306a36Sopenharmony_ci	.id_table = virtio_mem_id_table,
299562306a36Sopenharmony_ci	.probe = virtio_mem_probe,
299662306a36Sopenharmony_ci	.remove = virtio_mem_remove,
299762306a36Sopenharmony_ci	.config_changed = virtio_mem_config_changed,
299862306a36Sopenharmony_ci#ifdef CONFIG_PM_SLEEP
299962306a36Sopenharmony_ci	.freeze	=	virtio_mem_freeze,
300062306a36Sopenharmony_ci	.restore =	virtio_mem_restore,
300162306a36Sopenharmony_ci#endif
300262306a36Sopenharmony_ci};
300362306a36Sopenharmony_ci
300462306a36Sopenharmony_cimodule_virtio_driver(virtio_mem_driver);
300562306a36Sopenharmony_ciMODULE_DEVICE_TABLE(virtio, virtio_mem_id_table);
300662306a36Sopenharmony_ciMODULE_AUTHOR("David Hildenbrand <david@redhat.com>");
300762306a36Sopenharmony_ciMODULE_DESCRIPTION("Virtio-mem driver");
300862306a36Sopenharmony_ciMODULE_LICENSE("GPL");
3009